On ajoute un cas d'analyse (parsing) de plus pour les dates

This commit is contained in:
Jean-Marie Favreau 2024-11-29 12:16:02 +01:00
parent 7120da3e28
commit 1256adcb8a

View File

@@ -49,7 +49,7 @@ class Extractor(ABC):
return i + 1 return i + 1
return None return None
def parse_french_date(text): def parse_french_date(text, default_year=None):
# format NomJour Numero Mois Année # format NomJour Numero Mois Année
m = re.search( m = re.search(
"[a-zA-ZéÉûÛ:.]+[ ]*([0-9]+)[er]*[ ]*([a-zA-ZéÉûÛ:.]+)[ ]*([0-9]+)", text "[a-zA-ZéÉûÛ:.]+[ ]*([0-9]+)[er]*[ ]*([a-zA-ZéÉûÛ:.]+)[ ]*([0-9]+)", text
@@ -73,8 +73,15 @@ class Extractor(ABC):
month = int(m.group(2)) month = int(m.group(2))
year = m.group(3) year = m.group(3)
else: else:
# TODO: consolider les cas non satisfaits # format Numero Mois (sans année : default_year est utilisé)
return None m = re.search("([0-9]+)[er]*[ ]*([a-zA-ZéÉûÛ:.]+)", text)
if m:
day = m.group(1)
month = Extractor.guess_month(m.group(2))
year = default_year
else:
# TODO: consolider les cas non satisfaits
return None
if month is None: if month is None:
return None return None