Robustification du parse de date

This commit is contained in:
Jean-Marie Favreau 2025-04-12 08:33:24 +02:00
parent 4c37aafc72
commit 516430f9f4

View File

@ -72,7 +72,7 @@ class Extractor(ABC):
def parse_french_date(text, default_year=None, default_year_by_proximity=None): def parse_french_date(text, default_year=None, default_year_by_proximity=None):
# format NomJour Numero Mois Année # format NomJour Numero Mois Année
m = re.search( m = re.search(
"[a-zA-ZéÉûÛ:.]+[ ]*([0-9]+)[er]*[ ]*([a-zA-ZéÉûÛ:.]+)[ ]*([0-9]+)", "[a-zA-ZéÉûÛ:.]+[  ]*([0-9]+)[er]*[  ]*([a-zA-ZéÉûÛ:.]+)[  ]*([0-9]+)",
text, text,
) )
if m: if m:
@ -81,7 +81,7 @@ class Extractor(ABC):
year = m.group(3) year = m.group(3)
else: else:
# format Numero Mois Annee # format Numero Mois Annee
m = re.search("([0-9]+)[er]*[ ]*([a-zA-ZéÉûÛ:.]+)[ ]*([0-9]+)", text) m = re.search("([0-9]+)[er]*[  ]*([a-zA-ZéÉûÛ:.]+)[  ]*([0-9]+)", text)
if m: if m:
day = m.group(1) day = m.group(1)
month = Extractor.guess_month(m.group(2)) month = Extractor.guess_month(m.group(2))
@ -95,7 +95,7 @@ class Extractor(ABC):
year = m.group(3) year = m.group(3)
else: else:
# format Numero Mois # format Numero Mois
m = re.search("([0-9]+)[er]*[ ]*([a-zA-ZéÉûÛ:.]+)", text) m = re.search("([0-9]+)[er]*[  ]*([a-zA-ZéÉûÛ:.]+)", text)
if m: if m:
day = m.group(1) day = m.group(1)
month = Extractor.guess_month(m.group(2)) month = Extractor.guess_month(m.group(2))