diff --git a/src/agenda_culturel/import_tasks/extractor.py b/src/agenda_culturel/import_tasks/extractor.py index 3e75870..6822b47 100644 --- a/src/agenda_culturel/import_tasks/extractor.py +++ b/src/agenda_culturel/import_tasks/extractor.py @@ -49,7 +49,7 @@ class Extractor(ABC): return i + 1 return None - def parse_french_date(text): + def parse_french_date(text, default_year=None): # format NomJour Numero Mois Année m = re.search( "[a-zA-ZéÉûÛ:.]+[ ]*([0-9]+)[er]*[ ]*([a-zA-ZéÉûÛ:.]+)[ ]*([0-9]+)", text @@ -73,8 +73,15 @@ class Extractor(ABC): month = int(m.group(2)) year = m.group(3) else: - # TODO: consolider les cas non satisfaits - return None + # format Numero Mois Annee + m = re.search("([0-9]+)[er]*[ ]*([a-zA-ZéÉûÛ:.]+)", text) + if m: + day = m.group(1) + month = Extractor.guess_month(m.group(2)) + year = default_year + else: + # TODO: consolider les cas non satisfaits + return None if month is None: return None