diff --git a/experimentations/get_c3c_events.py b/experimentations/get_c3c_events.py index 7e6fee4..67a1cbe 100755 --- a/experimentations/get_c3c_events.py +++ b/experimentations/get_c3c_events.py @@ -28,7 +28,7 @@ from src.agenda_culturel.import_tasks.custom_extractors import * if __name__ == "__main__": - u2e = URL2Events(SimpleDownloader(), c3c.CExtractor()) + u2e = URL2Events(ChromiumHeadlessDownloader(), c3c.CExtractor()) url = "https://billetterie-c3c.clermont-ferrand.fr/" url_human = "https://billetterie-c3c.clermont-ferrand.fr/" diff --git a/src/agenda_culturel/import_tasks/custom_extractors/c3c.py b/src/agenda_culturel/import_tasks/custom_extractors/c3c.py index 6f28001..4281767 100644 --- a/src/agenda_culturel/import_tasks/custom_extractors/c3c.py +++ b/src/agenda_culturel/import_tasks/custom_extractors/c3c.py @@ -64,6 +64,7 @@ class CExtractor(TwoStepsExtractor): tags.append(tag) # TODO: parser les dates, récupérer les heures () + dates = [self.parse_french_date(o.get("value")) for o in soup.select("select.datedleb_resa option")] diff --git a/src/agenda_culturel/import_tasks/extractor.py b/src/agenda_culturel/import_tasks/extractor.py index 7eb6f80..e4ec159 100644 --- a/src/agenda_culturel/import_tasks/extractor.py +++ b/src/agenda_culturel/import_tasks/extractor.py @@ -65,8 +65,15 @@ class Extractor(ABC): month = self.guess_month(m.group(2)) year = m.group(3) else: - # TODO: consolider les cas non satisfaits - return None + # format Numero Mois Annee + m = re.search("([0-9]+)/([0-9]+)/([0-9]+)", text) + if m: + day = m.group(1) + month = int(m.group(2)) + year = m.group(3) + else: + # TODO: consolider les cas non satisfaits + return None if month is None: return None