From b186001132b7c304418f28b87a42b4bcd810493e Mon Sep 17 00:00:00 2001 From: Jean-Marie Favreau Date: Sat, 12 Apr 2025 08:33:35 +0200 Subject: [PATCH] =?UTF-8?q?Am=C3=A9lioration=20parse=20du=20Wix=20de=20la?= =?UTF-8?q?=20puce=20=C3=A0=20l'oreille?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../import_tasks/custom_extractors/lapucealoreille.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/src/agenda_culturel/import_tasks/custom_extractors/lapucealoreille.py b/src/agenda_culturel/import_tasks/custom_extractors/lapucealoreille.py index 9da1e6b..71d4e1c 100644 --- a/src/agenda_culturel/import_tasks/custom_extractors/lapucealoreille.py +++ b/src/agenda_culturel/import_tasks/custom_extractors/lapucealoreille.py @@ -31,9 +31,12 @@ class CExtractor(TwoStepsExtractor): title = soup.select("h2")[0].get_text() - start_day = Extractor.parse_french_date( - soup.select("h2")[1].get_text() - ) # pas parfait, mais bordel que ce site est mal construit + # pas parfait, mais bordel que ce site est mal construit + for h2 in range(1, 4): + print(soup.select("h2")[h2].get_text()) + start_day = Extractor.parse_french_date(soup.select("h2")[h2].get_text()) + if start_day is not None: + break spans = soup.select("div[data-testid=richTextElement] span") start_time = None