From a4f1af4d638e74c9c9947ac2660f16eee73f08d3 Mon Sep 17 00:00:00 2001 From: Jean-Marie Favreau Date: Sat, 8 Feb 2025 11:27:23 +0100 Subject: [PATCH] =?UTF-8?q?On=20r=C3=A9cup=C3=A8re=20l'erreur=20d'ex=C3=A9?= =?UTF-8?q?cution=20par=20catch=20de=20l'exception?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../import_tasks/extractor_facebook.py | 4 --- .../import_tasks/generic_extractors.py | 26 ++++++++++--------- src/agenda_culturel/import_tasks/importer.py | 11 ++++---- 3 files changed, 20 insertions(+), 21 deletions(-) diff --git a/src/agenda_culturel/import_tasks/extractor_facebook.py b/src/agenda_culturel/import_tasks/extractor_facebook.py index f8b7fad..9b249a3 100644 --- a/src/agenda_culturel/import_tasks/extractor_facebook.py +++ b/src/agenda_culturel/import_tasks/extractor_facebook.py @@ -236,7 +236,6 @@ class FacebookEventExtractor(Extractor): self.has_2nd_method = True def prepare_2nd_extract_dler(downloader): - logger.warning("prepare_2nd_extract_dler") if downloader.support_2nd_extract: from selenium.webdriver.common.by import By from selenium.webdriver.support.ui import WebDriverWait @@ -245,11 +244,8 @@ class FacebookEventExtractor(Extractor): path = './/div[not(@aria-hidden)]/div[@aria-label="Allow all cookies"]' element = WebDriverWait(downloader.driver, 10).until(EC.visibility_of_element_located((By.XPATH, path))) button = downloader.driver.find_element(By.XPATH, path) - logger.warning("button") - logger.warning(button) button.click() t.sleep(3) - logger.warning(downloader.driver.page_source) def prepare_2nd_extract(self): FacebookEventExtractor.prepare_2nd_extract_dler(self.downloader) diff --git a/src/agenda_culturel/import_tasks/generic_extractors.py b/src/agenda_culturel/import_tasks/generic_extractors.py index bc20357..b22d565 100644 --- a/src/agenda_culturel/import_tasks/generic_extractors.py +++ b/src/agenda_culturel/import_tasks/generic_extractors.py @@ -253,18 +253,20 @@ class 
TwoStepsExtractor(Extractor): raise Exception(msg) else: # then extract event information from this html document - self.add_event_from_content( - content_event, event_url, url_human, default_values, published - ) - # some website (FB) sometime need a second step - if first and len(self.events) == 0 and self.has_2nd_method_in_list and self.downloader.support_2nd_extract: - first = False - self.prepare_2nd_extract_in_list() - content_event = self.downloader.get_content(event_url) - if not content_event is None: - self.add_event_from_content( - content_event, event_url, url_human, default_values, published - ) + try: + self.add_event_from_content( + content_event, event_url, url_human, default_values, published + ) + except Exception as e: + # some websites (FB) sometimes need a second step + if first and len(self.events) == 0 and self.has_2nd_method_in_list and self.downloader.support_2nd_extract: + first = False + self.prepare_2nd_extract_in_list() + content_event = self.downloader.get_content(event_url) + if content_event is not None: + self.add_event_from_content( + content_event, event_url, url_human, default_values, published + ) return self.get_structure() diff --git a/src/agenda_culturel/import_tasks/importer.py b/src/agenda_culturel/import_tasks/importer.py index 5a2e264..fd4fcd4 100644 --- a/src/agenda_culturel/import_tasks/importer.py +++ b/src/agenda_culturel/import_tasks/importer.py @@ -37,11 +37,12 @@ class URL2Events: for e in Extractor.get_default_extractors(self.single_event): logger.warning('Extractor::' + type(e).__name__) e.set_downloader(self.downloader) - events = e.extract(content, url, url_human, default_values, published) - if events is not None: - if len(events) > 0: - return events - else: + try: + events = e.extract(content, url, url_human, default_values, published) + if events is not None: + if len(events) > 0: + return events + except Exception: # keep e bound to the extractor (e.has_2nd_method is read below). logger.warning("cas sans event") if first and FacebookEventExtractor.is_known_url(url) 
and self.downloader.support_2nd_extract and e.has_2nd_method: logger.warning("on avance")