From 70755b8f5e7da7ba0da66b8b9845f6487285bed4 Mon Sep 17 00:00:00 2001 From: Jean-Marie Favreau Date: Wed, 19 Feb 2025 16:46:40 +0100 Subject: [PATCH] On ajoute un outil de tracking pour comprendre les erreurs FB --- .../custom_extractors/fbevents.py | 24 ++++++++++++------- 1 file changed, 15 insertions(+), 9 deletions(-) diff --git a/src/agenda_culturel/import_tasks/custom_extractors/fbevents.py b/src/agenda_culturel/import_tasks/custom_extractors/fbevents.py index 24b07eb..424b00c 100644 --- a/src/agenda_culturel/import_tasks/custom_extractors/fbevents.py +++ b/src/agenda_culturel/import_tasks/custom_extractors/fbevents.py @@ -69,15 +69,19 @@ class CExtractor(TwoStepsExtractor): raise Exception(_("the page was not yet populated with events, so the loading time was probably too short")) if not self.found and debug: - directory = "errors/" - if not os.path.exists(directory): - os.makedirs(directory) - now = datetime.now() - filename = directory + now.strftime("%Y%m%d_%H%M%S") + ".html" logger.warning("cannot find any event link in events page. Save content page in " + filename) - with open(filename, "w") as text_file: - text_file.write("\n\n") - text_file.write(content) + self.dump_content_for_debug(content) + + + def dump_content_for_debug(self, content): + directory = "errors/" + if not os.path.exists(directory): + os.makedirs(directory) + now = datetime.now() + filename = directory + now.strftime("%Y%m%d_%H%M%S") + ".html" + with open(filename, "w") as text_file: + text_file.write("\n\n") + text_file.write(content) @@ -90,7 +94,8 @@ class CExtractor(TwoStepsExtractor): default_values=None, published=False, ): - + debug = True + fevent = None soup = BeautifulSoup(event_content, "html.parser") for json_script in soup.find_all("script", type="application/json"): @@ -106,6 +111,7 @@ class CExtractor(TwoStepsExtractor): self.add_event(default_values, **event) else: + self.dump_content_for_debug(event_content) raise Exception( _("Cannot get Facebook event from {}").format(event_url) )