# Patch summary (free-text preamble: patch tools skip any text that precedes
# the first "diff --git" header).
#
# fbevents.py, class CExtractor:
#   * find_event_id_fragment_in_array(): drops the `first` parameter and the
#     boolean return value. Results are now recorded on the instance:
#     `self.found = True` when a dict with __typename == "Event" and an "id"
#     is seen, and `self.has_page_items = True` when a dict key equal to
#     "pageItems" is met while walking the JSON structure.
#   * find_in_js(): no longer returns a value; it passes each
#     <script type="application/json"> payload to the walker above.
#   * build_event_url_list(): resets `self.found` and `self.has_page_items`,
#     and raises Exception with a message wrapped in `_()` when no
#     "pageItems" key was encountered.
# django.po (fr): adds the French msgstr for that new message, changes
#   POT-Creation-Date, and moves the source reference of
#   "Cannot get Facebook event from {}" from fbevents.py:99 to :101.
#
# NOTE(review): the new `raise` tests only `self.has_page_items`, not
#   `self.found`; a page whose event URLs were already collected from <a>
#   links still raises when no "pageItems" key exists - confirm intended.
# NOTE(review): reading the flattened hunk, the "pageItems" check appears to
#   sit in the `else:` branch, i.e. it is skipped for dicts that are
#   themselves "Event" nodes - confirm against the real indentation.
# NOTE(review): unchanged context line `link.get("href").startswith(...)`
#   would raise AttributeError if `.get("href")` returns None (presumably an
#   anchor without href) - verify.
# NOTE(review): only the call sites inside this file are visible; any other
#   caller still passing `first` or using the old return value would break.
diff --git a/src/agenda_culturel/import_tasks/custom_extractors/fbevents.py b/src/agenda_culturel/import_tasks/custom_extractors/fbevents.py index ac413a2..49b72c8 100644 --- a/src/agenda_culturel/import_tasks/custom_extractors/fbevents.py +++ b/src/agenda_culturel/import_tasks/custom_extractors/fbevents.py @@ -17,30 +17,28 @@ logger = logging.getLogger(__name__) # such as https://www.facebook.com/laJeteeClermont/events class CExtractor(TwoStepsExtractor): - def find_event_id_fragment_in_array(self, array, first=True): - found = False + def find_event_id_fragment_in_array(self, array): if isinstance(array, dict): if "__typename" in array and array["__typename"] == "Event" and "id" in array: self.add_event_url("https://www.facebook.com/events/" + array["id"] + "/") - found = True - if not found: + self.found = True + else: for k in array: - found = self.find_event_id_fragment_in_array(array[k], False) or found + if k == "pageItems": + self.has_page_items = True + self.find_event_id_fragment_in_array(array[k]) elif isinstance(array, list): for e in array: - found = self.find_event_id_fragment_in_array(e, False) or found - return found + self.find_event_id_fragment_in_array(e) def find_in_js(self, soup): - found = False for json_script in soup.find_all("script", type="application/json"): json_txt = json_script.get_text() json_struct = json.loads(json_txt) - found = self.find_event_id_fragment_in_array(json_struct) or found + self.find_event_id_fragment_in_array(json_struct) - return found def build_event_url_list(self, content): @@ -48,16 +46,20 @@ class CExtractor(TwoStepsExtractor): debug = False - found = False + self.found = False links = soup.find_all("a") for link in links: if link.get("href").startswith('https://www.facebook.com/events/'): self.add_event_url(link.get('href').split('?')[0]) - found = True + self.found = True - found = self.find_in_js(soup) or found + self.has_page_items = False + self.find_in_js(soup) - if not found and debug: + if not 
self.has_page_items: + raise Exception(_("the page was not yet populated with events, so the loading time was probably too short")) + + if not self.found and debug: directory = "errors/" if not os.path.exists(directory): os.makedirs(directory) diff --git a/src/agenda_culturel/locale/fr/LC_MESSAGES/django.po b/src/agenda_culturel/locale/fr/LC_MESSAGES/django.po index aacc813..415d93f 100644 --- a/src/agenda_culturel/locale/fr/LC_MESSAGES/django.po +++ b/src/agenda_culturel/locale/fr/LC_MESSAGES/django.po @@ -2,13 +2,13 @@ # Copyright (C) YEAR THE PACKAGE'S COPYRIGHT HOLDER # This file is distributed under the same license as the PACKAGE package. # Jean-Marie Favreau , 2023. -# +# #, fuzzy msgid "" msgstr "" "Project-Id-Version: agenda_culturel\n" "Report-Msgid-Bugs-To: \n" -"POT-Creation-Date: 2025-02-07 10:17+0100\n" +"POT-Creation-Date: 2025-02-07 11:55+0100\n" "PO-Revision-Date: 2023-10-29 14:16+0000\n" "Last-Translator: Jean-Marie Favreau \n" "Language-Team: Jean-Marie Favreau \n" @@ -352,7 +352,15 @@ msgstr "Informations" msgid "Add a comment" msgstr "Ajouter un commentaire" -#: agenda_culturel/import_tasks/custom_extractors/fbevents.py:99 +#: agenda_culturel/import_tasks/custom_extractors/fbevents.py:60 +msgid "" +"the page was not yet populated with events, so the loading time was probably " +"too short" +msgstr "" +"la page n'était pas encore peuplée des événements, le temps de chargement a " +"sans doute été trop court" + +#: agenda_culturel/import_tasks/custom_extractors/fbevents.py:101 msgid "Cannot get Facebook event from {}" msgstr "Impossible de récupérer un événement Facebook depuis {}"