diff --git a/src/agenda_culturel/import_tasks/custom_extractors/fbevents.py b/src/agenda_culturel/import_tasks/custom_extractors/fbevents.py
index b9e3b32..ca74ec6 100644
--- a/src/agenda_culturel/import_tasks/custom_extractors/fbevents.py
+++ b/src/agenda_culturel/import_tasks/custom_extractors/fbevents.py
@@ -3,6 +3,12 @@ from ..extractor_facebook import FacebookEvent
 import json5
 from bs4 import BeautifulSoup
 import json
+import os
+from datetime import datetime
+
+import logging
+
+logger = logging.getLogger(__name__)
 
 
 # A class dedicated to get events from a facebook events page
@@ -13,10 +19,37 @@ class CExtractor(TwoStepsExtractor):
     def build_event_url_list(self, content):
+        """Register every Facebook event URL linked from an events page.
+
+        Each anchor whose href starts with
+        ``https://www.facebook.com/events/`` is passed, with its query string
+        removed, to ``self.add_event_url``. When no such link is found, the
+        page content is written to ``errors/<timestamp>.html`` and a warning
+        is logged.
+        """
         soup = BeautifulSoup(content, "html.parser")
 
-        links = soup.find_all("a")
+        found = False
+        # href=True skips anchors that carry no href attribute; calling
+        # .startswith() on their missing (None) href would raise.
+        links = soup.find_all("a", href=True)
         for link in links:
             if link.get("href").startswith('https://www.facebook.com/events/'):
                 self.add_event_url(link.get('href').split('?')[0])
+                found = True
+
+        if not found:
+            directory = "errors/"
+            # exist_ok=True: no error if the directory is already there.
+            os.makedirs(directory, exist_ok=True)
+            now = datetime.now()
+            filename = directory + now.strftime("%Y%m%d_%H%M%S") + ".html"
+            logger.warning(
+                "cannot find any event link in events page. Save content page in %s",
+                filename,
+            )
+            # Explicit encoding: scraped HTML may hold characters that the
+            # locale's default encoding cannot represent.
+            with open(filename, "w", encoding="utf-8") as text_file:
+                text_file.write(content)
 
 
     def add_event_from_content(
@@ -42,4 +75,6 @@ class CExtractor(TwoStepsExtractor):
             event["published"] = published
 
             self.add_event(default_values, **event)
+        else:
+            logger.warning("cannot find any event in page")
 