From 9b429f6951843f7ec8833a7f6275455f93c26993 Mon Sep 17 00:00:00 2001
From: Jean-Marie Favreau
Date: Sun, 1 Sep 2024 19:00:29 +0200
Subject: [PATCH] =?UTF-8?q?On=20introduit=20un=20outil=20pour=20importer?=
 =?UTF-8?q?=20les=20=C3=A9v=C3=A9nements=20depuis=20une=20page=20qui=20a?=
 =?UTF-8?q?=20un=20lien=20google=20agenda=20(ou=20=C3=A9quivalent)?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
Review notes (not part of the commit message):

* This patch was rebuilt from a copy whose line breaks and indentation had
  been collapsed. Indentation and the position of blank context lines are
  assumed -- TODO confirm against the tree before applying. The blob ids on
  the "index" lines are those of the original revision.
* extractor_ggcal_link.py: dropped the leftover debug log; a link without a
  "description" parameter no longer crashes on None.replace(); a link for
  which GGCalendar raises (no date found) is logged and skipped.
* extractor_ggcal_link.py: only non-breaking spaces are removed from the
  description. NOTE(review): the replaced literal read as a plain space in
  the collapsed copy; "&nbsp;" is presumed -- confirm.
* generic_extractors.py: dropped the debug logging of the URL and of the
  parameters, and the duplicated "self.location" assignment; filter_keys is
  now a staticmethod and only unwraps keys of the form "e[0][name]".
* NOTE(review): "datetime.time(0)" needs "datetime" to be the module, not
  the class -- confirm against the imports of generic_extractors.py.

 src/agenda_culturel/import_tasks/extractor.py | 10 ++-
 .../import_tasks/extractor_facebook.py        |  3 +-
 .../import_tasks/extractor_ggcal_link.py      | 79 +++++++++++++++++++
 .../import_tasks/generic_extractors.py        | 38 ++++++++-
 4 files changed, 124 insertions(+), 6 deletions(-)
 create mode 100644 src/agenda_culturel/import_tasks/extractor_ggcal_link.py

diff --git a/src/agenda_culturel/import_tasks/extractor.py b/src/agenda_culturel/import_tasks/extractor.py
index ccb4380..21d96a1 100644
--- a/src/agenda_culturel/import_tasks/extractor.py
+++ b/src/agenda_culturel/import_tasks/extractor.py
@@ -199,7 +199,10 @@ class Extractor(ABC):
         )
 
     def get_structure(self):
-        return {"header": self.header, "events": self.events}
+        if len(self.events) == 0:
+            return None
+        else:
+            return {"header": self.header, "events": self.events}
 
     def clean_url(url):
         from .extractor_ical import ICALExtractor
@@ -213,8 +216,9 @@ class Extractor(ABC):
     def get_default_extractors(single_event=False):
         from .extractor_ical import ICALExtractor
         from .extractor_facebook import FacebookEventExtractor
+        from .extractor_ggcal_link import GoogleCalendarLinkEventExtractor
 
         if single_event:
-            return [FacebookEventExtractor(single_event=True)]
+            return [FacebookEventExtractor(), GoogleCalendarLinkEventExtractor()]
         else:
-            return [ICALExtractor(), FacebookEventExtractor(single_event=False)]
+            return [ICALExtractor(), FacebookEventExtractor(), GoogleCalendarLinkEventExtractor()]
diff --git a/src/agenda_culturel/import_tasks/extractor_facebook.py b/src/agenda_culturel/import_tasks/extractor_facebook.py
index 2069479..3e30b02 100644
--- a/src/agenda_culturel/import_tasks/extractor_facebook.py
+++ b/src/agenda_culturel/import_tasks/extractor_facebook.py
@@ -225,8 +225,7 @@ class FacebookEvent:
 
 
 class FacebookEventExtractor(Extractor):
-    def __init__(self, single_event=False):
-        self.single_event = single_event
+    def __init__(self):
         super().__init__()
 
     def clean_url(url):
diff --git a/src/agenda_culturel/import_tasks/extractor_ggcal_link.py b/src/agenda_culturel/import_tasks/extractor_ggcal_link.py
new file mode 100644
index 0000000..092dd01
--- /dev/null
+++ b/src/agenda_culturel/import_tasks/extractor_ggcal_link.py
@@ -0,0 +1,79 @@
+from datetime import datetime
+from bs4 import BeautifulSoup
+from urllib.parse import urlparse
+
+from .extractor import *
+from .generic_extractors import *
+
+import json
+
+import logging
+
+logger = logging.getLogger(__name__)
+
+
+class GoogleCalendarLinkEventExtractor(Extractor):
+    """Import the event advertised by an "add to calendar" link of a page.
+
+    The page is searched for the first anchor whose ``href`` starts with one
+    of ``possible_urls``; the event fields are read from that link's query
+    string by ``GGCalendar``.
+    """
+
+    def __init__(self):
+        super().__init__()
+        self.possible_urls = [
+            "https://calendar.google.com/calendar/",
+            "https://addtocalendar.com/",
+        ]
+
+    def extract(
+        self, content, url, url_human=None, default_values=None, published=False
+    ):
+        """Return the structure holding the event found in ``content``, or None.
+
+        ``url_human`` and ``default_values`` are accepted for interface
+        compatibility but not used: the page URL is the human-readable URL.
+        """
+        soup = BeautifulSoup(content, "html.parser")
+
+        for ggu in self.possible_urls:
+            link_calendar = soup.select('a[href^="' + ggu + '"]')
+            if not link_calendar:
+                continue
+
+            try:
+                gg_cal = GGCalendar(link_calendar[0]["href"])
+            except Exception:
+                # GGCalendar raises a bare Exception when the link has no date.
+                logger.warning("Unusable calendar link in %s", url, exc_info=True)
+                continue
+
+            if not gg_cal.is_valid_event():
+                continue
+
+            description = gg_cal.description
+            if description is not None:
+                # Drop non-breaking spaces (entity or character), never plain ones.
+                description = description.replace("&nbsp;", "").replace("\u00a0", "")
+
+            self.set_header(url)
+            self.add_event(
+                title=gg_cal.title,
+                category=None,
+                start_day=gg_cal.start_day,
+                location=gg_cal.location,
+                description=description,
+                tags=None,
+                uuids=[url],
+                recurrences=None,
+                url_human=url,
+                start_time=gg_cal.start_time,
+                end_day=gg_cal.end_day,
+                end_time=gg_cal.end_time,
+                published=published,
+                image=None,
+            )
+            break
+
+        return self.get_structure()
diff --git a/src/agenda_culturel/import_tasks/generic_extractors.py b/src/agenda_culturel/import_tasks/generic_extractors.py
index 0ba6fc8..e386406 100644
--- a/src/agenda_culturel/import_tasks/generic_extractors.py
+++ b/src/agenda_culturel/import_tasks/generic_extractors.py
@@ -18,12 +18,31 @@ class GGCalendar:
         self.url = url
         self.extract_info()
 
+    @staticmethod
+    def filter_keys(params):
+        """Strip the ``e[0][...]`` wrapper from query keys (``e[0][title]`` -> ``title``)."""
+        result = {}
+
+        for k, v in params.items():
+            if k.startswith("e[0][") and k.endswith("]"):
+                result[k[len("e[0]["):-1]] = v
+            else:
+                result[k] = v
+
+        return result
+
+    def is_valid_event(self):
+        """Tell whether both a start day and a title were extracted."""
+        return self.start_day is not None and self.title is not None
+
     def extract_info(self):
         parsed_url = urlparse(self.url.replace("#", "%23"))
         params = parse_qs(parsed_url.query)
+        params = GGCalendar.filter_keys(params)
 
         self.location = params["location"][0] if "location" in params else None
-        self.title = params["text"][0] if "text" in params else None
+        self.title = params["text"][0] if "text" in params else params["title"][0] if "title" in params else None
+        self.description = params["description"][0] if "description" in params else None
         if "dates" in params:
             dates = [x.replace(" ", "+") for x in params["dates"][0].split("/")]
             if len(dates) > 0:
@@ -37,7 +56,24 @@ class GGCalendar:
                 else:
                     self.end_day = None
                     self.end_time = None
+        elif "date_start" in params:
+            date = parser.parse(params["date_start"][0])
+            self.start_day = date.date()
+            self.start_time = date.time()
+            if "date_end" in params:
+                dateend = parser.parse(params["date_end"][0])
+                if dateend != date:
+                    self.end_day = dateend.date()
+                    self.end_time = dateend.time()
+                else:
+                    self.end_day = None
+                    self.end_time = None
+                    if self.start_time == datetime.time(0):
+                        self.start_time = None
+            else:
+                self.end_day = None
+                self.end_time = None
 
         else:
             raise Exception("Unable to find a date in google calendar URL")
             self.start_day = None