From cf4f506168ad593259b0585910e74f5037cac082 Mon Sep 17 00:00:00 2001 From: Jean-Marie Favreau Date: Wed, 19 Mar 2025 21:47:56 +0100 Subject: [PATCH] =?UTF-8?q?Am=C3=A9lioration=20robustesse=20import=20gglin?= =?UTF-8?q?k?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/agenda_culturel/import_tasks/extractor.py | 1 + .../generic_extractors/ggcal_link.py | 18 +++++++++++++----- 2 files changed, 14 insertions(+), 5 deletions(-) diff --git a/src/agenda_culturel/import_tasks/extractor.py b/src/agenda_culturel/import_tasks/extractor.py index 305b2f8..cd0e081 100644 --- a/src/agenda_culturel/import_tasks/extractor.py +++ b/src/agenda_culturel/import_tasks/extractor.py @@ -322,6 +322,7 @@ class Extractor(ABC): FacebookEventExtractor(), GoogleCalendarLinkEventExtractor(), AssociationsCF(), + ICALExtractor(), EventNotFoundExtractor(), ] else: diff --git a/src/agenda_culturel/import_tasks/generic_extractors/ggcal_link.py b/src/agenda_culturel/import_tasks/generic_extractors/ggcal_link.py index fe2b118..dcb430b 100644 --- a/src/agenda_culturel/import_tasks/generic_extractors/ggcal_link.py +++ b/src/agenda_culturel/import_tasks/generic_extractors/ggcal_link.py @@ -1,6 +1,7 @@ import logging from datetime import datetime from urllib.parse import parse_qs, urlparse +from html import unescape import dateutil.parser from bs4 import BeautifulSoup @@ -26,6 +27,13 @@ class GGCalendar: return result + def parse_date(d): + try: + return dateutil.parser.parse(d) + except Exception: + d = d.split("+")[0] + return dateutil.parser.parse(d) + def is_valid_event(self): return self.start_day is not None and self.title is not None @@ -36,7 +44,7 @@ class GGCalendar: params = GGCalendar.filter_keys(params) self.location = params["location"][0] if "location" in params else "" - self.title = ( + self.title = unescape( params["text"][0] if "text" in params else params["title"][0] if "title" in params else "" @@ -51,22 +59,22 @@ class GGCalendar: if "dates" in params: dates = [x.replace(" ", "+") for x in params["dates"][0].split("/")] if len(dates) > 0: - date = dateutil.parser.parse(dates[0]) + date = GGCalendar.parse_date(dates[0]) self.start_day = date.date() self.start_time = date.time() if len(dates) == 2: - date = dateutil.parser.parse(dates[1]) + date = GGCalendar.parse_date(dates[1]) self.end_day = date.date() self.end_time = date.time() else: self.end_day = None self.end_time = None elif "date_start" in params: - date = dateutil.parser.parse(params["date_start"][0]) + date = GGCalendar.parse_date(params["date_start"][0]) self.start_day = date.date() self.start_time = date.time() if "date_end" in params: - dateend = dateutil.parser.parse(params["date_end"][0]) + dateend = GGCalendar.parse_date(params["date_end"][0]) if dateend != date: self.end_day = dateend.date() self.end_time = dateend.time()