From 9b429f6951843f7ec8833a7f6275455f93c26993 Mon Sep 17 00:00:00 2001
From: Jean-Marie Favreau <j-marie.favreau@uca.fr>
Date: Sun, 1 Sep 2024 19:00:29 +0200
Subject: [PATCH] =?UTF-8?q?On=20introduit=20un=20outil=20pour=20importer?=
 =?UTF-8?q?=20les=20=C3=A9v=C3=A9nements=20depuis=20une=20page=20qui=20a?=
 =?UTF-8?q?=20un=20lien=20google=20agenda=20(ou=20=C3=A9quivalent)?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 src/agenda_culturel/import_tasks/extractor.py | 10 ++-
 .../import_tasks/extractor_facebook.py        |  3 +-
 .../import_tasks/extractor_ggcal_link.py      | 65 +++++++++++++++++++
 .../import_tasks/generic_extractors.py        | 39 ++++++++++-
 4 files changed, 111 insertions(+), 6 deletions(-)
 create mode 100644 src/agenda_culturel/import_tasks/extractor_ggcal_link.py

diff --git a/src/agenda_culturel/import_tasks/extractor.py b/src/agenda_culturel/import_tasks/extractor.py
index ccb4380..21d96a1 100644
--- a/src/agenda_culturel/import_tasks/extractor.py
+++ b/src/agenda_culturel/import_tasks/extractor.py
@@ -199,7 +199,10 @@ class Extractor(ABC):
         )
 
     def get_structure(self):
-        return {"header": self.header, "events": self.events}
+        # Nothing was extracted: report None instead of an empty structure.
+        if len(self.events) == 0:
+            return None
+        return {"header": self.header, "events": self.events}
 
     def clean_url(url):
         from .extractor_ical import ICALExtractor
@@ -213,8 +216,9 @@ class Extractor(ABC):
     def get_default_extractors(single_event=False):
         from .extractor_ical import ICALExtractor
         from .extractor_facebook import FacebookEventExtractor
+        from .extractor_ggcal_link import GoogleCalendarLinkEventExtractor  # local import: that module imports this one
 
         if single_event:
-            return [FacebookEventExtractor(single_event=True)]
+            return [FacebookEventExtractor(), GoogleCalendarLinkEventExtractor()]
         else:
-            return [ICALExtractor(), FacebookEventExtractor(single_event=False)]
+            return [ICALExtractor(), FacebookEventExtractor(), GoogleCalendarLinkEventExtractor()]
diff --git a/src/agenda_culturel/import_tasks/extractor_facebook.py b/src/agenda_culturel/import_tasks/extractor_facebook.py
index 2069479..3e30b02 100644
--- a/src/agenda_culturel/import_tasks/extractor_facebook.py
+++ b/src/agenda_culturel/import_tasks/extractor_facebook.py
@@ -225,8 +225,7 @@ class FacebookEvent:
 
 class FacebookEventExtractor(Extractor):
 
-    def __init__(self, single_event=False):
-        self.single_event = single_event
+    def __init__(self):  # takes no options; only runs the base-class initialisation
         super().__init__()
 
     def clean_url(url):
diff --git a/src/agenda_culturel/import_tasks/extractor_ggcal_link.py b/src/agenda_culturel/import_tasks/extractor_ggcal_link.py
new file mode 100644
index 0000000..092dd01
--- /dev/null
+++ b/src/agenda_culturel/import_tasks/extractor_ggcal_link.py
@@ -0,0 +1,65 @@
+from datetime import datetime
+from bs4 import BeautifulSoup
+from urllib.parse import urlparse
+
+from .extractor import *
+from .generic_extractors import *
+
+import json
+
+import logging
+
+logger = logging.getLogger(__name__)
+
+class GoogleCalendarLinkEventExtractor(Extractor):
+    """Extract one event from an "add to calendar" link found in a page.
+
+    For each prefix in ``possible_urls`` only the first matching anchor is
+    decoded (through GGCalendar); the first valid event found is imported.
+    """
+
+    def __init__(self):
+        super().__init__()
+        self.possible_urls = ["https://calendar.google.com/calendar/", "https://addtocalendar.com/"]
+
+    def extract(
+        self, content, url, url_human=None, default_values=None, published=False
+    ):
+        """Parse the HTML ``content`` of ``url`` and return ``get_structure()``.
+
+        ``default_values`` and the incoming ``url_human`` are not used: the
+        event's human-readable URL is always ``url``.
+        """
+        soup = BeautifulSoup(content, "html.parser")
+        for ggu in self.possible_urls:
+            link_calendar = soup.select('a[href^="' + ggu + '"]')
+            if len(link_calendar) == 0:
+                continue
+            gg_cal = GGCalendar(link_calendar[0]["href"])
+            if not gg_cal.is_valid_event():
+                continue
+            # The link may carry no description (GGCalendar then stores None).
+            description = gg_cal.description
+            if description is not None:
+                description = description.replace('&nbsp;', '')
+
+            self.set_header(url)
+            self.add_event(
+                title=gg_cal.title,
+                category=None,
+                start_day=gg_cal.start_day,
+                location=gg_cal.location,
+                description=description,
+                tags=None,
+                uuids=[url],
+                recurrences=None,
+                url_human=url,
+                start_time=gg_cal.start_time,
+                end_day=gg_cal.end_day,
+                end_time=gg_cal.end_time,
+                published=published,
+                image=None,
+            )
+            break
+
+        return self.get_structure()
\ No newline at end of file
diff --git a/src/agenda_culturel/import_tasks/generic_extractors.py b/src/agenda_culturel/import_tasks/generic_extractors.py
index 0ba6fc8..e386406 100644
--- a/src/agenda_culturel/import_tasks/generic_extractors.py
+++ b/src/agenda_culturel/import_tasks/generic_extractors.py
@@ -18,12 +18,32 @@ class GGCalendar:
         self.url = url
         self.extract_info()
 
+    @staticmethod
+    def filter_keys(params):
+        """Return a copy of ``params`` with ``e[0][name]`` keys renamed to ``name``."""
+        prefix = 'e[0]['
+        result = {}
+        for k, v in params.items():
+            if k.startswith(prefix) and k.endswith(']'):
+                k = k[len(prefix):-1]
+            result[k] = v
+        return result
+
+    def is_valid_event(self):  # valid = both a start day and a title were found
+        return not (self.start_day is None or self.title is None)
+
     def extract_info(self):
         parsed_url = urlparse(self.url.replace("#", "%23"))
         params = parse_qs(parsed_url.query)
 
+        params = GGCalendar.filter_keys(params)
+
+        logger.warning(self.url)
+        logger.warning(params)
+        self.location = params["location"][0] if "location" in params else None
+        self.title = params["text"][0] if "text" in params else params["title"][0] if "title" in params else None
+        self.description = params["description"][0] if "description" in params else None
         self.location = params["location"][0] if "location" in params else None
-        self.title = params["text"][0] if "text" in params else None
         if "dates" in params:
             dates = [x.replace(" ", "+") for x in params["dates"][0].split("/")]
             if len(dates) > 0:
@@ -37,7 +57,24 @@ class GGCalendar:
                 else:
                     self.end_day = None
                     self.end_time = None
+        elif "date_start" in params:
+            date = parser.parse(params["date_start"][0])
+            self.start_day = date.date()
+            self.start_time = date.time()
+            if "date_end" in params:
+                dateend = parser.parse(params["date_end"][0])
+                if dateend != date:
+                    self.end_day = dateend.date()
+                    self.end_time = dateend.time()
+                else:
+                    self.end_day = None
+                    self.end_time = None
+                    if self.start_time == datetime.time(0):
+                        self.start_time = None
 
+            else:
+                self.end_day = None
+                self.end_time = None
         else:
             raise Exception("Unable to find a date in google calendar URL")
             self.start_day = None