From 938ece53262be1054a46b16f0a5a25eed6234847 Mon Sep 17 00:00:00 2001
From: Jean-Marie Favreau <j-marie.favreau@uca.fr>
Date: Sun, 9 Feb 2025 10:30:08 +0100
Subject: [PATCH] =?UTF-8?q?Am=C3=A9lioration=20de=20l'import=20des=20?=
 =?UTF-8?q?=C3=A9v=C3=A9nements=20avec=20google=20calendar?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 .../import_tasks/extractor_ggcal_link.py      | 26 +++++++++++++++++--
 .../import_tasks/generic_extractors.py        | 11 +++++---
 2 files changed, 31 insertions(+), 6 deletions(-)

diff --git a/src/agenda_culturel/import_tasks/extractor_ggcal_link.py b/src/agenda_culturel/import_tasks/extractor_ggcal_link.py
index da5344e..5ce6ead 100644
--- a/src/agenda_culturel/import_tasks/extractor_ggcal_link.py
+++ b/src/agenda_culturel/import_tasks/extractor_ggcal_link.py
@@ -14,7 +14,27 @@ logger = logging.getLogger(__name__)
 class GoogleCalendarLinkEventExtractor(Extractor):
     def __init__(self):
         super().__init__()
-        self.possible_urls = ["https://calendar.google.com/calendar/", "https://addtocalendar.com/"]
+        self.possible_urls = ["https://calendar.google.com/calendar/", "https://addtocalendar.com/", "https://www.google.com/calendar/event"]  # calendar-link URL prefixes; presumably what extract() matches links against (confirm)
+
+
+    def guess_image(self, soup, url):
+        """Return og:image, else the first img src outside a nav, else None."""
+        meta = soup.find("meta", property="og:image")
+        image = meta.get("content") if meta is not None else None
+        if not image:
+            # Fall back to the first image with a src that is not in a nav.
+            for img in soup.select("img"):
+                if img.get("src") and img.find_parent(name="nav") is None:
+                    image = img["src"]
+                    break
+        if not image:
+            return None  # no candidate: avoid calling str methods on None
+        if image.startswith("//"):
+            return "https:" + image  # protocol-relative: only add a scheme
+        if image.startswith("/"):
+            return "https://" + urlparse(url).netloc + image  # no doubled "/"
+        return image
+
 
     def extract(
         self, content, url, url_human=None, default_values=None, published=False
@@ -40,6 +60,8 @@ class GoogleCalendarLinkEventExtractor(Extractor):
 
                     self.set_header(url)
 
+                    image = self.guess_image(soup, url)
+
                     category = None
 
                     self.add_event(
@@ -57,7 +79,7 @@ class GoogleCalendarLinkEventExtractor(Extractor):
                         end_day=end_day,
                         end_time=end_time,
                         published=published,
-                        image=None,
+                        image=image,
                     )
 
                     break
diff --git a/src/agenda_culturel/import_tasks/generic_extractors.py b/src/agenda_culturel/import_tasks/generic_extractors.py
index 6380e3f..532c2ef 100644
--- a/src/agenda_culturel/import_tasks/generic_extractors.py
+++ b/src/agenda_culturel/import_tasks/generic_extractors.py
@@ -1,6 +1,8 @@
 from abc import abstractmethod
 from urllib.parse import urlparse
 from urllib.parse import parse_qs
+from bs4 import BeautifulSoup
+
 
 import logging
 
@@ -38,10 +40,11 @@ class GGCalendar:
 
         params = GGCalendar.filter_keys(params)
 
-        self.location = params["location"][0] if "location" in params else None
-        self.title = params["text"][0] if "text" in params else params["title"][0] if "title" in params else None
-        self.description = params["description"][0] if "description" in params else None
-        self.location = params["location"][0] if "location" in params else None
+        self.location = params["location"][0] if "location" in params else ""
+        self.title = params["text"][0] if "text" in params else params["title"][0] if "title" in params else ""
+        self.description = params["description"][0] if "description" in params else params["details"][0] if "details" in params else ""
+        if self.description != "":
+            self.description = BeautifulSoup(self.description, "html.parser").text
         if "dates" in params:
             dates = [x.replace(" ", "+") for x in params["dates"][0].split("/")]
             if len(dates) > 0: