Amélioration de l'import des événements avec Google Calendar

2025-02-09 10:30:08 +01:00 · 2025-02-09 10:30:08 +01:00 · 938ece5326
commit 938ece5326
parent d9f4f74937
2 changed files with 31 additions and 6 deletions
--- a/src/agenda_culturel/import_tasks/extractor_ggcal_link.py
+++ b/src/agenda_culturel/import_tasks/extractor_ggcal_link.py
@ -14,7 +14,27 @@ logger = logging.getLogger(__name__)
 class GoogleCalendarLinkEventExtractor(Extractor):
    def __init__(self):
        super().__init__()
-        self.possible_urls = ["https://calendar.google.com/calendar/", "https://addtocalendar.com/"]
+        self.possible_urls = ["https://calendar.google.com/calendar/", "https://addtocalendar.com/", "https://www.google.com/calendar/event"]
+
+
+    def guess_image(self, soup, url):
+        """Guess a representative image URL for the page.
+
+        The ``og:image`` meta tag is preferred. When it is absent or has no
+        content, the first ``<img>`` that carries a ``src`` attribute and is
+        not nested inside a ``<nav>`` element is used instead.
+
+        Args:
+            soup: Parsed page exposing the BeautifulSoup ``find``/``select`` API.
+            url: URL of the page, used as the base to resolve relative paths.
+
+        Returns:
+            The image URL as a string, or ``None`` when no candidate is found.
+        """
+        # Local import so the method does not rely on the module's import list.
+        from urllib.parse import urljoin
+
+        image = None
+
+        meta = soup.find("meta", property="og:image")
+        if meta is not None:
+            # .get() avoids a KeyError on a meta tag without a content attribute.
+            image = meta.get("content")
+
+        if not image:
+            for img in soup.select("img"):
+                # Images inside a <nav> are skipped (presumably site chrome
+                # such as logos rather than event illustrations).
+                if img.find_parent(name="nav"):
+                    continue
+                # Ignore <img> tags that have no usable src attribute.
+                if img.get("src"):
+                    image = img["src"]
+                    break
+
+        if not image:
+            # Nothing usable on the page: return None instead of failing with
+            # an AttributeError when calling string methods on None.
+            return None
+
+        # urljoin resolves root-relative, relative and protocol-relative paths
+        # against the page URL (no doubled slash) and leaves absolute URLs as is.
+        return urljoin(url, image)
+

    def extract(
        self, content, url, url_human=None, default_values=None, published=False
@ -40,6 +60,8 @@ class GoogleCalendarLinkEventExtractor(Extractor):

                    self.set_header(url)

+                    image = self.guess_image(soup, url)
+
                    category = None

                    self.add_event(
@ -57,7 +79,7 @@ class GoogleCalendarLinkEventExtractor(Extractor):
                        end_day=end_day,
                        end_time=end_time,
                        published=published,
-                        image=None,
+                        image=image,
                    )

                    break
--- a/src/agenda_culturel/import_tasks/generic_extractors.py
+++ b/src/agenda_culturel/import_tasks/generic_extractors.py
@ -1,6 +1,8 @@
 from abc import abstractmethod
 from urllib.parse import urlparse
 from urllib.parse import parse_qs
+from bs4 import BeautifulSoup
+

 import logging

@ -38,10 +40,11 @@ class GGCalendar:

        params = GGCalendar.filter_keys(params)

-        self.location = params["location"][0] if "location" in params else None
-        self.title = params["text"][0] if "text" in params else params["title"][0] if "title" in params else None
-        self.description = params["description"][0] if "description" in params else None
-        self.location = params["location"][0] if "location" in params else None
+        self.location = params["location"][0] if "location" in params else ""
+        self.title = params["text"][0] if "text" in params else params["title"][0] if "title" in params else ""
+        self.description = params["description"][0] if "description" in params else params["details"][0] if "details" in params else ""
+        if self.description != "":
+            self.description = BeautifulSoup(self.description, "html.parser").text
        if "dates" in params:
            dates = [x.replace(" ", "+") for x in params["dates"][0].split("/")]
            if len(dates) > 0: