experimentations/get_la_raymonde.py (new executable file, 44 lines added)
@@ -0,0 +1,44 @@
#!/usr/bin/python3
# coding: utf-8

import os
import json
import sys

# Get the name of the directory
# where this file is present.
current = os.path.dirname(os.path.realpath(__file__))

# Get the parent directory name
# where the current directory is present.
parent = os.path.dirname(current)

# Add the parent directory to
# the sys.path.
sys.path.append(parent)
sys.path.append(parent + "/src")

from src.agenda_culturel.import_tasks.downloader import *
from src.agenda_culturel.import_tasks.extractor import *
from src.agenda_culturel.import_tasks.importer import *
from src.agenda_culturel.import_tasks.custom_extractors import *


if __name__ == "__main__":

    u2e = URL2Events(SimpleDownloader(), laraymonde.CExtractor())
    url = "https://www.raymondbar.net/"
    url_human = "https://www.raymondbar.net/"

    try:
        events = u2e.process(
            url,
            url_human,
            cache="cache-la-raymonde.html",
            default_values={"location": "La Raymonde", "category": "Fêtes & Concerts"},
            published=True,
        )

        exportfile = "events-la-raymonde.json"
        print("Saving events to file {}".format(exportfile))
        with open(exportfile, "w") as f:
            json.dump(events, f, indent=4, default=str)
    except Exception as e:
        print("Exception: " + str(e))
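For a quick sanity check after running this script, the export can be reloaded with the standard json module. A minimal sketch (not part of this changeset), assuming the events-la-raymonde.json file produced above exists in the current directory:

# Sketch only, not part of this changeset: reload the export written by
# get_la_raymonde.py and print how many top-level entries it contains.
import json

with open("events-la-raymonde.json") as f:
    events = json.load(f)

# The exact structure returned by URL2Events.process() is not shown in this
# diff; this only assumes the dump above produced a list- or dict-like value.
print("{} top-level entries in the export".format(len(events)))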
@@ -150,6 +150,8 @@ def run_recurrent_import_internal(rimport, downloader, req_id):
        extractor = arachnee.CExtractor()
    elif rimport.processor == RecurrentImport.PROCESSOR.LERIO:
        extractor = lerio.CExtractor()
    elif rimport.processor == RecurrentImport.PROCESSOR.LARAYMONDE:
        extractor = laraymonde.CExtractor()
    else:
        extractor = None

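The new LARAYMONDE branch extends an elif chain that grows by two lines per venue. A dictionary lookup is a common alternative for this kind of dispatch; the sketch below is only an illustration, assuming the PROCESSOR members and extractor modules visible elsewhere in this diff, and is not a change proposed here:

# Sketch only, not part of this changeset: the same dispatch as a mapping.
# Assumes RecurrentImport.PROCESSOR and the arachnee/lerio/laraymonde
# extractor modules referenced in the surrounding code.
EXTRACTORS = {
    RecurrentImport.PROCESSOR.ARACHNEE: arachnee.CExtractor,
    RecurrentImport.PROCESSOR.LERIO: lerio.CExtractor,
    RecurrentImport.PROCESSOR.LARAYMONDE: laraymonde.CExtractor,
}

extractor_class = EXTRACTORS.get(rimport.processor)
extractor = extractor_class() if extractor_class is not None else None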
@@ -0,0 +1,67 @@
from ..generic_extractors import *
from bs4 import BeautifulSoup
from datetime import datetime

# A class dedicated to getting events from the Raymond Bar
# URL: https://www.raymondbar.net/
class CExtractor(TwoStepsExtractorNoPause):

    def __init__(self):
        super().__init__()

    def build_event_url_list(self, content, infuture_days=180):
        soup = BeautifulSoup(content, "html.parser")

        links = soup.select(".showsList .showMore")
        if links:
            for l in links:
                print(l["href"])
                self.add_event_url(l["href"])

    def add_event_from_content(
        self,
        event_content,
        event_url,
        url_human=None,
        default_values=None,
        published=False,
    ):
        soup = BeautifulSoup(event_content, "html.parser")

        title = soup.select_one(".showDesc h4 a.summary").text
        start_day = soup.select_one(".showDate .value-title")

        if start_day is not None:
            start_day = start_day["title"]
            if start_day is not None:
                start_day = start_day.split("T")[0]
        if start_day is None:
            print("unable to retrieve the date")
            return

        description = soup.select_one(".showDetails.description").text
        image = soup.select(".showDetails.description img")
        # select() returns a (possibly empty) list, so check it is non-empty
        # before taking the last image.
        image_alt = None
        if image:
            image_alt = image[-1]["alt"]
            image = image[-1]["src"]
        else:
            image = None

        self.add_event_with_props(
            default_values,
            event_url,
            title,
            None,
            start_day,
            None,
            description,
            [],
            recurrences=None,
            uuids=[event_url],
            url_human=event_url,
            start_time=None,
            end_day=None,
            end_time=None,
            published=published,
            image=image,
            image_alt=image_alt,
        )
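The extractor depends on a handful of CSS selectors and on an ISO timestamp carried in the title attribute of .value-title. The fragment below exercises that date logic in isolation; the HTML is a hand-written assumption about the markup, not a copy of the real page:

# Sketch only, not part of this changeset: the start-day extraction used in
# add_event_from_content(), run against a minimal hand-written fragment that
# imitates the .showDate .value-title markup the extractor expects.
from bs4 import BeautifulSoup

html = '<div class="showDate"><span class="value-title" title="2024-06-21T20:30:00"></span></div>'
soup = BeautifulSoup(html, "html.parser")

start_day = soup.select_one(".showDate .value-title")
if start_day is not None:
    start_day = start_day["title"]       # "2024-06-21T20:30:00"
    start_day = start_day.split("T")[0]  # "2024-06-21"
print(start_day)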
@@ -18,7 +18,6 @@ class CExtractor(TwoStepsExtractorNoPause):
        links = soup.select("td.seance_link a")
        if links:
            for l in links:
                print(l["href"])
                self.add_event_url(l["href"])

    def to_text_select_one(soup, filter):

@@ -2009,6 +2009,7 @@ class RecurrentImport(models.Model):
        C3C = "cour3coquins", _("la cour des 3 coquins")
        ARACHNEE = "arachnee", _("Arachnée concert")
        LERIO = "rio", _('Le Rio')
        LARAYMONDE = "raymonde", _('La Raymonde')

    class DOWNLOADER(models.TextChoices):
        SIMPLE = "simple", _("simple")
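The new PROCESSOR entry stores the value "raymonde" in the database and exposes the label "La Raymonde" in forms. A minimal sketch of how the choice behaves, assuming a configured Django environment for this project; the import path below is a guess, not taken from the diff:

# Sketch only, not part of this changeset. Django TextChoices members compare
# equal to their stored string value, so rows saved with processor="raymonde"
# match the new PROCESSOR.LARAYMONDE branch in run_recurrent_import_internal().
from agenda_culturel.models import RecurrentImport  # hypothetical import path

assert RecurrentImport.PROCESSOR.LARAYMONDE == "raymonde"
print(RecurrentImport.PROCESSOR.LARAYMONDE.label)  # "La Raymonde"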