Implémentation de l'import echosciences auvergne

2025-04-04 00:43:32 +02:00 · 2025-04-04 00:43:32 +02:00 · b173baa76c
commit b173baa76c
parent 9ee1975528
7 changed files with 229 additions and 2 deletions
--- a/experimentations/get_echosciences.py
+++ b/experimentations/get_echosciences.py
@ -0,0 +1,47 @@
+#!/usr/bin/python3
+# coding: utf-8
+
+# To run this import outside of Django, set an environment variable holding the Echosciences token:
+# export ECHOSCIENCES_TOKEN=<your token>
+
+import json
+import os
+import sys
+
+# Make the repository root and its "src" directory importable, so that the
+# "src.agenda_culturel" imports below resolve when this script is run directly.
+# "current" is the directory containing this file, "parent" the one above it.
+current = os.path.dirname(os.path.realpath(__file__))
+parent = os.path.dirname(current)
+sys.path.extend([parent, parent + "/src"])
+
+from src.agenda_culturel.import_tasks.downloader import SimpleDownloader
+from src.agenda_culturel.import_tasks.generic_extractors.echosciences import (
+    CExtractor,
+)
+from src.agenda_culturel.import_tasks.importer import URL2Events
+
+
+if __name__ == "__main__":
+    # The same address is passed both as the URL to process and as the
+    # human-facing URL.
+    site = "https://www.echosciences-auvergne.fr"
+    importer = URL2Events(SimpleDownloader(), CExtractor())
+
+    events = importer.process(
+        site,
+        site,
+        cache="cache-echosciences.html",
+        default_values={},
+        published=True,
+    )
+
+    # Dump whatever process() returned; values json cannot encode natively
+    # are written through str().
+    exportfile = "events-echosciences.json"
+    print(f"Saving events to file {exportfile}")
+    with open(exportfile, "w") as out:
+        json.dump(events, out, indent=4, default=str)
--- a/src/agenda_culturel/celery.py
+++ b/src/agenda_culturel/celery.py
@ -38,6 +38,7 @@ from .import_tasks.generic_extractors import (
    mobilizon,
    ical,
    fbevents,
+    echosciences,
 )
 from .import_tasks.importer import URL2Events

@ -207,6 +208,8 @@ def run_recurrent_import_internal(rimport, downloader, req_id):
        extractor = mobilizon.CExtractor()
    elif rimport.processor == RecurrentImport.PROCESSOR.LECAMELEON:
        extractor = lecameleon.CExtractor()
+    elif rimport.processor == RecurrentImport.PROCESSOR.ECHOSCIENCES:
+        extractor = echosciences.CExtractor()
    else:
        extractor = None

--- a/src/agenda_culturel/import_tasks/generic_extractors/echosciences.py
+++ b/src/agenda_culturel/import_tasks/generic_extractors/echosciences.py
@ -0,0 +1,131 @@
+import logging
+from datetime import datetime, timedelta
+from django.conf import settings
+import dateutil.parser
+import requests
+from bs4 import BeautifulSoup
+
+from ..extractor import Extractor
+
+logger = logging.getLogger(__name__)
+
+
+# A class dedicated to get events from echosciences
+class CExtractor(Extractor):
+    """Extractor that builds events from the open API of an Echosciences site.
+
+    ``extract`` never reads the downloaded content: it lists the upcoming
+    events through ``events_api``, then fetches each of them through
+    ``event_api``. Every request is authenticated with
+    ``settings.ECHOSCIENCES_TOKEN``.
+    """
+
+    # API paths, appended to the site URL given to ``extract``.
+    events_api = "/open_api/v1/events"
+    event_api = "/open_api/v1/events/"
+
+    def __init__(self):
+        super().__init__()
+        # NOTE(review): presumably tells the importer that no page has to be
+        # downloaded before calling extract() -- confirm against Extractor.
+        self.no_downloader = True
+
+    def token_available(self):
+        """Return True when ``settings.ECHOSCIENCES_TOKEN`` is not empty."""
+        return settings.ECHOSCIENCES_TOKEN != ""
+
+    def _get_data(self, url, params=None):
+        """Send an authenticated GET request and return the decoded JSON.
+
+        Args:
+            url: Full URL of the API endpoint.
+            params: Optional dict sent as the JSON body of the request; an
+                empty dict is sent when omitted.
+
+        Returns:
+            The decoded JSON answer.
+
+        Raises:
+            Exception: If the status code is not 200, or if the answer
+                contains an "errors" entry.
+        """
+        headers = {"Authorization": "Token token=" + settings.ECHOSCIENCES_TOKEN}
+
+        response = requests.get(
+            url=url,
+            # None instead of a shared mutable default; the request body
+            # stays an empty JSON object when no parameters are given.
+            json={} if params is None else params,
+            headers=headers,
+        )
+
+        if response.status_code != 200:
+            raise Exception(
+                f"Error while requesting. Status code: {response.status_code}"
+            )
+
+        response_json = response.json()
+        if "errors" in response_json:
+            # Read the same key that was just tested, so the API message is
+            # reported instead of a KeyError.
+            raise Exception(
+                f"Errors while requesting {url}. {str(response_json['errors'])}"
+            )
+
+        return response_json
+
+    def _oncoming_events(self, url):
+        """Return the events listed by the API for the next 30 weeks.
+
+        Pages of 20 events, ordered by ascending start date, are requested
+        until an empty page is returned, or until page 199 at the latest.
+
+        Args:
+            url: Base URL of the site.
+
+        Returns:
+            A list concatenating the content of every page.
+        """
+        # Compute the date window once so that every page uses the same one.
+        today = datetime.today()
+        query = {
+            "per_page": 20,
+            "order_by": "start_date",
+            "order_type": "asc",
+            "start_date": today.strftime("%Y-%m-%d"),
+            "end_date": (today + timedelta(weeks=30)).strftime("%Y-%m-%d"),
+        }
+
+        result = []
+        for page in range(1, 200):
+            r = self._get_data(url + CExtractor.events_api, {**query, "page": page})
+            if not r:
+                break
+            result.extend(r)
+
+        return result
+
+    def _get_event(self, url, slug):
+        """Return the API answer describing the event identified by ``slug``."""
+        return self._get_data(url + CExtractor.event_api + slug)
+
+    def extract(
+        self,
+        content,
+        url,
+        url_human=None,
+        default_values=None,
+        published=False,
+    ):
+        """Fetch the upcoming events of the site and register them.
+
+        Args:
+            content: Unused; the data comes from the API.
+            url: Base URL of the site, to which the API paths are appended.
+            url_human: Unused; each event is registered with its own URL.
+            default_values: Passed as is to ``add_event``.
+            published: Passed as is to ``add_event``.
+
+        Returns:
+            The value of ``self.get_structure()`` once all events are added.
+
+        Raises:
+            Exception: If no token is configured, or if an API request fails.
+        """
+
+        self.set_header(url)
+        self.clear_events()
+
+        if not self.token_available():
+            raise Exception("Token not available")
+
+        for e in self._oncoming_events(url):
+            # The listing entry only supplies the slug; all other fields are
+            # read from the per-event answer.
+            event = self._get_event(url, e["slug"])
+
+            title = event["name"]
+            event_url = event["url"]
+            # NOTE(review): assumes every event carries an image object --
+            # confirm against the API.
+            image = event["image"]["url"]
+
+            # Skip missing or empty parts, so that the location neither ends
+            # with a dangling ", " nor fails to join on a null value.
+            place = event.get("place")
+            parts = [place["name"] if place is not None else None, event["address"]]
+            location = ", ".join(part for part in parts if part)
+
+            # The body is HTML; only its text is kept as description.
+            description = BeautifulSoup(event["body"], "html.parser").get_text()
+
+            start = dateutil.parser.isoparse(event["start_date"])
+            end = dateutil.parser.isoparse(event["end_date"])
+
+            # NOTE(review): the third and seventh positional arguments are
+            # presumably the category and the tags -- confirm against
+            # Extractor.add_event.
+            self.add_event(
+                default_values,
+                title,
+                None,
+                start.date(),
+                location,
+                description,
+                [],
+                uuids=[event_url],
+                recurrences=None,
+                url_human=event_url,
+                start_time=start.time(),
+                published=published,
+                image=image,
+                end_day=end.date(),
+                end_time=end.time(),
+            )
+
+        return self.get_structure()
--- a/src/agenda_culturel/import_tasks/generic_extractors/mobilizon.py
+++ b/src/agenda_culturel/import_tasks/generic_extractors/mobilizon.py
@ -11,8 +11,7 @@ from ..extractor import Extractor
 logger = logging.getLogger(__name__)


-# A class dedicated to get events from les amis du temps des cerises
-# Website https://amisdutempsdescerises.org/
+# A class dedicated to get events from Mobilizon
 class CExtractor(Extractor):
    def __init__(self):
        super().__init__()
--- a/src/agenda_culturel/migrations/0164_alter_recurrentimport_processor.py
+++ b/src/agenda_culturel/migrations/0164_alter_recurrentimport_processor.py
@ -0,0 +1,45 @@
+# Generated by Django 4.2.19 on 2025-04-04 00:24
+
+from django.db import migrations, models
+
+
+class Migration(migrations.Migration):
+    # Re-declares the "processor" field of RecurrentImport so that its choices
+    # include the "Echosciences" entry.
+
+    dependencies = [
+        ("agenda_culturel", "0163_alter_tag_name"),
+    ]
+
+    operations = [
+        migrations.AlterField(
+            model_name="recurrentimport",
+            name="processor",
+            field=models.CharField(
+                # Each pair is (stored value, displayed label).
+                choices=[
+                    ("ical", "ical"),
+                    ("icalnobusy", "ical no busy"),
+                    ("icalnovc", "ical no VC"),
+                    ("ical naive tz", "ical naive timezone"),
+                    ("lacoope", "lacoope.org"),
+                    ("lacomedie", "la comédie"),
+                    ("lefotomat", "le fotomat"),
+                    ("lapucealoreille", "la puce à l'oreille"),
+                    ("Plugin wordpress MEC", "Plugin wordpress MEC"),
+                    ("Facebook events", "Événements d'une page FB"),
+                    ("Billetterie CF", "Billetterie Clermont-Ferrand"),
+                    ("arachnee", "Arachnée concert"),
+                    ("rio", "Le Rio"),
+                    ("raymonde", "La Raymonde"),
+                    ("apidae", "Agenda apidae tourisme"),
+                    ("iguana", "Agenda iguana (médiathèques)"),
+                    ("Mille formes", "Mille formes"),
+                    ("Amis cerises", "Les Amis du Temps des Cerises"),
+                    ("Mobilizon", "Mobilizon"),
+                    ("Le Caméléon", "Le caméléon"),
+                    ("Echosciences", "Echosciences"),
+                ],
+                default="ical",
+                # Stored values must fit within this length.
+                max_length=20,
+                verbose_name="Processor",
+            ),
+        ),
+    ]
--- a/src/agenda_culturel/models.py
+++ b/src/agenda_culturel/models.py
@ -2817,6 +2817,7 @@ class RecurrentImport(models.Model):
        AMISCERISES = "Amis cerises", _("Les Amis du Temps des Cerises")
        MOBILIZON = "Mobilizon", _("Mobilizon")
        LECAMELEON = "Le Caméléon", _("Le caméléon")
+        ECHOSCIENCES = "Echosciences", _("Echosciences")

    class DOWNLOADER(models.TextChoices):
        SIMPLE = "simple", _("simple")
--- a/src/agenda_culturel/settings/base.py
+++ b/src/agenda_culturel/settings/base.py
@ -35,6 +35,7 @@ ADMINS = [tuple(a.split(",")) for a in os_getenv("ADMINS", "").split(";")]
 MANAGERS = [tuple(a.split(",")) for a in os_getenv("MANAGERS", "").split(";")]
 SERVER_EMAIL = os_getenv("SERVER_EMAIL", "")

+ECHOSCIENCES_TOKEN = os_getenv("ECHOSCIENCES_TOKEN", "")

 # Application definition