Add an Echosciences extractor (events fetched from the Echosciences open API).

Contents: the extractor itself, an experimentation script, the celery wiring,
the new RecurrentImport processor choice with its migration, and the
ECHOSCIENCES_TOKEN setting.

Review notes: the "index" blob ids are the ones of the initial patch, and the
indentation of context lines was restored from the Python structure.

diff --git a/experimentations/get_echosciences.py b/experimentations/get_echosciences.py
new file mode 100755
index 0000000..a0ced38
--- /dev/null
+++ b/experimentations/get_echosciences.py
@@ -0,0 +1,47 @@
+#!/usr/bin/python3
+# coding: utf-8
+
+# To be able to run this import out of django, you'll have to set an environment variable with the Echosciences token:
+# export ECHOSCIENCES_TOKEN=
+
+import json
+import os
+import sys
+
+# getting the name of the directory
+# where this file is present.
+current = os.path.dirname(os.path.realpath(__file__))
+
+# Getting the parent directory name
+# where the current directory is present.
+parent = os.path.dirname(current)
+
+# adding the parent directory to
+# the sys.path.
+sys.path.append(parent)
+sys.path.append(parent + "/src")
+
+from src.agenda_culturel.import_tasks.downloader import SimpleDownloader
+from src.agenda_culturel.import_tasks.generic_extractors.echosciences import (
+    CExtractor,
+)
+from src.agenda_culturel.import_tasks.importer import URL2Events
+
+
+if __name__ == "__main__":
+    u2e = URL2Events(SimpleDownloader(), CExtractor())
+    url = "https://www.echosciences-auvergne.fr"
+    url_human = "https://www.echosciences-auvergne.fr"
+
+    events = u2e.process(
+        url,
+        url_human,
+        cache="cache-echosciences.html",
+        default_values={},
+        published=True,
+    )
+
+    exportfile = "events-echosciences.json"
+    print("Saving events to file {}".format(exportfile))
+    with open(exportfile, "w") as f:
+        json.dump(events, f, indent=4, default=str)
diff --git a/src/agenda_culturel/celery.py b/src/agenda_culturel/celery.py
index 258352d..900cb10 100644
--- a/src/agenda_culturel/celery.py
+++ b/src/agenda_culturel/celery.py
@@ -38,6 +38,7 @@ from .import_tasks.generic_extractors import (
     mobilizon,
     ical,
     fbevents,
+    echosciences,
 )
 from .import_tasks.importer import URL2Events
 
@@ -207,6 +208,8 @@ def run_recurrent_import_internal(rimport, downloader, req_id):
         extractor = mobilizon.CExtractor()
     elif rimport.processor == RecurrentImport.PROCESSOR.LECAMELEON:
         extractor = lecameleon.CExtractor()
+    elif rimport.processor == RecurrentImport.PROCESSOR.ECHOSCIENCES:
+        extractor = echosciences.CExtractor()
     else:
         extractor = None
 
diff --git a/src/agenda_culturel/import_tasks/generic_extractors/echosciences.py b/src/agenda_culturel/import_tasks/generic_extractors/echosciences.py
new file mode 100644
index 0000000..72d80ea
--- /dev/null
+++ b/src/agenda_culturel/import_tasks/generic_extractors/echosciences.py
@@ -0,0 +1,158 @@
+import logging
+from datetime import datetime, timedelta
+from django.conf import settings
+import dateutil.parser
+import requests
+from bs4 import BeautifulSoup
+
+from ..extractor import Extractor
+
+logger = logging.getLogger(__name__)
+
+
+# A class dedicated to get events from echosciences
+class CExtractor(Extractor):
+    """Extractor that builds events from the Echosciences open API."""
+
+    # API paths, appended to the base URL received by extract()
+    events_api = "/open_api/v1/events"
+    event_api = "/open_api/v1/events/"
+
+    def __init__(self):
+        super().__init__()
+        # extract() queries the API itself and ignores its content argument
+        self.no_downloader = True
+
+    def token_available(self):
+        """Return True when settings.ECHOSCIENCES_TOKEN is not empty."""
+        return settings.ECHOSCIENCES_TOKEN != ""
+
+    def _get_data(self, url, params=None):
+        """GET url with the API token and return the decoded JSON answer.
+
+        params is sent as the JSON body of the request. An exception is raised
+        when the status code is not 200 or when the answer has an "errors" entry.
+        """
+        headers = {"Authorization": "Token token=" + settings.ECHOSCIENCES_TOKEN}
+
+        response = requests.get(
+            url=url,
+            # None stands for the former shared {} default: same request body
+            json={} if params is None else params,
+            headers=headers,
+        )
+
+        if response.status_code == 200:
+            response_json = response.json()
+            if "errors" in response_json:
+                # Read the key that has just been tested: "error" raised a
+                # KeyError that hid the message returned by the API
+                raise Exception(
+                    f"Errors while requesting {url}. {str(response_json['errors'])}"
+                )
+
+            return response_json
+        else:
+            raise Exception(
+                f"Error while requesting. Status code: {response.status_code}"
+            )
+
+    def _oncoming_events(self, url):
+        """Request the events between today and today + 30 weeks, page by page.
+
+        Pages of 20 events are requested until an empty one is returned
+        (199 pages at most). Return the concatenation of all the pages.
+        """
+        result = []
+
+        for i in range(1, 200):
+            r = self._get_data(
+                url + CExtractor.events_api,
+                {
+                    "per_page": 20,
+                    "page": i,
+                    "order_by": "start_date",
+                    "order_type": "asc",
+                    "start_date": datetime.today().strftime("%Y-%m-%d"),
+                    "end_date": (datetime.today() + timedelta(weeks=30)).strftime(
+                        "%Y-%m-%d"
+                    ),
+                },
+            )
+            if len(r) == 0:
+                break
+            result = result + r
+
+        return result
+
+    def _get_event(self, url, slug):
+        """Return the API answer for the single event identified by slug."""
+        return self._get_data(url + CExtractor.event_api + slug)
+
+    def extract(
+        self,
+        content,
+        url,
+        url_human=None,
+        default_values=None,
+        published=False,
+    ):
+        """Add one event per oncoming API event and return the structure.
+
+        content and url_human are not used: the events are fetched from the
+        API, using url as the base address. An exception is raised when no
+        token is configured.
+        """
+
+        self.set_header(url)
+        self.clear_events()
+
+        if not self.token_available():
+            raise Exception("Token not available")
+
+        events = self._oncoming_events(url)
+
+        for e in events:
+            event = self._get_event(url, e["slug"])
+
+            title = event["name"]
+            event_url = event["url"]
+            # NOTE(review): assumes "image" and "address" are never null in the
+            # API answer (only "place" is checked below) - to be confirmed
+            image = event["image"]["url"]
+
+            location = []
+            if "place" in event and event["place"] is not None:
+                location.append(event["place"]["name"])
+            location.append(event["address"])
+            location = ", ".join(location)
+            soup = BeautifulSoup(event["body"], "html.parser")
+
+            description = soup.get_text()
+            start = dateutil.parser.isoparse(event["start_date"])
+            end = dateutil.parser.isoparse(event["end_date"])
+
+            start_day = start.date()
+            start_time = start.time()
+            end_day = end.date()
+            end_time = end.time()
+
+            self.add_event(
+                default_values,
+                title,
+                None,
+                start_day,
+                location,
+                description,
+                [],
+                uuids=[event_url],
+                recurrences=None,
+                url_human=event_url,
+                start_time=start_time,
+                published=published,
+                image=image,
+                end_day=end_day,
+                end_time=end_time,
+            )
+
+        return self.get_structure()
diff --git a/src/agenda_culturel/import_tasks/generic_extractors/mobilizon.py b/src/agenda_culturel/import_tasks/generic_extractors/mobilizon.py
index c5806d0..ae418f2 100644
--- a/src/agenda_culturel/import_tasks/generic_extractors/mobilizon.py
+++ b/src/agenda_culturel/import_tasks/generic_extractors/mobilizon.py
@@ -11,8 +11,7 @@ from ..extractor import Extractor
 logger = logging.getLogger(__name__)
 
 
-# A class dedicated to get events from les amis du temps des cerises
-# Website https://amisdutempsdescerises.org/
+# A class dedicated to get events from Mobilizon
 class CExtractor(Extractor):
     def __init__(self):
         super().__init__()
diff --git a/src/agenda_culturel/migrations/0164_alter_recurrentimport_processor.py b/src/agenda_culturel/migrations/0164_alter_recurrentimport_processor.py
new file mode 100644
index 0000000..da575b9
--- /dev/null
+++ b/src/agenda_culturel/migrations/0164_alter_recurrentimport_processor.py
@@ -0,0 +1,45 @@
+# Generated by Django 4.2.19 on 2025-04-04 00:24
+
+from django.db import migrations, models
+
+
+class Migration(migrations.Migration):
+
+    dependencies = [
+        ("agenda_culturel", "0163_alter_tag_name"),
+    ]
+
+    operations = [
+        migrations.AlterField(
+            model_name="recurrentimport",
+            name="processor",
+            field=models.CharField(
+                choices=[
+                    ("ical", "ical"),
+                    ("icalnobusy", "ical no busy"),
+                    ("icalnovc", "ical no VC"),
+                    ("ical naive tz", "ical naive timezone"),
+                    ("lacoope", "lacoope.org"),
+                    ("lacomedie", "la comédie"),
+                    ("lefotomat", "le fotomat"),
+                    ("lapucealoreille", "la puce à l'oreille"),
+                    ("Plugin wordpress MEC", "Plugin wordpress MEC"),
+                    ("Facebook events", "Événements d'une page FB"),
+                    ("Billetterie CF", "Billetterie Clermont-Ferrand"),
+                    ("arachnee", "Arachnée concert"),
+                    ("rio", "Le Rio"),
+                    ("raymonde", "La Raymonde"),
+                    ("apidae", "Agenda apidae tourisme"),
+                    ("iguana", "Agenda iguana (médiathèques)"),
+                    ("Mille formes", "Mille formes"),
+                    ("Amis cerises", "Les Amis du Temps des Cerises"),
+                    ("Mobilizon", "Mobilizon"),
+                    ("Le Caméléon", "Le caméléon"),
+                    ("Echosciences", "Echosciences"),
+                ],
+                default="ical",
+                max_length=20,
+                verbose_name="Processor",
+            ),
+        ),
+    ]
diff --git a/src/agenda_culturel/models.py b/src/agenda_culturel/models.py
index 83c5265..8c0025e 100644
--- a/src/agenda_culturel/models.py
+++ b/src/agenda_culturel/models.py
@@ -2817,6 +2817,7 @@ class RecurrentImport(models.Model):
         AMISCERISES = "Amis cerises", _("Les Amis du Temps des Cerises")
         MOBILIZON = "Mobilizon", _("Mobilizon")
         LECAMELEON = "Le Caméléon", _("Le caméléon")
+        ECHOSCIENCES = "Echosciences", _("Echosciences")
 
     class DOWNLOADER(models.TextChoices):
         SIMPLE = "simple", _("simple")
diff --git a/src/agenda_culturel/settings/base.py b/src/agenda_culturel/settings/base.py
index fb61dd2..48423f2 100644
--- a/src/agenda_culturel/settings/base.py
+++ b/src/agenda_culturel/settings/base.py
@@ -35,6 +35,7 @@ ADMINS = [tuple(a.split(",")) for a in os_getenv("ADMINS", "").split(";")]
 MANAGERS = [tuple(a.split(",")) for a in os_getenv("MANAGERS", "").split(";")]
 SERVER_EMAIL = os_getenv("SERVER_EMAIL", "")
 
+ECHOSCIENCES_TOKEN = os_getenv("ECHOSCIENCES_TOKEN", "")
 
 # Application definition
 