diff --git a/experimentations/get_mobilizon.py b/experimentations/get_mobilizon.py
new file mode 100644
index 0000000..6e3db53
--- /dev/null
+++ b/experimentations/get_mobilizon.py
@@ -0,0 +1,44 @@
+#!/usr/bin/python3
+# coding: utf-8
+
+import os
+import json
+import sys
+
+# getting the name of the directory
+# where this file is present.
+current = os.path.dirname(os.path.realpath(__file__))
+
+# Getting the parent directory name
+# where the current directory is present.
+parent = os.path.dirname(current)
+
+# adding the parent directory to
+# the sys.path.
+sys.path.append(parent)
+sys.path.append(parent + "/src")
+
+from src.agenda_culturel.import_tasks.downloader import *
+from src.agenda_culturel.import_tasks.extractor import *
+from src.agenda_culturel.import_tasks.importer import *
+from src.agenda_culturel.import_tasks.custom_extractors import *
+
+
+
+
+
+if __name__ == "__main__":
+
+    u2e = URL2Events(SimpleDownloader(), mobilizon.CExtractor())
+    url = "https://mobilizon.fr/@attac63/events?"
+    url_human = "https://mobilizon.fr/@attac63/events"
+
+    try:
+        events = u2e.process(url, url_human, cache = "cache-attac63.html", default_values = {}, published = True)
+
+        exportfile = "events-attac63.json"
+        print("Saving events to file {}".format(exportfile))
+        with open(exportfile, "w") as f:
+            json.dump(events, f, indent=4, default=str)
+    except Exception as e:
+        print("Exception: " + str(e))
diff --git a/src/agenda_culturel/celery.py b/src/agenda_culturel/celery.py
index 395d871..67b8183 100644
--- a/src/agenda_culturel/celery.py
+++ b/src/agenda_culturel/celery.py
@@ -160,6 +160,8 @@ def run_recurrent_import_internal(rimport, downloader, req_id):
         extractor = mille_formes.CExtractor()
     elif rimport.processor == RecurrentImport.PROCESSOR.AMISCERISES:
         extractor = amisdutempsdescerises.CExtractor()
+    elif rimport.processor == RecurrentImport.PROCESSOR.MOBILIZON:
+        extractor = mobilizon.CExtractor()
     else:
         extractor = None
 
diff --git a/src/agenda_culturel/import_tasks/custom_extractors/mobilizon.py b/src/agenda_culturel/import_tasks/custom_extractors/mobilizon.py
new file mode 100644
index 0000000..debfcd8
--- /dev/null
+++ b/src/agenda_culturel/import_tasks/custom_extractors/mobilizon.py
@@ -0,0 +1,229 @@
+from ..extractor import *
+import json
+import dateutil.parser
+from datetime import datetime, timezone
+import requests
+from urllib.parse import urlparse
+from bs4 import BeautifulSoup
+
+import logging
+
+logger = logging.getLogger(__name__)
+
+# A class dedicated to get the oncoming events of a Mobilizon group,
+# using the GraphQL API of the Mobilizon server given in the url
+class CExtractor(Extractor):
+
+    # maximum time (in seconds) to wait for an answer of the API
+    REQUEST_TIMEOUT = 30
+
+    def __init__(self):
+        super().__init__()
+        # events are requested from the API: the importer does not have
+        # to download the url itself
+        self.no_downloader = True
+
+    # Source code adapted from https://framagit.org/Marc-AntoineA/mobilizon-client-python
+    def _request(self, body, data):
+        """Post the GraphQL query `body` with the variables `data`.
+
+        Return the "data" entry of the JSON answer. Raise an Exception
+        if the status code is not 200 or if the answer reports errors.
+        """
+        headers = {}
+
+        # requests never times out by default: a stalled server would
+        # block the import forever without an explicit timeout
+        response = requests.post(
+            url=self._api_end_point,
+            json={"query": body, "variables": data},
+            headers=headers,
+            timeout=self.REQUEST_TIMEOUT,
+        )
+
+        if response.status_code == 200:
+            response_json = response.json()
+            if 'errors' in response_json:
+                raise Exception(f'Errors while requesting { body }. { str(response_json["errors"]) }')
+
+            return response_json['data']
+        else:
+            raise Exception(f'Error while requesting. Status code: { response.status_code }')
+
+    def _oncoming_events_number(self):
+        """Return the number of events of the group starting from now."""
+        query = '''
+query($preferredUsername: String!, $afterDatetime: DateTime) {
+  group(preferredUsername: $preferredUsername) {
+    organizedEvents(afterDatetime: $afterDatetime) {
+      total,
+    }
+  }
+}
+        '''
+        today = datetime.now(timezone.utc).isoformat()
+        data = {
+            'preferredUsername': self._group_id,
+            'afterDatetime': today
+        }
+        r = self._request(query, data)
+        return r['group']['organizedEvents']['total']
+
+    def _oncoming_events(self):
+        """Return the list of all the oncoming events of the group.
+
+        The API is paginated: pages are requested until the number of
+        events given by _oncoming_events_number() has been collected.
+        """
+        def _oncoming_events_page(page):
+            query = '''
+query($preferredUsername: String!, $afterDatetime: DateTime, $page: Int) {
+  group(preferredUsername: $preferredUsername) {
+    organizedEvents(afterDatetime: $afterDatetime, page: $page) {
+      elements {
+        id,
+        title,
+        url,
+        beginsOn,
+        endsOn,
+        options {
+          showStartTime,
+          showEndTime,
+          timezone
+        },
+        attributedTo {
+          avatar {
+            url,
+          }
+          name,
+          preferredUsername,
+        },
+        description,
+        onlineAddress,
+        physicalAddress {
+          locality,
+          description,
+          region
+        },
+        tags {
+          title,
+          id,
+          slug
+        },
+        picture {
+          url
+        },
+        status
+      }
+    }
+  }
+}
+            '''
+
+            today = datetime.now(timezone.utc).isoformat()
+            data = {
+                'preferredUsername': self._group_id,
+                'afterDatetime': today,
+                'page': page
+            }
+            r = self._request(query, data)
+            return r['group']['organizedEvents']['elements']
+
+        number_events = self._oncoming_events_number()
+
+        events = []
+        page = 1
+        while len(events) < number_events:
+            page_events = _oncoming_events_page(page)
+            if not page_events:
+                # fewer events than announced (e.g. the total changed
+                # between the two requests): stop instead of looping
+                # forever on empty pages
+                break
+            events.extend(page_events)
+            page += 1
+        return events
+
+    def extract(
+        self, content, url, url_human=None, default_values=None, published=False
+    ):
+        """Build the events of the Mobilizon group identified by `url`.
+
+        `content` is not used: events are requested from the API of the
+        server. `url` looks like https://server/@group/events or
+        https://server/@group@other_server/events. Return the structure
+        built by get_structure(), without any event if no single
+        "@" element is found in the url.
+        """
+        self.set_header(url)
+        self.clear_events()
+
+        if "@" in url:
+
+            # TODO: when the url is
+            # https://mobilizon.fr/@xr_clermont_ferrand@mobilizon.extinctionrebellion.fr/events
+            # we should return:
+            # https://mobilizon.extinctionrebellion.fr/@xr_clermont_ferrand/events
+
+            # split url to identify server url and actor id
+            elems = [x for x in url.split('/') if len(x) > 0 and x[0] == "@"]
+            if len(elems) == 1:
+                params = elems[0].split('@')
+                if len(params) == 2:
+                    # @group: the group is hosted by the server of the url
+                    server = urlparse(url).netloc
+                else:
+                    # @group@server: the group is hosted by another server
+                    server = params[2]
+                self._api_end_point = "https://" + server + "/api"
+                self._group_id = params[1]
+
+                events = self._oncoming_events()
+
+                for e in events:
+                    title = e["title"]
+                    event_url = e["url"]
+
+                    # picture, address, description and end date are handled
+                    # as optional: a null value must not abort the import
+                    picture = e.get("picture")
+                    image = picture["url"] if picture else None
+                    address = e.get("physicalAddress") or {}
+                    location = ', '.join(
+                        x for x in (address.get("description"), address.get("locality")) if x
+                    )
+                    soup = BeautifulSoup(e.get("description") or "", "html.parser")
+                    description = soup.text
+
+                    # NOTE(review): dates are used as given by the API,
+                    # options.timezone is not applied - confirm that it
+                    # gives the expected local times
+                    start = dateutil.parser.isoparse(e["beginsOn"])
+                    start_day = start.date()
+                    start_time = start.time() if e["options"]["showStartTime"] else None
+                    if e.get("endsOn"):
+                        end = dateutil.parser.isoparse(e["endsOn"])
+                        end_day = end.date()
+                        end_time = end.time() if e["options"]["showEndTime"] else None
+                    else:
+                        end_day = None
+                        end_time = None
+
+                    self.add_event(
+                        default_values,
+                        title,
+                        None,
+                        start_day,
+                        location,
+                        description,
+                        [],
+                        uuids=[event_url],
+                        recurrences=None,
+                        url_human=event_url,
+                        start_time=start_time,
+                        published=published,
+                        image=image,
+                        end_day=end_day,
+                        end_time=end_time)
+
+        return self.get_structure()
diff --git a/src/agenda_culturel/import_tasks/extractor.py b/src/agenda_culturel/import_tasks/extractor.py
index dceb7ec..a73ae22 100644
--- a/src/agenda_culturel/import_tasks/extractor.py
+++ b/src/agenda_culturel/import_tasks/extractor.py
@@ -20,6 +20,11 @@ class Extractor(ABC):
         self.events = []
         self.downloader = None
         self.has_2nd_method = False
+
+        # setting it to True in inherited classes prevents
+        # the importer from using the downloader on the url
+        # (used for extractors that are self-sufficient)
+        self.no_downloader = False
 
         # parameters used by the downloader to get the content
         self.referer = ""
diff --git a/src/agenda_culturel/migrations/0147_alter_recurrentimport_processor.py b/src/agenda_culturel/migrations/0147_alter_recurrentimport_processor.py
new file mode 100644
index 0000000..5971ae4
--- /dev/null
+++ b/src/agenda_culturel/migrations/0147_alter_recurrentimport_processor.py
@@ -0,0 +1,18 @@
+# Generated by Django 4.2.9 on 2025-02-12 14:17
+
+from django.db import migrations, models
+
+
+class Migration(migrations.Migration):
+
+    dependencies = [
+        ('agenda_culturel', '0146_alter_recurrentimport_processor'),
+    ]
+
+    operations = [
+        migrations.AlterField(
+            model_name='recurrentimport',
+            name='processor',
+            field=models.CharField(choices=[('ical', 'ical'), ('icalnobusy', 'ical no busy'), ('icalnovc', 'ical no VC'), ('lacoope', 'lacoope.org'), ('lacomedie', 'la comédie'), ('lefotomat', 'le fotomat'), ('lapucealoreille', "la puce à l'oreille"), ('Plugin wordpress MEC', 'Plugin wordpress MEC'), ('Facebook events', "Événements d'une page FB"), ('Billetterie CF', 'Billetterie Clermont-Ferrand'), ('arachnee', 'Arachnée concert'), ('rio', 'Le Rio'), ('raymonde', 'La Raymonde'), ('apidae', 'Agenda apidae tourisme'), ('iguana', 'Agenda iguana (médiathèques)'), ('Mille formes', 'Mille formes'), ('Amis cerises', 'Les Amis du Temps des Cerises'), ('Mobilizon', 'Mobilizon')], default='ical', max_length=20, verbose_name='Processor'),
+        ),
+    ]
diff --git a/src/agenda_culturel/models.py b/src/agenda_culturel/models.py
index 3e2dbee..651ad98 100644
--- a/src/agenda_culturel/models.py
+++ b/src/agenda_culturel/models.py
@@ -2135,6 +2135,7 @@ class RecurrentImport(models.Model):
         IGUANA = 'iguana', _('Agenda iguana (médiathèques)')
         MILLEFORMES = 'Mille formes', _('Mille formes')
         AMISCERISES = 'Amis cerises', _('Les Amis du Temps des Cerises')
+        MOBILIZON = 'Mobilizon', _('Mobilizon')
 
     class DOWNLOADER(models.TextChoices):
         SIMPLE = "simple", _("simple")
diff --git a/src/requirements.txt b/src/requirements.txt
index a677f0b..2b82b17 100644
--- a/src/requirements.txt
+++ b/src/requirements.txt
@@ -45,4 +45,5 @@ django-cache-cleaner==0.1.0
 emoji==2.14.0
 django-honeypot==1.2.1
 django-autoslug==1.9.9
-django-debug-toolbar-template-profiler==2.1.0
\ No newline at end of file
+django-debug-toolbar-template-profiler==2.1.0
+requests==2.32.3
\ No newline at end of file