parent
b25ae9e354
commit
c1234449f7
44
experimentations/get_mobilizon.py
Normal file
44
experimentations/get_mobilizon.py
Normal file
@ -0,0 +1,44 @@
|
|||||||
|
#!/usr/bin/python3
|
||||||
|
# coding: utf-8
|
||||||
|
|
||||||
|
import os
|
||||||
|
import json
|
||||||
|
import sys
|
||||||
|
|
||||||
|
# Directory that contains this script (symlinks resolved).
current = os.path.dirname(os.path.realpath(__file__))

# Directory one level above the script's directory.
parent = os.path.dirname(current)

# Make both that parent directory and its "src" sub-directory importable,
# so that the "src.agenda_culturel" imports below can be resolved.
sys.path.extend([parent, parent + "/src"])
|
||||||
|
|
||||||
|
from src.agenda_culturel.import_tasks.downloader import *
|
||||||
|
from src.agenda_culturel.import_tasks.extractor import *
|
||||||
|
from src.agenda_culturel.import_tasks.importer import *
|
||||||
|
from src.agenda_culturel.import_tasks.custom_extractors import *
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
    # Run the Mobilizon extractor on the attac63 group and dump the
    # resulting structure into a local JSON file.
    u2e = URL2Events(SimpleDownloader(), mobilizon.CExtractor())
    url = "https://mobilizon.fr/@attac63/events?"
    # Fixed: this URL used to start with the malformed scheme "htthttps://".
    url_human = "https://mobilizon.fr/@attac63/events"

    try:
        events = u2e.process(
            url,
            url_human,
            cache="cache-attac63.html",
            default_values={},
            published=True,
        )

        exportfile = "events-attac63.json"
        print("Saving events to file {}".format(exportfile))
        with open(exportfile, "w") as f:
            # default=str stringifies values json cannot serialize natively
            json.dump(events, f, indent=4, default=str)
    except Exception as e:
        # Top-level boundary of an experimentation script: report the error
        # instead of printing a traceback.
        print("Exception: " + str(e))
|
@ -160,6 +160,8 @@ def run_recurrent_import_internal(rimport, downloader, req_id):
|
|||||||
extractor = mille_formes.CExtractor()
|
extractor = mille_formes.CExtractor()
|
||||||
elif rimport.processor == RecurrentImport.PROCESSOR.AMISCERISES:
|
elif rimport.processor == RecurrentImport.PROCESSOR.AMISCERISES:
|
||||||
extractor = amisdutempsdescerises.CExtractor()
|
extractor = amisdutempsdescerises.CExtractor()
|
||||||
|
elif rimport.processor == RecurrentImport.PROCESSOR.MOBILIZON:
|
||||||
|
extractor = mobilizon.CExtractor()
|
||||||
else:
|
else:
|
||||||
extractor = None
|
extractor = None
|
||||||
|
|
||||||
|
182
src/agenda_culturel/import_tasks/custom_extractors/mobilizon.py
Normal file
182
src/agenda_culturel/import_tasks/custom_extractors/mobilizon.py
Normal file
@ -0,0 +1,182 @@
|
|||||||
|
from ..extractor import *
|
||||||
|
import json
|
||||||
|
import dateutil.parser
|
||||||
|
from datetime import datetime, timezone
|
||||||
|
import requests
|
||||||
|
from urllib.parse import urlparse
|
||||||
|
from bs4 import BeautifulSoup
|
||||||
|
|
||||||
|
import logging
|
||||||
|
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
# A class dedicated to getting the upcoming events of a Mobilizon group
# Example of group URL: https://mobilizon.fr/@attac63/events
|
||||||
|
class CExtractor(Extractor):
    """Extract the upcoming events of a Mobilizon group.

    The events are read from the GraphQL API of the Mobilizon instance
    (``https://<server>/api``), not from downloaded page content; this is why
    ``no_downloader`` is set to ``True``.
    """

    # Seconds to wait for the API before giving up, so that an unresponsive
    # instance cannot block the caller forever.
    REQUEST_TIMEOUT = 30

    # Number of upcoming events organized by a group.
    _EVENTS_COUNT_QUERY = '''
        query($preferredUsername: String!, $afterDatetime: DateTime) {
            group(preferredUsername: $preferredUsername) {
                organizedEvents(afterDatetime: $afterDatetime) {
                    total,
                }
            }
        }
        '''

    # One page of upcoming events organized by a group.
    _EVENTS_PAGE_QUERY = '''
        query($preferredUsername: String!, $afterDatetime: DateTime, $page: Int) {
            group(preferredUsername: $preferredUsername) {
                organizedEvents(afterDatetime: $afterDatetime, page: $page) {
                    elements {
                        id,
                        title,
                        url,
                        beginsOn,
                        endsOn,
                        options {
                            showStartTime,
                            showEndTime,
                            timezone
                        },
                        attributedTo {
                            avatar {
                                url,
                            }
                            name,
                            preferredUsername,
                        },
                        description,
                        onlineAddress,
                        physicalAddress {
                            locality,
                            description,
                            region
                        },
                        tags {
                            title,
                            id,
                            slug
                        },
                        picture {
                            url
                        },
                        status
                    }
                }
            }
        }
        '''

    def __init__(self):
        super().__init__()
        self.no_downloader = True
        # Both values are derived from the group URL by extract().
        self._api_end_point = None
        self._group_id = None

    # Source code adapted from https://framagit.org/Marc-AntoineA/mobilizon-client-python
    def _request(self, body, data):
        """Send a GraphQL query to the API and return its ``data`` payload.

        Args:
            body: GraphQL query string.
            data: dict of variables used by the query.

        Raises:
            Exception: if the HTTP status is not 200 or if the response
                contains an ``errors`` entry. ``requests`` may also raise its
                own exceptions (connection error, timeout).
        """
        response = requests.post(
            url=self._api_end_point,
            json={"query": body, "variables": data},
            timeout=self.REQUEST_TIMEOUT,
        )

        if response.status_code != 200:
            raise Exception(f'Error while requesting. Status code: { response.status_code }')

        response_json = response.json()
        if 'errors' in response_json:
            raise Exception(f'Errors while requesting { body }. { str(response_json["errors"]) }')

        return response_json['data']

    def _oncoming_events_number(self, after_datetime=None):
        """Return the number of events of the group starting after a date.

        Args:
            after_datetime: ISO 8601 string; defaults to the current UTC time.
        """
        if after_datetime is None:
            after_datetime = datetime.now(timezone.utc).isoformat()
        data = {
            'preferredUsername': self._group_id,
            'afterDatetime': after_datetime,
        }
        r = self._request(self._EVENTS_COUNT_QUERY, data)
        return r['group']['organizedEvents']['total']

    def _oncoming_events_page(self, page, after_datetime):
        """Return the list of event dicts found on the given result page."""
        data = {
            'preferredUsername': self._group_id,
            'afterDatetime': after_datetime,
            'page': page,
        }
        r = self._request(self._EVENTS_PAGE_QUERY, data)
        return r['group']['organizedEvents']['elements']

    def _oncoming_events(self):
        """Return every upcoming event of the group, following pagination."""
        # Use a single reference date for the count and for every page so
        # that all requests describe the same set of events.
        after_datetime = datetime.now(timezone.utc).isoformat()
        number_events = self._oncoming_events_number(after_datetime)

        events = []
        page = 1
        while len(events) < number_events:
            batch = self._oncoming_events_page(page, after_datetime)
            if not batch:
                # The announced total was not reached but the API has nothing
                # more to give: stop instead of requesting pages forever.
                logger.warning(
                    "Mobilizon: expected %s events but only got %s",
                    number_events,
                    len(events),
                )
                break
            events.extend(batch)
            page += 1
        return events

    @staticmethod
    def _parse_group_url(url):
        """Return ``(api_end_point, group_id)`` for a group URL, or ``None``.

        Two forms are recognized:
        ``https://server/@group/events`` (group hosted on ``server``) and
        ``https://server/@group@home.server/events`` (federated group), for
        which the API of ``home.server`` is used.
        """
        # Only look at the path so that a query string or a fragment can
        # never end up in the group handle.
        handles = [x for x in urlparse(url).path.split('/') if x.startswith("@")]
        if len(handles) != 1:
            return None

        params = handles[0].split('@')
        group_id = params[1]
        if not group_id:
            return None

        server = urlparse(url).netloc if len(params) == 2 else params[2]
        return "https://" + server + "/api", group_id

    def _add_event_from_api(self, e, default_values, published):
        """Convert one event dict returned by the API and register it."""
        event_url = e["url"]

        # picture, physicalAddress, description and endsOn may be null in the
        # API response: fall back to neutral values instead of crashing.
        picture = e.get("picture") or {}
        address = e.get("physicalAddress") or {}
        location = ', '.join(
            part
            for part in (address.get("description"), address.get("locality"))
            if part
        )
        description = BeautifulSoup(e.get("description") or "", "html.parser").text

        # NOTE(review): the times below are taken as-is from the parsed
        # timestamps; options.timezone is requested but not applied.
        # Confirm whether a conversion to the event timezone is expected.
        start = dateutil.parser.isoparse(e["beginsOn"])
        end = dateutil.parser.isoparse(e["endsOn"]) if e.get("endsOn") else None

        start_time = start.time() if e["options"]["showStartTime"] else None
        end_day = end.date() if end is not None else None
        end_time = (
            end.time() if end is not None and e["options"]["showEndTime"] else None
        )

        self.add_event(
            default_values,
            e["title"],
            None,
            start.date(),
            location,
            description,
            [],
            uuids=[event_url],
            recurrences=None,
            url_human=event_url,
            start_time=start_time,
            published=published,
            image=picture.get("url"),
            end_day=end_day,
            end_time=end_time,
        )

    def extract(
        self, content, url, url_human=None, default_values=None, published=False
    ):
        """Collect the upcoming events of the group designated by ``url``.

        ``content`` is ignored: the events are requested from the API.
        Returns the structure built by ``get_structure()``; it contains no
        event when no group handle can be found in ``url``.
        """
        self.set_header(url)
        self.clear_events()

        target = self._parse_group_url(url)
        if target is None:
            logger.warning("Mobilizon: no group handle found in url %s", url)
            return self.get_structure()
        self._api_end_point, self._group_id = target

        for e in self._oncoming_events():
            self._add_event_from_api(e, default_values, published)

        return self.get_structure()
|
@ -21,6 +21,11 @@ class Extractor(ABC):
|
|||||||
self.downloader = None
|
self.downloader = None
|
||||||
self.has_2nd_method = False
|
self.has_2nd_method = False
|
||||||
|
|
||||||
|
# setting this to True in derived classes prevents
# the importer from using the downloader on the url
# (used for extractors that are self-sufficient)
|
||||||
|
self.no_downloader = False
|
||||||
|
|
||||||
# parameters used by the downloader to get the content
|
# parameters used by the downloader to get the content
|
||||||
self.referer = ""
|
self.referer = ""
|
||||||
self.data = None
|
self.data = None
|
||||||
|
@ -0,0 +1,18 @@
|
|||||||
|
# Generated by Django 4.2.9 on 2025-02-12 14:17
|
||||||
|
|
||||||
|
from django.db import migrations, models
|
||||||
|
|
||||||
|
|
||||||
|
class Migration(migrations.Migration):
    """Alter ``recurrentimport.processor`` so its choices include Mobilizon."""

    dependencies = [
        ("agenda_culturel", "0146_alter_recurrentimport_processor"),
    ]

    operations = [
        migrations.AlterField(
            model_name="recurrentimport",
            name="processor",
            field=models.CharField(
                choices=[
                    ("ical", "ical"),
                    ("icalnobusy", "ical no busy"),
                    ("icalnovc", "ical no VC"),
                    ("lacoope", "lacoope.org"),
                    ("lacomedie", "la comédie"),
                    ("lefotomat", "le fotomat"),
                    ("lapucealoreille", "la puce à l'oreille"),
                    ("Plugin wordpress MEC", "Plugin wordpress MEC"),
                    ("Facebook events", "Événements d'une page FB"),
                    ("Billetterie CF", "Billetterie Clermont-Ferrand"),
                    ("arachnee", "Arachnée concert"),
                    ("rio", "Le Rio"),
                    ("raymonde", "La Raymonde"),
                    ("apidae", "Agenda apidae tourisme"),
                    ("iguana", "Agenda iguana (médiathèques)"),
                    ("Mille formes", "Mille formes"),
                    ("Amis cerises", "Les Amis du Temps des Cerises"),
                    ("Mobilizon", "Mobilizon"),
                ],
                default="ical",
                max_length=20,
                verbose_name="Processor",
            ),
        ),
    ]
|
@ -2135,6 +2135,7 @@ class RecurrentImport(models.Model):
|
|||||||
IGUANA = 'iguana', _('Agenda iguana (médiathèques)')
|
IGUANA = 'iguana', _('Agenda iguana (médiathèques)')
|
||||||
MILLEFORMES = 'Mille formes', _('Mille formes')
|
MILLEFORMES = 'Mille formes', _('Mille formes')
|
||||||
AMISCERISES = 'Amis cerises', _('Les Amis du Temps des Cerises')
|
AMISCERISES = 'Amis cerises', _('Les Amis du Temps des Cerises')
|
||||||
|
MOBILIZON = 'Mobilizon', _('Mobilizon')
|
||||||
|
|
||||||
class DOWNLOADER(models.TextChoices):
|
class DOWNLOADER(models.TextChoices):
|
||||||
SIMPLE = "simple", _("simple")
|
SIMPLE = "simple", _("simple")
|
||||||
|
@ -46,3 +46,4 @@ emoji==2.14.0
|
|||||||
django-honeypot==1.2.1
|
django-honeypot==1.2.1
|
||||||
django-autoslug==1.9.9
|
django-autoslug==1.9.9
|
||||||
django-debug-toolbar-template-profiler==2.1.0
|
django-debug-toolbar-template-profiler==2.1.0
|
||||||
|
requests==2.32.3
|
Loading…
x
Reference in New Issue
Block a user