Implémentation de l'import echosciences auvergne

This commit is contained in:
Jean-Marie Favreau 2025-04-04 00:43:32 +02:00
parent 9ee1975528
commit b173baa76c
7 changed files with 229 additions and 2 deletions

View File

@ -0,0 +1,47 @@
#!/usr/bin/python3
# coding: utf-8
# To be able to run this import out of django, you'll have to set an environment variable with the Echosciences token:
# export ECHOSCIENCES_TOKEN=<your token>
import json
import os
import sys
# Getting the name of the directory
# where this file is present.
current = os.path.dirname(os.path.realpath(__file__))
# Getting the parent directory name
# where the current directory is present.
parent = os.path.dirname(current)
# adding the parent directory to
# the sys.path.
sys.path.append(parent)
sys.path.append(parent + "/src")
from src.agenda_culturel.import_tasks.downloader import SimpleDownloader
from src.agenda_culturel.import_tasks.generic_extractors.echosciences import (
CExtractor,
)
from src.agenda_culturel.import_tasks.importer import URL2Events
def main():
    """Fetch events from Echosciences Auvergne and save them as JSON.

    Runs the URL2Events pipeline with a SimpleDownloader and the
    Echosciences CExtractor, then writes whatever ``process`` returns to
    ``events-echosciences.json`` in the current working directory.
    """
    # The same address is passed both as source URL and as human-facing URL.
    site = "https://www.echosciences-auvergne.fr"
    output_path = "events-echosciences.json"

    importer = URL2Events(SimpleDownloader(), CExtractor())
    extracted = importer.process(
        site,
        site,
        cache="cache-echosciences.html",
        default_values={},
        published=True,
    )

    print("Saving events to file {}".format(output_path))
    with open(output_path, "w") as out:
        # default=str stringifies values json cannot serialise natively.
        json.dump(extracted, out, indent=4, default=str)


if __name__ == "__main__":
    main()

View File

@ -38,6 +38,7 @@ from .import_tasks.generic_extractors import (
mobilizon,
ical,
fbevents,
echosciences,
)
from .import_tasks.importer import URL2Events
@ -207,6 +208,8 @@ def run_recurrent_import_internal(rimport, downloader, req_id):
extractor = mobilizon.CExtractor()
elif rimport.processor == RecurrentImport.PROCESSOR.LECAMELEON:
extractor = lecameleon.CExtractor()
elif rimport.processor == RecurrentImport.PROCESSOR.ECHOSCIENCES:
extractor = echosciences.CExtractor()
else:
extractor = None

View File

@ -0,0 +1,131 @@
import logging
from datetime import datetime, timedelta
from django.conf import settings
import dateutil.parser
import requests
from bs4 import BeautifulSoup
from ..extractor import Extractor
logger = logging.getLogger(__name__)
# A class dedicated to get events from echosciences
class CExtractor(Extractor):
    """Extractor that reads upcoming events from an Echosciences open API.

    The ``content`` handed to :meth:`extract` is ignored: the extractor
    queries the API itself with ``requests``, authenticating with
    ``settings.ECHOSCIENCES_TOKEN``.
    """

    # API paths appended to the base site URL given to extract().
    events_api = "/open_api/v1/events"
    event_api = "/open_api/v1/events/"
    # Seconds to wait for the API before giving up, so that a stalled
    # server cannot block the import forever.
    request_timeout = 30

    def __init__(self):
        super().__init__()
        # NOTE(review): presumably tells the import pipeline that no
        # downloader is needed for this extractor - confirm in the caller.
        self.no_downloader = True

    def token_available(self):
        """Return True when a non-empty Echosciences token is configured."""
        return bool(getattr(settings, "ECHOSCIENCES_TOKEN", ""))

    def _get_data(self, url, params=None):
        """Perform an authenticated GET on ``url`` and return the decoded JSON.

        Args:
            url: full URL of the API endpoint.
            params: optional dict of request parameters, sent as a JSON body.

        Raises:
            Exception: if the status code is not 200 or if the decoded
                answer is a dict containing an ``"errors"`` key.
        """
        headers = {"Authorization": "Token token=" + settings.ECHOSCIENCES_TOKEN}
        # NOTE(review): parameters travel in the JSON body of a GET request
        # (json=) rather than in the query string (params=). Kept as is -
        # confirm that this is what the API expects.
        response = requests.get(
            url=url,
            json={} if params is None else params,
            headers=headers,
            timeout=self.request_timeout,
        )

        if response.status_code != 200:
            raise Exception(
                f"Error while requesting. Status code: {response.status_code}"
            )

        response_json = response.json()
        # The event list is a JSON array: only a dict can carry "errors".
        if isinstance(response_json, dict) and "errors" in response_json:
            raise Exception(
                f"Errors while requesting {url}. {str(response_json['errors'])}"
            )
        return response_json

    def _oncoming_events(self, url):
        """Return the events starting within the next 30 weeks.

        Pages of 20 events are requested until an empty page is returned,
        with a hard limit of 199 pages.
        """
        # Compute the window once so every page uses the same dates.
        today = datetime.today()
        start_date = today.strftime("%Y-%m-%d")
        end_date = (today + timedelta(weeks=30)).strftime("%Y-%m-%d")

        result = []
        for page in range(1, 200):
            batch = self._get_data(
                url + CExtractor.events_api,
                {
                    "per_page": 20,
                    "page": page,
                    "order_by": "start_date",
                    "order_type": "asc",
                    "start_date": start_date,
                    "end_date": end_date,
                },
            )
            if not batch:
                break
            result.extend(batch)
        return result

    def _get_event(self, url, slug):
        """Return the detailed description of the event identified by ``slug``."""
        return self._get_data(url + CExtractor.event_api + slug)

    @staticmethod
    def _build_location(event):
        """Join the place name and the address of ``event``, skipping empty parts."""
        parts = []
        place = event.get("place")
        if place is not None:
            parts.append(place["name"])
        parts.append(event.get("address"))
        return ", ".join(part for part in parts if part)

    def extract(
        self,
        content,
        url,
        url_human=None,
        default_values=None,
        published=False,
    ):
        """Query the API at ``url`` and register every upcoming event.

        Args:
            content: unused, the data is fetched from the API.
            url: base URL of the Echosciences site.
            url_human: unused, each event provides its own URL.
            default_values: forwarded to ``add_event``.
            published: forwarded to ``add_event``.

        Returns:
            The value of ``self.get_structure()`` after all events were added.

        Raises:
            Exception: if no token is configured or if an API request fails.
        """
        self.set_header(url)
        self.clear_events()

        if not self.token_available():
            raise Exception("Token not available")

        for summary in self._oncoming_events(url):
            # The list only gives a slug; details need one request per event.
            event = self._get_event(url, summary["slug"])

            title = event["name"]
            event_url = event["url"]
            # The image may be absent or null, as the place can be.
            image = (event.get("image") or {}).get("url")
            location = self._build_location(event)

            soup = BeautifulSoup(event.get("body") or "", "html.parser")
            description = soup.get_text()

            start = dateutil.parser.isoparse(event["start_date"])
            start_day = start.date()
            start_time = start.time()

            # An event without end date is registered with no end day/time.
            end_raw = event.get("end_date")
            if end_raw:
                end = dateutil.parser.isoparse(end_raw)
                end_day = end.date()
                end_time = end.time()
            else:
                end_day = None
                end_time = None

            self.add_event(
                default_values,
                title,
                None,
                start_day,
                location,
                description,
                [],
                uuids=[event_url],
                recurrences=None,
                url_human=event_url,
                start_time=start_time,
                published=published,
                image=image,
                end_day=end_day,
                end_time=end_time,
            )

        return self.get_structure()

View File

@ -11,8 +11,7 @@ from ..extractor import Extractor
logger = logging.getLogger(__name__)
# A class dedicated to get events from les amis du temps des cerises
# Website https://amisdutempsdescerises.org/
# A class dedicated to get events from Mobilizon
class CExtractor(Extractor):
def __init__(self):
super().__init__()

View File

@ -0,0 +1,45 @@
# Generated by Django 4.2.19 on 2025-04-04 00:24
from django.db import migrations, models
class Migration(migrations.Migration):
    """Alter ``RecurrentImport.processor`` so that its choices include "Echosciences"."""

    # (stored value, label) pairs accepted by the processor field.
    _PROCESSOR_CHOICES = [
        ("ical", "ical"),
        ("icalnobusy", "ical no busy"),
        ("icalnovc", "ical no VC"),
        ("ical naive tz", "ical naive timezone"),
        ("lacoope", "lacoope.org"),
        ("lacomedie", "la comédie"),
        ("lefotomat", "le fotomat"),
        ("lapucealoreille", "la puce à l'oreille"),
        ("Plugin wordpress MEC", "Plugin wordpress MEC"),
        ("Facebook events", "Événements d'une page FB"),
        ("Billetterie CF", "Billetterie Clermont-Ferrand"),
        ("arachnee", "Arachnée concert"),
        ("rio", "Le Rio"),
        ("raymonde", "La Raymonde"),
        ("apidae", "Agenda apidae tourisme"),
        ("iguana", "Agenda iguana (médiathèques)"),
        ("Mille formes", "Mille formes"),
        ("Amis cerises", "Les Amis du Temps des Cerises"),
        ("Mobilizon", "Mobilizon"),
        ("Le Caméléon", "Le caméléon"),
        ("Echosciences", "Echosciences"),
    ]

    dependencies = [("agenda_culturel", "0163_alter_tag_name")]

    operations = [
        migrations.AlterField(
            model_name="recurrentimport",
            name="processor",
            field=models.CharField(
                verbose_name="Processor",
                max_length=20,
                default="ical",
                choices=_PROCESSOR_CHOICES,
            ),
        ),
    ]

View File

@ -2817,6 +2817,7 @@ class RecurrentImport(models.Model):
AMISCERISES = "Amis cerises", _("Les Amis du Temps des Cerises")
MOBILIZON = "Mobilizon", _("Mobilizon")
LECAMELEON = "Le Caméléon", _("Le caméléon")
ECHOSCIENCES = "Echosciences", _("Echosciences")
class DOWNLOADER(models.TextChoices):
SIMPLE = "simple", _("simple")

View File

@ -35,6 +35,7 @@ ADMINS = [tuple(a.split(",")) for a in os_getenv("ADMINS", "").split(";")]
MANAGERS = [tuple(a.split(",")) for a in os_getenv("MANAGERS", "").split(";")]
SERVER_EMAIL = os_getenv("SERVER_EMAIL", "")
ECHOSCIENCES_TOKEN = os_getenv("ECHOSCIENCES_TOKEN", "")
# Application definition