Implémentation de l'import echosciences auvergne
This commit is contained in:
parent
9ee1975528
commit
b173baa76c
47
experimentations/get_echosciences.py
Executable file
47
experimentations/get_echosciences.py
Executable file
@ -0,0 +1,47 @@
|
||||
#!/usr/bin/python3
|
||||
# coding: utf-8
|
||||
|
||||
# To run this import outside of Django, set an environment variable holding the Echosciences token:
|
||||
# export ECHOSCIENCES_TOKEN=<your token>
|
||||
|
||||
import json
|
||||
import os
|
||||
import sys
|
||||
|
||||
# Resolve the directory that contains this file, then the directory
# that contains that one.
current = os.path.dirname(os.path.realpath(__file__))
parent = os.path.dirname(current)

# Make the parent directory and its "src" sub-directory importable so the
# "src.agenda_culturel" imports that follow can be resolved.
sys.path.extend([parent, parent + "/src"])
|
||||
|
||||
from src.agenda_culturel.import_tasks.downloader import SimpleDownloader
|
||||
from src.agenda_culturel.import_tasks.generic_extractors.echosciences import (
|
||||
CExtractor,
|
||||
)
|
||||
from src.agenda_culturel.import_tasks.importer import URL2Events
|
||||
|
||||
|
||||
def main():
    """Import the Echosciences Auvergne events and dump them to a JSON file."""
    # The same address is used both as the import URL and as the
    # human-facing URL.
    site_url = "https://www.echosciences-auvergne.fr"

    importer = URL2Events(SimpleDownloader(), CExtractor())
    events = importer.process(
        site_url,
        site_url,
        cache="cache-echosciences.html",
        default_values={},
        published=True,
    )

    export_path = "events-echosciences.json"
    print("Saving events to file {}".format(export_path))
    with open(export_path, "w") as out:
        # default=str stringifies any value json cannot serialize natively.
        json.dump(events, out, indent=4, default=str)


if __name__ == "__main__":
    main()
|
@ -38,6 +38,7 @@ from .import_tasks.generic_extractors import (
|
||||
mobilizon,
|
||||
ical,
|
||||
fbevents,
|
||||
echosciences,
|
||||
)
|
||||
from .import_tasks.importer import URL2Events
|
||||
|
||||
@ -207,6 +208,8 @@ def run_recurrent_import_internal(rimport, downloader, req_id):
|
||||
extractor = mobilizon.CExtractor()
|
||||
elif rimport.processor == RecurrentImport.PROCESSOR.LECAMELEON:
|
||||
extractor = lecameleon.CExtractor()
|
||||
elif rimport.processor == RecurrentImport.PROCESSOR.ECHOSCIENCES:
|
||||
extractor = echosciences.CExtractor()
|
||||
else:
|
||||
extractor = None
|
||||
|
||||
|
@ -0,0 +1,131 @@
|
||||
import logging
|
||||
from datetime import datetime, timedelta
|
||||
from django.conf import settings
|
||||
import dateutil.parser
|
||||
import requests
|
||||
from bs4 import BeautifulSoup
|
||||
|
||||
from ..extractor import Extractor
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
# A class dedicated to get events from echosciences
|
||||
class CExtractor(Extractor):
    """Extractor that builds events from the open API of an Echosciences site.

    The extractor does not parse downloaded page content: it queries the
    JSON API located under the site URL, authenticating with the token
    stored in ``settings.ECHOSCIENCES_TOKEN``.
    """

    # Path (appended to the site URL) of the endpoint listing events.
    events_api = "/open_api/v1/events"
    # Path prefix of the endpoint describing one event; the slug is appended.
    event_api = "/open_api/v1/events/"
    # Seconds to wait for the API before giving up, so that a stalled
    # connection cannot block an import forever.
    request_timeout = 30

    def __init__(self):
        super().__init__()
        # NOTE(review): presumably tells the import pipeline that no page
        # needs to be downloaded before extract() is called -- confirm
        # against the code that reads this flag.
        self.no_downloader = True

    def token_available(self):
        """Return True when a non-empty Echosciences API token is configured."""
        return bool(settings.ECHOSCIENCES_TOKEN)

    def _get_data(self, url, params=None):
        """Send an authenticated GET request and return the decoded JSON.

        Args:
            url: Full URL of the API endpoint.
            params: Optional dict of query options. It is sent as the JSON
                body of the request, as the original implementation did.

        Returns:
            The decoded JSON payload of the response.

        Raises:
            Exception: If the status code is not 200, or if the payload is
                an object carrying an "errors" entry.
        """
        headers = {"Authorization": "Token token=" + settings.ECHOSCIENCES_TOKEN}

        response = requests.get(
            url=url,
            # An empty dict is still sent when no params are given, which
            # keeps the request identical to the historical behaviour.
            json={} if params is None else params,
            headers=headers,
            timeout=self.request_timeout,
        )

        if response.status_code != 200:
            raise Exception(
                f"Error while requesting. Status code: {response.status_code}"
            )

        response_json = response.json()
        # Read the same key that is tested, so the API error message is
        # reported instead of being masked by a KeyError.
        if isinstance(response_json, dict) and "errors" in response_json:
            raise Exception(
                f"Errors while requesting {url}. {response_json['errors']}"
            )

        return response_json

    def _oncoming_events(self, url):
        """Return the events starting between today and 30 weeks from now.

        Pages of 20 events are requested in ascending start-date order until
        an empty page is returned (at most 199 pages).

        Args:
            url: Base URL of the Echosciences site.

        Returns:
            A list with the concatenated content of every non-empty page.
        """
        # Compute the window once so every page is queried with the same
        # bounds.
        today = datetime.today()
        start_date = today.strftime("%Y-%m-%d")
        end_date = (today + timedelta(weeks=30)).strftime("%Y-%m-%d")

        result = []
        for page in range(1, 200):
            batch = self._get_data(
                url + CExtractor.events_api,
                {
                    "per_page": 20,
                    "page": page,
                    "order_by": "start_date",
                    "order_type": "asc",
                    "start_date": start_date,
                    "end_date": end_date,
                },
            )
            if len(batch) == 0:
                break
            result.extend(batch)

        return result

    def _get_event(self, url, slug):
        """Return the detailed description of the event identified by *slug*."""
        return self._get_data(url + CExtractor.event_api + slug)

    def extract(
        self,
        content,
        url,
        url_human=None,
        default_values=None,
        published=False,
    ):
        """Fetch the upcoming events of the site at *url* and register them.

        Args:
            content: Not read by this extractor; the data comes from the API.
            url: Base URL of the Echosciences site.
            url_human: Not read by this extractor; each event uses its own
                URL as human-facing URL.
            default_values: Passed through to ``add_event``.
            published: Passed through to ``add_event``.

        Returns:
            The value of ``self.get_structure()`` once every event is added.

        Raises:
            Exception: If no API token is configured, or if an API request
                fails (see ``_get_data``).
        """
        self.set_header(url)
        self.clear_events()

        if not self.token_available():
            raise Exception("Token not available")

        for summary in self._oncoming_events(url):
            # The listing entry is only used for its slug; the event fields
            # are read from the per-event endpoint.
            event = self._get_event(url, summary["slug"])

            title = event["name"]
            event_url = event["url"]
            # A missing or null image yields None instead of a crash.
            image = (event.get("image") or {}).get("url")

            # Keep only the location parts that are actually filled in, so a
            # null place name or address cannot break the join.
            place = event.get("place")
            place_name = place["name"] if place is not None else None
            location = ", ".join(
                part for part in (place_name, event.get("address")) if part
            )

            # The body is HTML; only its text content is kept.
            description = BeautifulSoup(
                event.get("body") or "", "html.parser"
            ).get_text()

            start = dateutil.parser.isoparse(event["start_date"])
            end = dateutil.parser.isoparse(event["end_date"])

            self.add_event(
                default_values,
                title,
                None,
                start.date(),
                location,
                description,
                [],
                uuids=[event_url],
                recurrences=None,
                url_human=event_url,
                start_time=start.time(),
                published=published,
                image=image,
                end_day=end.date(),
                end_time=end.time(),
            )

        return self.get_structure()
|
@ -11,8 +11,7 @@ from ..extractor import Extractor
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
# A class dedicated to get events from les amis du temps des cerises
|
||||
# Website https://amisdutempsdescerises.org/
|
||||
# A class dedicated to get events from Mobilizon
|
||||
class CExtractor(Extractor):
|
||||
def __init__(self):
|
||||
super().__init__()
|
||||
|
@ -0,0 +1,45 @@
|
||||
# Generated by Django 4.2.19 on 2025-04-04 00:24
|
||||
|
||||
from django.db import migrations, models
|
||||
|
||||
|
||||
# (stored value, label) pairs accepted by the "processor" field.
_PROCESSOR_CHOICES = [
    ("ical", "ical"),
    ("icalnobusy", "ical no busy"),
    ("icalnovc", "ical no VC"),
    ("ical naive tz", "ical naive timezone"),
    ("lacoope", "lacoope.org"),
    ("lacomedie", "la comédie"),
    ("lefotomat", "le fotomat"),
    ("lapucealoreille", "la puce à l'oreille"),
    ("Plugin wordpress MEC", "Plugin wordpress MEC"),
    ("Facebook events", "Événements d'une page FB"),
    ("Billetterie CF", "Billetterie Clermont-Ferrand"),
    ("arachnee", "Arachnée concert"),
    ("rio", "Le Rio"),
    ("raymonde", "La Raymonde"),
    ("apidae", "Agenda apidae tourisme"),
    ("iguana", "Agenda iguana (médiathèques)"),
    ("Mille formes", "Mille formes"),
    ("Amis cerises", "Les Amis du Temps des Cerises"),
    ("Mobilizon", "Mobilizon"),
    ("Le Caméléon", "Le caméléon"),
    ("Echosciences", "Echosciences"),
]


class Migration(migrations.Migration):
    """Redeclare the choices of ``recurrentimport.processor``.

    The choice list includes the "Echosciences" entry.
    """

    dependencies = [("agenda_culturel", "0163_alter_tag_name")]

    operations = [
        migrations.AlterField(
            model_name="recurrentimport",
            name="processor",
            field=models.CharField(
                choices=_PROCESSOR_CHOICES,
                default="ical",
                max_length=20,
                verbose_name="Processor",
            ),
        ),
    ]
|
@ -2817,6 +2817,7 @@ class RecurrentImport(models.Model):
|
||||
AMISCERISES = "Amis cerises", _("Les Amis du Temps des Cerises")
|
||||
MOBILIZON = "Mobilizon", _("Mobilizon")
|
||||
LECAMELEON = "Le Caméléon", _("Le caméléon")
|
||||
ECHOSCIENCES = "Echosciences", _("Echosciences")
|
||||
|
||||
class DOWNLOADER(models.TextChoices):
|
||||
SIMPLE = "simple", _("simple")
|
||||
|
@ -35,6 +35,7 @@ ADMINS = [tuple(a.split(",")) for a in os_getenv("ADMINS", "").split(";")]
|
||||
MANAGERS = [tuple(a.split(",")) for a in os_getenv("MANAGERS", "").split(";")]
|
||||
SERVER_EMAIL = os_getenv("SERVER_EMAIL", "")
|
||||
|
||||
ECHOSCIENCES_TOKEN = os_getenv("ECHOSCIENCES_TOKEN", "")
|
||||
|
||||
# Application definition
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user