Import du caméléon

Fix #115
This commit is contained in:
Jean-Marie Favreau 2025-03-16 15:01:42 +01:00
parent a53f404c24
commit f03ebb6458
5 changed files with 173 additions and 0 deletions

View File

@ -0,0 +1,44 @@
#!/usr/bin/python3
# coding: utf-8
import json
import os
import sys
# Directory containing this script (symlinks resolved).
current = os.path.dirname(os.path.realpath(__file__))
# Directory one level above the script's directory.
parent = os.path.dirname(current)
# Make both the parent directory and its "src" sub-directory importable,
# so that the `src.agenda_culturel...` imports of this script resolve.
sys.path.extend((parent, parent + "/src"))
from src.agenda_culturel.import_tasks.custom_extractors import lecameleon
from src.agenda_culturel.import_tasks.downloader import SimpleDownloader
from src.agenda_culturel.import_tasks.importer import URL2Events
if __name__ == "__main__":
    # Page that is downloaded, and the page shown to humans: the same
    # listing of shows in both cases.
    source_url = "https://lesrdvducameleon.wixsite.com/lesrdv/les-spectacles"
    human_url = "https://lesrdvducameleon.wixsite.com/lesrdv/les-spectacles"
    output_path = "events-lecameleon.json"

    importer = URL2Events(SimpleDownloader(), lecameleon.CExtractor())

    try:
        extracted = importer.process(
            source_url,
            human_url,
            cache="cache-lecameleon.xml",
            default_values={},
            published=True,
        )

        print(f"Saving events to file {output_path}")
        with open(output_path, "w") as out:
            # default=str stringifies any value json cannot encode natively.
            json.dump(extracted, out, indent=4, default=str)
    except Exception as err:
        # Top-level boundary of a one-off script: report and exit quietly.
        print("Exception: " + str(err))

View File

@ -24,6 +24,7 @@ from .import_tasks.custom_extractors import (
laraymonde,
mille_formes,
amisdutempsdescerises,
lecameleon,
)
from .import_tasks.downloader import (
ChromiumHeadlessDownloader,
@ -204,6 +205,8 @@ def run_recurrent_import_internal(rimport, downloader, req_id):
extractor = amisdutempsdescerises.CExtractor()
elif rimport.processor == RecurrentImport.PROCESSOR.MOBILIZON:
extractor = mobilizon.CExtractor()
elif rimport.processor == RecurrentImport.PROCESSOR.LECAMELEON:
extractor = lecameleon.CExtractor()
else:
extractor = None

View File

@ -0,0 +1,81 @@
from bs4 import BeautifulSoup
from datetime import date
from ..twosteps_extractor import TwoStepsExtractorNoPause
from ..extractor import Extractor
# A class dedicated to getting events from Le Caméléon
# URL: https://lesrdvducameleon.wixsite.com/lesrdv/les-spectacles
class CExtractor(TwoStepsExtractorNoPause):
    """Two-step extractor for the shows listed by Le Caméléon.

    Step one scans the listing page for links to event detail pages; step
    two parses each detail page into a single event.
    """

    # Dash-like characters that may separate the start and end times.
    _DASHES = ("\u2010", "\u2011", "\u2012", "\u2013", "\u2014", "\u2212")

    def __init__(self):
        super().__init__()
        # Passed to the French date parser as ``default_year_by_proximity``.
        self.today = date.today()

    def build_event_url_list(self, content, infuture_days=180):
        """Register the URL of every event linked from the listing page.

        Args:
            content: HTML of the listing page.
            infuture_days: not used by this extractor.
        """
        soup = BeautifulSoup(content, "html.parser")
        for link in soup.select("a[data-anchor='event-details']"):
            href = link.get("href")
            # An anchor without a target cannot be followed: skip it
            # instead of failing on the missing attribute.
            if href:
                self.add_event_url(href)

    @classmethod
    def _split_time_range(cls, text):
        """Split ``"20:30 – 22:00"`` into ``("20:30", "22:00")``.

        Any of the dash variants in ``_DASHES`` (or a plain hyphen) is
        accepted as separator. A missing part is returned as ``None``.
        """
        for dash in cls._DASHES:
            text = text.replace(dash, "-")
        parts = [part.strip() for part in text.split("-")]
        parts = [part for part in parts if part]
        start = parts[0] if parts else None
        end = parts[1] if len(parts) > 1 else None
        return start, end

    def add_event_from_content(
        self,
        event_content,
        event_url,
        url_human=None,
        default_values=None,
        published=False,
    ):
        """Parse one event detail page and register the resulting event.

        Args:
            event_content: HTML of the event detail page.
            event_url: URL of that page; also used as the event's uuid and
                as its human-facing URL.
            url_human: accepted but not used; ``event_url`` is used instead.
            default_values: forwarded to ``add_event_with_props``.
            published: forwarded to ``add_event_with_props``.

        Raises:
            AttributeError: if the title, date or location element is
                missing from the page.
        """
        soup = BeautifulSoup(event_content, "html.parser")

        title = soup.select_one("[data-hook='event-title']").text
        d = soup.select_one("[data-hook='event-full-date']").text
        location = soup.select_one("[data-hook='event-full-location']").text

        # Concatenate whichever description sections the page provides.
        sections = [
            node.get_text(separator="\n")
            for node in (
                soup.select_one("[data-hook='about-section-text']"),
                soup.select_one("[data-hook='event-description']"),
            )
            if node is not None
        ]
        description = "\n".join(sections)

        # The full date is split on the comma: the day comes first, the
        # time (or time range) second.
        start_day = None
        end_day = None
        start_time = None
        end_time = None
        els = d.split(",")
        if len(els) > 1:
            start_day = Extractor.parse_french_date(
                els[0], default_year_by_proximity=self.today
            )
            end_day = start_day
            start_text, end_text = self._split_time_range(els[1])
            if start_text:
                start_time = Extractor.parse_french_time(start_text)
            if end_text:
                end_time = Extractor.parse_french_time(end_text)

        # The image is optional. When the source contains ".jpg", keep the
        # URL only up to and including that first ".jpg"; otherwise keep
        # the source untouched.
        image = None
        image_alt = None
        img = soup.select_one("[data-hook='event-image'] img")
        if img is not None:
            src = img.get("src")
            if src:
                head, marker, _ = src.partition(".jpg")
                image = head + marker if marker else src
            image_alt = img.get("alt")

        self.add_event_with_props(
            default_values,
            event_url,
            title,
            None,
            start_day,
            location,
            description,
            [],
            recurrences=None,
            uuids=[event_url],
            url_human=event_url,
            start_time=start_time,
            end_day=end_day,
            end_time=end_time,
            published=published,
            image=image,
            image_alt=image_alt,
        )

View File

@ -0,0 +1,44 @@
# Generated by Django 4.2.19 on 2025-03-16 14:27
from django.db import migrations, models
class Migration(migrations.Migration):
    # Redefines the `processor` field of the `recurrentimport` model with
    # the choice list below, which includes the "Le Caméléon" entry.

    # Must be applied after migration 0159 of the same app.
    dependencies = [
        ("agenda_culturel", "0159_event_editing_user"),
    ]

    operations = [
        migrations.AlterField(
            model_name="recurrentimport",
            name="processor",
            field=models.CharField(
                # Each pair is (stored value, human-readable label).
                choices=[
                    ("ical", "ical"),
                    ("icalnobusy", "ical no busy"),
                    ("icalnovc", "ical no VC"),
                    ("ical naive tz", "ical naive timezone"),
                    ("lacoope", "lacoope.org"),
                    ("lacomedie", "la comédie"),
                    ("lefotomat", "le fotomat"),
                    ("lapucealoreille", "la puce à l'oreille"),
                    ("Plugin wordpress MEC", "Plugin wordpress MEC"),
                    ("Facebook events", "Événements d'une page FB"),
                    ("Billetterie CF", "Billetterie Clermont-Ferrand"),
                    ("arachnee", "Arachnée concert"),
                    ("rio", "Le Rio"),
                    ("raymonde", "La Raymonde"),
                    ("apidae", "Agenda apidae tourisme"),
                    ("iguana", "Agenda iguana (médiathèques)"),
                    ("Mille formes", "Mille formes"),
                    ("Amis cerises", "Les Amis du Temps des Cerises"),
                    ("Mobilizon", "Mobilizon"),
                    ("Le Caméléon", "Le caméléon"),
                ],
                default="ical",
                # The longest stored value above ("Plugin wordpress MEC")
                # is exactly 20 characters long.
                max_length=20,
                verbose_name="Processor",
            ),
        ),
    ]

View File

@ -2617,6 +2617,7 @@ class RecurrentImport(models.Model):
MILLEFORMES = "Mille formes", _("Mille formes")
AMISCERISES = "Amis cerises", _("Les Amis du Temps des Cerises")
MOBILIZON = "Mobilizon", _("Mobilizon")
LECAMELEON = "Le Caméléon", _("Le caméléon")
class DOWNLOADER(models.TextChoices):
SIMPLE = "simple", _("simple")