Import du caméléon

Fix #115
This commit is contained in:
Jean-Marie Favreau
2025-03-16 15:01:42 +01:00
parent a53f404c24
commit f03ebb6458
5 changed files with 173 additions and 0 deletions

View File

@@ -24,6 +24,7 @@ from .import_tasks.custom_extractors import (
laraymonde,
mille_formes,
amisdutempsdescerises,
lecameleon,
)
from .import_tasks.downloader import (
ChromiumHeadlessDownloader,
@@ -204,6 +205,8 @@ def run_recurrent_import_internal(rimport, downloader, req_id):
extractor = amisdutempsdescerises.CExtractor()
elif rimport.processor == RecurrentImport.PROCESSOR.MOBILIZON:
extractor = mobilizon.CExtractor()
elif rimport.processor == RecurrentImport.PROCESSOR.LECAMELEON:
extractor = lecameleon.CExtractor()
else:
extractor = None

View File

@@ -0,0 +1,81 @@
from bs4 import BeautifulSoup
from datetime import date
from ..twosteps_extractor import TwoStepsExtractorNoPause
from ..extractor import Extractor
# A class dedicated to getting events from Le Caméléon
# URL: https://lesrdvducameleon.wixsite.com/lesrdv/les-spectacles
class CExtractor(TwoStepsExtractorNoPause):
    """Two-step extractor for the events published by Le Caméléon.

    The listing page is scanned for links to event pages, then every event
    page is parsed through its ``data-hook`` attributes.
    """

    # Dash variants rewritten to "-" before a time range is split.
    # NOTE(review): the separator really emitted by the site between the
    # start and end time should be confirmed against a live event page.
    _RANGE_DASHES = ("\u2010", "\u2011", "\u2012", "\u2013", "\u2014", "\u2212")

    def __init__(self):
        super().__init__()
        # Passed to parse_french_date as ``default_year_by_proximity``.
        self.today = date.today()

    def build_event_url_list(self, content, infuture_days=180):
        """Register the URL of every event linked from the listing page.

        Args:
            content: HTML of the listing page.
            infuture_days: not used by this extractor.
        """
        soup = BeautifulSoup(content, "html.parser")
        for link in soup.select("a[data-anchor='event-details']"):
            href = link.get("href")
            # An anchor without href carries no event page to visit.
            if href:
                self.add_event_url(href)

    def add_event_from_content(
        self,
        event_content,
        event_url,
        url_human=None,
        default_values=None,
        published=False,
    ):
        """Parse one event page and register the event it describes.

        Title, date and location are required: a page lacking one of them
        raises ``AttributeError``. Description sections and the image are
        optional and are simply left out when absent.

        Args:
            event_content: HTML of the event page.
            event_url: URL of the event page; also used as uuid and as
                human-readable URL.
            url_human: not used, ``event_url`` is passed on instead.
            default_values: forwarded to ``add_event_with_props``.
            published: forwarded to ``add_event_with_props``.
        """
        soup = BeautifulSoup(event_content, "html.parser")
        title = soup.select_one("[data-hook='event-title']").text
        full_date = soup.select_one("[data-hook='event-full-date']").text
        location = soup.select_one("[data-hook='event-full-location']").text

        # Same "\n"-joined text as before when both sections exist, but a
        # missing section no longer aborts the whole event.
        sections = []
        for hook in ("about-section-text", "event-description"):
            node = soup.select_one(f"[data-hook='{hook}']")
            if node is not None:
                sections.append(node.get_text(separator="\n"))
        description = "\n".join(sections)

        start_day, start_time, end_day, end_time = self._parse_full_date(
            full_date
        )
        image, image_alt = self._extract_image(soup)

        self.add_event_with_props(
            default_values,
            event_url,
            title,
            None,  # NOTE(review): presumably the category slot - confirm
            start_day,
            location,
            description,
            [],  # NOTE(review): presumably the tags slot - confirm
            recurrences=None,
            uuids=[event_url],
            url_human=event_url,
            start_time=start_time,
            end_day=end_day,
            end_time=end_time,
            published=published,
            image=image,
            image_alt=image_alt,
        )

    def _parse_full_date(self, full_date):
        """Return (start_day, start_time, end_day, end_time) from a date label.

        The label is expected to look like "<day>, <start> - <end>". Without
        a comma nothing can be parsed and every value is ``None``. The end
        day is always the start day; a missing bound yields ``None`` for the
        corresponding time.
        """
        pieces = full_date.split(",")
        if len(pieces) < 2:
            return None, None, None, None

        start_day = Extractor.parse_french_date(
            pieces[0], default_year_by_proximity=self.today
        )

        hours = pieces[1]
        for dash in self._RANGE_DASHES:
            hours = hours.replace(dash, "-")
        bounds = [b.strip() for b in hours.split("-") if b.strip()]

        start_time = Extractor.parse_french_time(bounds[0]) if bounds else None
        end_time = (
            Extractor.parse_french_time(bounds[1]) if len(bounds) > 1 else None
        )
        return start_day, start_time, start_day, end_time

    @staticmethod
    def _extract_image(soup):
        """Return (image_url, image_alt) of the event image, or (None, None)."""
        img = soup.select_one("[data-hook='event-image'] img")
        if img is None:
            return None, None
        src = img.get("src")
        # Drop whatever follows the first ".jpg" (presumably resizing
        # parameters appended by the host - TODO confirm). Other formats
        # are kept untouched instead of getting a bogus ".jpg" suffix.
        if src and ".jpg" in src:
            src = src.split(".jpg")[0] + ".jpg"
        return src, img.get("alt")

View File

@@ -0,0 +1,44 @@
# Generated by Django 4.2.19 on 2025-03-16 14:27
from django.db import migrations, models
# (stored value, label) pairs accepted by RecurrentImport.processor once this
# migration is applied; the list includes the "Le Caméléon" entry.
_PROCESSOR_CHOICES = [
    ("ical", "ical"),
    ("icalnobusy", "ical no busy"),
    ("icalnovc", "ical no VC"),
    ("ical naive tz", "ical naive timezone"),
    ("lacoope", "lacoope.org"),
    ("lacomedie", "la comédie"),
    ("lefotomat", "le fotomat"),
    ("lapucealoreille", "la puce à l'oreille"),
    ("Plugin wordpress MEC", "Plugin wordpress MEC"),
    ("Facebook events", "Événements d'une page FB"),
    ("Billetterie CF", "Billetterie Clermont-Ferrand"),
    ("arachnee", "Arachnée concert"),
    ("rio", "Le Rio"),
    ("raymonde", "La Raymonde"),
    ("apidae", "Agenda apidae tourisme"),
    ("iguana", "Agenda iguana (médiathèques)"),
    ("Mille formes", "Mille formes"),
    ("Amis cerises", "Les Amis du Temps des Cerises"),
    ("Mobilizon", "Mobilizon"),
    ("Le Caméléon", "Le caméléon"),
]


class Migration(migrations.Migration):
    """Redefine the ``processor`` field of ``recurrentimport`` with its choices."""

    dependencies = [("agenda_culturel", "0159_event_editing_user")]

    operations = [
        migrations.AlterField(
            model_name="recurrentimport",
            name="processor",
            field=models.CharField(
                choices=_PROCESSOR_CHOICES,
                default="ical",
                max_length=20,
                verbose_name="Processor",
            ),
        ),
    ]

View File

@@ -2617,6 +2617,7 @@ class RecurrentImport(models.Model):
MILLEFORMES = "Mille formes", _("Mille formes")
AMISCERISES = "Amis cerises", _("Les Amis du Temps des Cerises")
MOBILIZON = "Mobilizon", _("Mobilizon")
LECAMELEON = "Le Caméléon", _("Le caméléon")
class DOWNLOADER(models.TextChoices):
SIMPLE = "simple", _("simple")