From f03ebb64586f54f5300b2adb90dd6b2403478910 Mon Sep 17 00:00:00 2001 From: Jean-Marie Favreau Date: Sun, 16 Mar 2025 15:01:42 +0100 Subject: [PATCH] =?UTF-8?q?Import=20du=20cam=C3=A9l=C3=A9on?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fix #115 --- experimentations/get_lecameleon_events.py | 44 ++++++++++ src/agenda_culturel/celery.py | 3 + .../custom_extractors/lecameleon.py | 81 +++++++++++++++++++ .../0160_alter_recurrentimport_processor.py | 44 ++++++++++ src/agenda_culturel/models.py | 1 + 5 files changed, 173 insertions(+) create mode 100755 experimentations/get_lecameleon_events.py create mode 100644 src/agenda_culturel/import_tasks/custom_extractors/lecameleon.py create mode 100644 src/agenda_culturel/migrations/0160_alter_recurrentimport_processor.py diff --git a/experimentations/get_lecameleon_events.py b/experimentations/get_lecameleon_events.py new file mode 100755 index 0000000..529ceb2 --- /dev/null +++ b/experimentations/get_lecameleon_events.py @@ -0,0 +1,44 @@ +#!/usr/bin/python3 +# coding: utf-8 + +import json +import os +import sys + +# getting the name of the directory +# where this file is present. +current = os.path.dirname(os.path.realpath(__file__)) + +# Getting the parent directory name +# where the current directory is present. +parent = os.path.dirname(current) + +# adding the parent directory to +# the sys.path.
+sys.path.append(parent) +sys.path.append(parent + "/src") + +from src.agenda_culturel.import_tasks.custom_extractors import lecameleon +from src.agenda_culturel.import_tasks.downloader import SimpleDownloader +from src.agenda_culturel.import_tasks.importer import URL2Events + +if __name__ == "__main__": + u2e = URL2Events(SimpleDownloader(), lecameleon.CExtractor()) + url = "https://lesrdvducameleon.wixsite.com/lesrdv/les-spectacles" + url_human = "https://lesrdvducameleon.wixsite.com/lesrdv/les-spectacles" + + try: + events = u2e.process( + url, + url_human, + cache="cache-lecameleon.xml", + default_values={}, + published=True, + ) + + exportfile = "events-lecameleon.json" + print("Saving events to file {}".format(exportfile)) + with open(exportfile, "w") as f: + json.dump(events, f, indent=4, default=str) + except Exception as e: + print("Exception: " + str(e)) diff --git a/src/agenda_culturel/celery.py b/src/agenda_culturel/celery.py index f391e9a..cb4c045 100644 --- a/src/agenda_culturel/celery.py +++ b/src/agenda_culturel/celery.py @@ -24,6 +24,7 @@ from .import_tasks.custom_extractors import ( laraymonde, mille_formes, amisdutempsdescerises, + lecameleon, ) from .import_tasks.downloader import ( ChromiumHeadlessDownloader, @@ -204,6 +205,8 @@ def run_recurrent_import_internal(rimport, downloader, req_id): extractor = amisdutempsdescerises.CExtractor() elif rimport.processor == RecurrentImport.PROCESSOR.MOBILIZON: extractor = mobilizon.CExtractor() + elif rimport.processor == RecurrentImport.PROCESSOR.LECAMELEON: + extractor = lecameleon.CExtractor() else: extractor = None diff --git a/src/agenda_culturel/import_tasks/custom_extractors/lecameleon.py b/src/agenda_culturel/import_tasks/custom_extractors/lecameleon.py new file mode 100644 index 0000000..04e0a01 --- /dev/null +++ b/src/agenda_culturel/import_tasks/custom_extractors/lecameleon.py @@ -0,0 +1,81 @@ +from bs4 import BeautifulSoup +from datetime import date + +from ..twosteps_extractor import 
TwoStepsExtractorNoPause
+from ..extractor import Extractor
+
+
+# A class dedicated to get events from Le Caméléon
+# URL: https://lesrdvducameleon.wixsite.com/lesrdv/les-spectacles
+class CExtractor(TwoStepsExtractorNoPause):
+    """Extract the events of Le Caméléon in two steps.
+
+    The listing page provides the links to the event pages
+    (build_event_url_list); each event page is then parsed into one event
+    (add_event_from_content).
+    """
+
+    def __init__(self):
+        super().__init__()
+        # Passed to parse_french_date as default_year_by_proximity.
+        self.today = date.today()
+
+    def build_event_url_list(self, content, infuture_days=180):
+        """Register the href of every event-details anchor found in content.
+
+        infuture_days is accepted but not used by this extractor.
+        """
+        soup = BeautifulSoup(content, "html.parser")
+
+        links = soup.select("a[data-anchor='event-details']")
+        if links:
+            for lk in links:
+                self.add_event_url(lk["href"])
+
+    def add_event_from_content(
+        self,
+        event_content,
+        event_url,
+        url_human=None,
+        default_values=None,
+        published=False,
+    ):
+        """Parse one event page and register it with add_event_with_props.
+
+        Title, date, location, description and image are read from the
+        elements carrying the matching data-hook attributes. The date text
+        is split on "," into a day part and a time part, and the time part
+        is split on "–" into start and end times. When the date text has no
+        comma, the event is registered with no day and no times.
+
+        url_human is accepted but ignored: event_url is used as the human
+        URL and as the only uuid of the event.
+        """
+        soup = BeautifulSoup(event_content, "html.parser")
+
+        title = soup.select_one("[data-hook='event-title']").text
+        d = soup.select_one("[data-hook='event-full-date']").text
+        location = soup.select_one("[data-hook='event-full-location']").text
+        description = (
+            soup.select_one("[data-hook='about-section-text']").get_text(separator="\n")
+            + "\n"
+            + soup.select_one("[data-hook='event-description']").get_text(
+                separator="\n"
+            )
+        )
+        img = soup.select_one("[data-hook='event-image'] img")
+
+        els = d.split(",")
+        if len(els) > 1:
+            start_day = Extractor.parse_french_date(
+                els[0], default_year_by_proximity=self.today
+            )
+            end_day = start_day
+            es = els[1].split("–")
+            start_time = Extractor.parse_french_time(es[0])
+            # Without a "–" separator there is only a start time.
+            end_time = Extractor.parse_french_time(es[1]) if len(es) > 1 else None
+        else:
+            # Every name read by the add_event_with_props call below must be
+            # bound on this branch too, end_day included.
+            start_day = None
+            end_day = None
+            start_time = None
+            end_time = None
+
+        # Keep the URL up to and including its first ".jpg".
+        image = img["src"].split(".jpg")[0] + ".jpg"
+        image_alt = img["alt"]
+
+        self.add_event_with_props(
+            default_values,
+            event_url,
+            title,
+            None,
+            start_day,
+            location,
+            description,
+            [],
+            recurrences=None,
+            uuids=[event_url],
+            url_human=event_url,
+            start_time=start_time,
+            end_day=end_day,
+            end_time=end_time,
+            published=published,
+            image=image,
+            image_alt=image_alt,
+        )
diff --git
a/src/agenda_culturel/migrations/0160_alter_recurrentimport_processor.py b/src/agenda_culturel/migrations/0160_alter_recurrentimport_processor.py new file mode 100644 index 0000000..0218ac5 --- /dev/null +++ b/src/agenda_culturel/migrations/0160_alter_recurrentimport_processor.py @@ -0,0 +1,44 @@ +# Generated by Django 4.2.19 on 2025-03-16 14:27 + +from django.db import migrations, models + + +class Migration(migrations.Migration): + + dependencies = [ + ("agenda_culturel", "0159_event_editing_user"), + ] + + operations = [ + migrations.AlterField( + model_name="recurrentimport", + name="processor", + field=models.CharField( + choices=[ + ("ical", "ical"), + ("icalnobusy", "ical no busy"), + ("icalnovc", "ical no VC"), + ("ical naive tz", "ical naive timezone"), + ("lacoope", "lacoope.org"), + ("lacomedie", "la comédie"), + ("lefotomat", "le fotomat"), + ("lapucealoreille", "la puce à l'oreille"), + ("Plugin wordpress MEC", "Plugin wordpress MEC"), + ("Facebook events", "Événements d'une page FB"), + ("Billetterie CF", "Billetterie Clermont-Ferrand"), + ("arachnee", "Arachnée concert"), + ("rio", "Le Rio"), + ("raymonde", "La Raymonde"), + ("apidae", "Agenda apidae tourisme"), + ("iguana", "Agenda iguana (médiathèques)"), + ("Mille formes", "Mille formes"), + ("Amis cerises", "Les Amis du Temps des Cerises"), + ("Mobilizon", "Mobilizon"), + ("Le Caméléon", "Le caméléon"), + ], + default="ical", + max_length=20, + verbose_name="Processor", + ), + ), + ] diff --git a/src/agenda_culturel/models.py b/src/agenda_culturel/models.py index adec5e8..6a7199f 100644 --- a/src/agenda_culturel/models.py +++ b/src/agenda_culturel/models.py @@ -2617,6 +2617,7 @@ class RecurrentImport(models.Model): MILLEFORMES = "Mille formes", _("Mille formes") AMISCERISES = "Amis cerises", _("Les Amis du Temps des Cerises") MOBILIZON = "Mobilizon", _("Mobilizon") + LECAMELEON = "Le Caméléon", _("Le caméléon") class DOWNLOADER(models.TextChoices): SIMPLE = "simple", _("simple")