parent
a53f404c24
commit
f03ebb6458
44
experimentations/get_lecameleon_events.py
Executable file
44
experimentations/get_lecameleon_events.py
Executable file
@ -0,0 +1,44 @@
|
|||||||
|
#!/usr/bin/python3
|
||||||
|
# coding: utf-8
|
||||||
|
|
||||||
|
import json
|
||||||
|
import os
|
||||||
|
import sys
|
||||||
|
|
||||||
|
# getting the name of the directory
|
||||||
|
# where this file is present.
|
||||||
|
current = os.path.dirname(os.path.realpath(__file__))
|
||||||
|
|
||||||
|
# Getting the parent directory name
|
||||||
|
# where the current directory is present.
|
||||||
|
parent = os.path.dirname(current)
|
||||||
|
|
||||||
|
# adding the parent directory to
|
||||||
|
# the sys.path.
|
||||||
|
sys.path.append(parent)
|
||||||
|
sys.path.append(parent + "/src")
|
||||||
|
|
||||||
|
from src.agenda_culturel.import_tasks.custom_extractors import lecameleon
|
||||||
|
from src.agenda_culturel.import_tasks.downloader import SimpleDownloader
|
||||||
|
from src.agenda_culturel.import_tasks.importer import URL2Events
|
||||||
|
|
||||||
|
if __name__ == "__main__":
    # Standalone check of the Le Caméléon extractor: download the listing
    # page, extract its events and dump them as JSON to a file created
    # relative to the current working directory.
    source_url = "https://lesrdvducameleon.wixsite.com/lesrdv/les-spectacles"
    pipeline = URL2Events(SimpleDownloader(), lecameleon.CExtractor())

    try:
        events = pipeline.process(
            source_url,
            source_url,  # the human-facing URL is the very same page
            cache="cache-lecameleon.xml",
            default_values={},
            published=True,
        )

        exportfile = "events-lecameleon.json"
        print(f"Saving events to file {exportfile}")
        with open(exportfile, "w") as out:
            # default=str stringifies any value json cannot encode natively.
            json.dump(events, out, indent=4, default=str)
    except Exception as e:
        # Best-effort script: report the failure instead of a traceback.
        print(f"Exception: {e}")
|
@ -24,6 +24,7 @@ from .import_tasks.custom_extractors import (
|
|||||||
laraymonde,
|
laraymonde,
|
||||||
mille_formes,
|
mille_formes,
|
||||||
amisdutempsdescerises,
|
amisdutempsdescerises,
|
||||||
|
lecameleon,
|
||||||
)
|
)
|
||||||
from .import_tasks.downloader import (
|
from .import_tasks.downloader import (
|
||||||
ChromiumHeadlessDownloader,
|
ChromiumHeadlessDownloader,
|
||||||
@ -204,6 +205,8 @@ def run_recurrent_import_internal(rimport, downloader, req_id):
|
|||||||
extractor = amisdutempsdescerises.CExtractor()
|
extractor = amisdutempsdescerises.CExtractor()
|
||||||
elif rimport.processor == RecurrentImport.PROCESSOR.MOBILIZON:
|
elif rimport.processor == RecurrentImport.PROCESSOR.MOBILIZON:
|
||||||
extractor = mobilizon.CExtractor()
|
extractor = mobilizon.CExtractor()
|
||||||
|
elif rimport.processor == RecurrentImport.PROCESSOR.LECAMELEON:
|
||||||
|
extractor = lecameleon.CExtractor()
|
||||||
else:
|
else:
|
||||||
extractor = None
|
extractor = None
|
||||||
|
|
||||||
|
@ -0,0 +1,81 @@
|
|||||||
|
from bs4 import BeautifulSoup
|
||||||
|
from datetime import date
|
||||||
|
|
||||||
|
from ..twosteps_extractor import TwoStepsExtractorNoPause
|
||||||
|
from ..extractor import Extractor
|
||||||
|
|
||||||
|
|
||||||
|
# A class dedicated to get events from Le Caméléon
|
||||||
|
# URL: https://lesrdvducameleon.wixsite.com/lesrdv/les-spectacles
|
||||||
|
class CExtractor(TwoStepsExtractorNoPause):
    """Two-step extractor for the events published by Le Caméléon.

    Step one collects the event-detail links found on the listing page;
    step two parses each event page and registers the event through
    ``add_event_with_props``.
    """

    def __init__(self):
        super().__init__()
        # Reference date passed to Extractor.parse_french_date as
        # ``default_year_by_proximity``.
        self.today = date.today()

    def build_event_url_list(self, content, infuture_days=180):
        """Register the URL of every event linked from the listing page.

        Args:
            content: HTML of the listing page.
            infuture_days: Not used by this extractor; presumably kept for
                compatibility with the base-class signature (to confirm).
        """
        soup = BeautifulSoup(content, "html.parser")

        # Requiring [href] in the selector skips anchors without a target
        # instead of failing on them with a KeyError.
        for link in soup.select("a[data-anchor='event-details'][href]"):
            self.add_event_url(link["href"])

    def add_event_from_content(
        self,
        event_content,
        event_url,
        url_human=None,
        default_values=None,
        published=False,
    ):
        """Parse one event page and register the event it describes.

        Args:
            event_content: HTML of the event page.
            event_url: URL of the event page; also used as the event uuid
                and as its human-facing URL.
            url_human: Accepted but not used; ``event_url`` is passed on
                instead.
            default_values: Forwarded unchanged to ``add_event_with_props``.
            published: Forwarded unchanged to ``add_event_with_props``.

        Raises:
            AttributeError: If the title, date or location element is
                missing from the page.
        """
        soup = BeautifulSoup(event_content, "html.parser")

        # Title, date and location are mandatory: a page without them still
        # fails loudly, exactly as before.
        title = soup.select_one("[data-hook='event-title']").text
        full_date = soup.select_one("[data-hook='event-full-date']").text
        location = soup.select_one("[data-hook='event-full-location']").text

        # The description is the concatenation of two optional sections;
        # a missing section is skipped instead of raising.
        description = "\n".join(
            section.get_text(separator="\n")
            for section in (
                soup.select_one("[data-hook='about-section-text']"),
                soup.select_one("[data-hook='event-description']"),
            )
            if section is not None
        )

        # Initialise every date/time field up front so that none of them is
        # left unbound when the date text has an unexpected shape.
        start_day = end_day = start_time = end_time = None

        # The date text is split as "<day>, <start> – <end>".
        date_parts = full_date.split(",")
        if len(date_parts) > 1:
            start_day = Extractor.parse_french_date(
                date_parts[0], default_year_by_proximity=self.today
            )
            end_day = start_day
            hours = date_parts[1].split("–")
            start_time = Extractor.parse_french_time(hours[0])
            # The end time is optional: keep None when no range is given.
            if len(hours) > 1:
                end_time = Extractor.parse_french_time(hours[1])

        image = image_alt = None
        img = soup.select_one("[data-hook='event-image'] img")
        if img is not None:
            src = img.get("src")
            if src:
                # Cut everything after the first ".jpg" (presumably the
                # resizing suffix appended to the file URL — to confirm).
                # Sources without ".jpg" are kept untouched rather than
                # receiving a bogus extension.
                image = src.split(".jpg")[0] + ".jpg" if ".jpg" in src else src
            image_alt = img.get("alt")

        self.add_event_with_props(
            default_values,
            event_url,
            title,
            None,
            start_day,
            location,
            description,
            [],
            recurrences=None,
            uuids=[event_url],
            url_human=event_url,
            start_time=start_time,
            end_day=end_day,
            end_time=end_time,
            published=published,
            image=image,
            image_alt=image_alt,
        )
|
@ -0,0 +1,44 @@
|
|||||||
|
# Generated by Django 4.2.19 on 2025-03-16 14:27
|
||||||
|
|
||||||
|
from django.db import migrations, models
|
||||||
|
|
||||||
|
|
||||||
|
class Migration(migrations.Migration):
    """Redeclare the choices of ``RecurrentImport.processor``.

    The choice list ends with the "Le Caméléon" entry.
    """

    # Must be applied after migration 0159 of the same app.
    dependencies = [
        ("agenda_culturel", "0159_event_editing_user"),
    ]

    operations = [
        migrations.AlterField(
            model_name="recurrentimport",
            name="processor",
            field=models.CharField(
                verbose_name="Processor",
                max_length=20,
                default="ical",
                # (stored value, human-readable label) pairs.
                choices=[
                    ("ical", "ical"),
                    ("icalnobusy", "ical no busy"),
                    ("icalnovc", "ical no VC"),
                    ("ical naive tz", "ical naive timezone"),
                    ("lacoope", "lacoope.org"),
                    ("lacomedie", "la comédie"),
                    ("lefotomat", "le fotomat"),
                    ("lapucealoreille", "la puce à l'oreille"),
                    ("Plugin wordpress MEC", "Plugin wordpress MEC"),
                    ("Facebook events", "Événements d'une page FB"),
                    ("Billetterie CF", "Billetterie Clermont-Ferrand"),
                    ("arachnee", "Arachnée concert"),
                    ("rio", "Le Rio"),
                    ("raymonde", "La Raymonde"),
                    ("apidae", "Agenda apidae tourisme"),
                    ("iguana", "Agenda iguana (médiathèques)"),
                    ("Mille formes", "Mille formes"),
                    ("Amis cerises", "Les Amis du Temps des Cerises"),
                    ("Mobilizon", "Mobilizon"),
                    ("Le Caméléon", "Le caméléon"),
                ],
            ),
        ),
    ]
|
@ -2617,6 +2617,7 @@ class RecurrentImport(models.Model):
|
|||||||
MILLEFORMES = "Mille formes", _("Mille formes")
|
MILLEFORMES = "Mille formes", _("Mille formes")
|
||||||
AMISCERISES = "Amis cerises", _("Les Amis du Temps des Cerises")
|
AMISCERISES = "Amis cerises", _("Les Amis du Temps des Cerises")
|
||||||
MOBILIZON = "Mobilizon", _("Mobilizon")
|
MOBILIZON = "Mobilizon", _("Mobilizon")
|
||||||
|
LECAMELEON = "Le Caméléon", _("Le caméléon")
|
||||||
|
|
||||||
class DOWNLOADER(models.TextChoices):
|
class DOWNLOADER(models.TextChoices):
|
||||||
SIMPLE = "simple", _("simple")
|
SIMPLE = "simple", _("simple")
|
||||||
|
Loading…
x
Reference in New Issue
Block a user