On ajoute l'import de pages de l'agenda associations CF

Fix #277
This commit is contained in:
Jean-Marie Favreau
2025-03-09 15:54:28 +01:00
parent d401f533bd
commit ae26f3630c
3 changed files with 112 additions and 0 deletions

View File

@@ -0,0 +1,38 @@
#!/usr/bin/python3
# coding: utf-8
import json
import os
import sys
# Directory containing this script, with symlinks resolved.
current = os.path.dirname(os.path.realpath(__file__))
# Directory one level above the script's own directory.
parent = os.path.dirname(current)
# Register the parent directory and its "src" subdirectory on the module
# search path so the `src.agenda_culturel...` imports that follow can resolve.
sys.path.extend([parent, parent + "/src"])
from src.agenda_culturel.import_tasks.downloader import (
ChromiumHeadlessDownloader,
)
from src.agenda_culturel.import_tasks.custom_extractors.associations_cf import (
CExtractor,
)
from src.agenda_culturel.import_tasks.importer import URL2Events
if __name__ == "__main__":
    # Manual run: process a single event page from the Clermont-Ferrand
    # associations site and dump the resulting events to a JSON file.
    source_url = (
        "https://associations.clermont-ferrand.fr/evenement/week-end-multi-culturel"
    )
    output_path = "event-asso_cf.json"

    # Pair the headless-Chromium downloader with the associations CF extractor.
    pipeline = URL2Events(ChromiumHeadlessDownloader(), CExtractor())
    # NOTE(review): `cache` is presumably the local file where the downloaded
    # page is kept between runs — confirm against URL2Events.process.
    extracted = pipeline.process(source_url, cache="asso_cf.html", published=True)

    print("Saving events to file {}".format(output_path))
    with open(output_path, "w") as handle:
        # default=str stringifies any value json cannot serialize natively.
        json.dump(extracted, handle, indent=4, default=str)