Ajout source médiathèques
This commit is contained in:
		
							
								
								
									
										44
									
								
								experimentations/get_meditheques_clermont.py
									
									
									
									
									
										Executable file
									
								
							
							
						
						
									
										44
									
								
								experimentations/get_meditheques_clermont.py
									
									
									
									
									
										Executable file
									
								
							@@ -0,0 +1,44 @@
 | 
			
		||||
#!/usr/bin/python3
 | 
			
		||||
# coding: utf-8
 | 
			
		||||
 | 
			
		||||
import os
 | 
			
		||||
import json
 | 
			
		||||
import sys
 | 
			
		||||
 | 
			
		||||
# Make the directory above this script's directory, and its "src"
# sub-directory, importable so that the "src.agenda_culturel" imports
# below resolve when the script is run directly.
current = os.path.dirname(os.path.realpath(__file__))
parent = os.path.dirname(current)
sys.path.extend([parent, parent + "/src"])
 | 
			
		||||
 | 
			
		||||
from src.agenda_culturel.import_tasks.downloader import *
 | 
			
		||||
from src.agenda_culturel.import_tasks.extractor import *
 | 
			
		||||
from src.agenda_culturel.import_tasks.importer import *
 | 
			
		||||
from src.agenda_culturel.import_tasks.custom_extractors import *
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
if __name__ == "__main__":
    # Address handed to the downloader, and the page passed along as the
    # human-facing URL for the same agenda.
    url = "https://bibliotheques-clermontmetropole.eu/iguana/Service.PubContainer.cls?uuid=a4a1f992-06da-4ff4-9176-4af0a095c7d1"
    url_human = "https://bibliotheques-clermontmetropole.eu/iguana/www.main.cls?surl=AGENDA_Tout%20lagenda"
    exportfile = "events-mediatheques.json"

    u2e = URL2Events(SimpleDownloader(), iguana_agenda.CExtractor())

    # Any failure while downloading, extracting or saving is reported on
    # stdout instead of ending the script with a traceback.
    try:
        events = u2e.process(
            url,
            url_human,
            cache="cache-mediatheques.html",
            default_values={},
            published=True,
        )

        print("Saving events to file {}".format(exportfile))
        with open(exportfile, "w") as f:
            # default=str stringifies any value json cannot serialise natively.
            json.dump(events, f, indent=4, default=str)
    except Exception as e:
        print("Exception: " + str(e))
 | 
			
		||||
@@ -154,6 +154,8 @@ def run_recurrent_import_internal(rimport, downloader, req_id):
 | 
			
		||||
        extractor = laraymonde.CExtractor()
 | 
			
		||||
    elif rimport.processor == RecurrentImport.PROCESSOR.APIDAE:
 | 
			
		||||
        extractor = apidae_tourisme.CExtractor()
 | 
			
		||||
    elif rimport.processor == RecurrentImport.PROCESSOR.IGUANA:
 | 
			
		||||
        extractor = iguana_agenda.CExtractor()
 | 
			
		||||
    else:
 | 
			
		||||
        extractor = None
 | 
			
		||||
 | 
			
		||||
 
 | 
			
		||||
@@ -0,0 +1,111 @@
 | 
			
		||||
from ..generic_extractors import *
 | 
			
		||||
from bs4 import BeautifulSoup
 | 
			
		||||
from datetime import datetime
 | 
			
		||||
 | 
			
		||||
# A class dedicated to get events from an Iguana agenda (médiathèques)
# URL: https://bibliotheques-clermontmetropole.eu/iguana/www.main.cls?surl=AGENDA_Tout%20lagenda
 | 
			
		||||
class CExtractor(TwoStepsExtractorNoPause):
    """Extractor for events published through an Iguana agenda (médiathèques).

    Step one (``build_event_url_list``) reads the agenda listing and
    registers one item URL per single-date entry; step two
    (``add_event_from_content``) parses each item's XML payload into an event.
    """

    # Ordered (keywords, category) pairs: the first pair having one of its
    # keywords in the source label wins.
    _CATEGORY_KEYWORDS = (
        (("Cinéma",), "Cinéma"),
        (("Conférence", "Rencontres"), "Rencontres & débats"),
        (("Lecture", "Conte"), "Spectacles"),
        (("Atelier", "Jeux"), "Animations & Ateliers"),
        (("Numérique",), "Rendez-vous locaux"),
    )

    def __init__(self):
        super().__init__()

    @staticmethod
    def _text_or_empty(soup, selector):
        """Return the text of the first node matching *selector*, or ``""``."""
        node = soup.select_one(selector)
        return node.text if node is not None else ""

    def guess_category(self, category):
        """Map the agenda's category label to a category name.

        Returns "Sans catégorie" when no known keyword occurs in *category*.
        """
        for keywords, result in self._CATEGORY_KEYWORDS:
            if any(keyword in category for keyword in keywords):
                return result
        return "Sans catégorie"

    def guess_tags_from_category(self, category):
        """Return the list of tags implied by the agenda's category label."""
        tags = []
        if "Lecture" in category:
            tags.append("📖 lecture")
        if "Jeux" in category:
            tags.append("🎲 jeux")
        return tags

    def build_event_url_list(self, content, infuture_days=180):
        """Register the item URL of every single-date entry of the listing.

        *content* is the HTML of the agenda listing. *infuture_days* is
        accepted but not used by this extractor.

        Entries whose ``.until.maindate`` node has a non-empty text are
        treated as multi-date and ignored; an entry without that node is
        treated as single-date.
        """
        soup = BeautifulSoup(content, "html.parser")

        # assumes self.url_human and self.url were set before this is
        # called (they are not assigned in this class) — TODO confirm
        root_address_human = self.url_human.split("?")[0]
        root_address = self.url.split("Service")[0]

        for item in soup.select("li.listItem"):
            # The onclick attribute is split on double quotes: field 1 is
            # used as the item uuid, and the second '^'-separated part of
            # field 3 goes into the human-facing URL.
            elems = item["onclick"].split('"')
            v = elems[3].split("^")[1]
            content_item = elems[1]

            until = item.select_one(".until.maindate")
            if until is not None and until.text != "":
                continue  # multi-date entry

            url_human = (
                root_address_human + "?p=*&v=" + v + "#contentitem=" + content_item
            )
            url = (
                root_address
                + "Service.PubItem.cls?action=get&instance=*&uuid="
                + content_item
            )
            self.add_event_url(url)
            self.add_event_url_human(url, url_human)

    def add_event_from_content(
        self,
        event_content,
        event_url,
        url_human=None,
        default_values=None,
        published=False,
    ):
        """Parse one item payload and register the corresponding event.

        *event_content* is an XML document whose ``Content`` element holds
        the HTML description of the event. The image, public, access and
        category nodes are optional: a missing one yields ``None`` (image)
        or an empty string instead of aborting the event. Title, content,
        description, location, date and time are still required.
        """
        xml = BeautifulSoup(event_content, "xml")
        title = xml.select_one("Title").text
        content = xml.select_one("Content").text

        soup = BeautifulSoup(content, "html.parser")

        image_node = soup.select_one(".image img")
        image = image_node.get("src") if image_node is not None else None

        description = soup.select_one(".rightcolumn .content").text
        location = soup.select_one(".infos .location").text
        public = self._text_or_empty(soup, ".infos .public")
        start_day = Extractor.parse_french_date(
            soup.select_one(".infos .date .from").text
        )
        start_time = Extractor.parse_french_time(
            soup.select_one(".infos .date .time").text
        )
        acces = self._text_or_empty(soup, ".infos .acces")
        category = self._text_or_empty(soup, ".rightcolumn .category")
        infos = soup.select_one(".infos").text

        # The whole practical-information block is appended to the description.
        description = description + "\n" + infos

        # Tags must be derived from the raw label before it is replaced
        # by the guessed category.
        tags = self.guess_tags_from_category(category)
        category = self.guess_category(category)
        if "Tout-petits" in public or "Jeunesse" in public:
            tags.append("🎈 jeune public")
        if "Accès libre" in acces:
            tags.append("💶 gratuit")

        # NOTE(review): the url_human argument is not forwarded; event_url
        # is passed instead. Presumably the base class substitutes the URL
        # registered through add_event_url_human — confirm.
        self.add_event_with_props(
            default_values,
            event_url,
            title,
            category,
            start_day,
            location,
            description,
            tags,
            recurrences=None,
            uuids=[event_url],
            url_human=event_url,
            start_time=start_time,
            end_day=None,
            end_time=None,
            published=published,
            image=image,
            image_alt="",
        )
 | 
			
		||||
@@ -2011,6 +2011,7 @@ class RecurrentImport(models.Model):
 | 
			
		||||
        LERIO = "rio", _('Le Rio')
 | 
			
		||||
        LARAYMONDE = "raymonde", _('La Raymonde')
 | 
			
		||||
        APIDAE = 'apidae', _('Agenda apidae tourisme')
 | 
			
		||||
        IGUANA = 'iguana', _('Agenda iguana (médiathèques)')
 | 
			
		||||
 | 
			
		||||
    class DOWNLOADER(models.TextChoices):
 | 
			
		||||
        SIMPLE = "simple", _("simple")
 | 
			
		||||
 
 | 
			
		||||
		Reference in New Issue
	
	Block a user