Oops
from ..generic_extractors import *
from bs4 import BeautifulSoup
from datetime import timedelta
import datetime
import re
from urllib.parse import urlparse


# A class dedicated to getting events from La Cour des 3 Coquins and Graines de spectacle
# URL: https://billetterie-c3c.clermont-ferrand.fr//
class CExtractor(TwoStepsExtractor):
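    # Extraction follows the two-steps scheme of TwoStepsExtractor:
    # build_event_url_list() first collects the URL of each event from the
    # agenda page, then add_event_from_content() is called on each event page.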
    def extract(
        self,
        content,
        url,
        url_human=None,
        default_values=None,
        published=False,
        only_future=True,
        ignore_404=True,
    ):
        # remember the root of the ticketing website to build booking URLs later
        self.root_address = "https://" + urlparse(url).netloc + "/"
        return super().extract(
            content, url, url_human, default_values, published, only_future, ignore_404
        )
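
    # Map a category name from the website to an agenda (category, tag) pair;
    # unknown categories map to (None, None).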
    def category_agenda(self, category):
        if not category:
            return None, None
        mapping = {"Théâtre": "Spectacles", "Concert": "Fêtes & Concerts", "Projection": "Cinéma"}
        mapping_tag = {"Théâtre": "🎭 théâtre", "Concert": "🎵 concert", "Projection": None}
        if category in mapping:
            return mapping[category], mapping_tag[category]
        else:
            return None, None
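
    # First step: collect the URL of each event from the agenda page.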
    def build_event_url_list(self, content):
        soup = BeautifulSoup(content, "html.parser")

        events = soup.select("div.fiche-info")

        for e in events:
            link = e.select_one("a.btn.lien_savoir_plus")
            if link is None:
                continue
            e_url = link.get("href", "")
            if e_url != "":
                e_url = self.url + "/" + e_url
                self.add_event_url(e_url)
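
    # Second step: parse a single event page and register the corresponding events.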
    def add_event_from_content(
        self,
        event_content,
        event_url,
        url_human=None,
        default_values=None,
        published=False,
    ):
        soup = BeautifulSoup(event_content, "html.parser")
        title = soup.select_one("h1")
        if title:
            title = title.text

        image = soup.select_one("#media .swiper-slide img")
        if image:
            image = image["src"]
        else:
            image = None

        description = soup.select_one(".presentation")
        if description is not None:
            description = description.get_text()
        duration = soup.select_one("#criteres .DUREE-V .valeur-critere li")
        if duration is not None:
            duration = Extractor.parse_french_time(duration.text)

        location = soup.select_one("#criteres .LIEU-V .valeur-critere li")
        if location is not None:
            location = location.text
        categories = []
        tags = []
        for t in soup.select(".sous-titre span"):
            classes = t.get("class")
            if classes:
                if classes[0].startswith("LIEU-"):
                    location = t.text
                elif classes[0].startswith("THEMATIQUE-"):
                    cat, tag = self.category_agenda(t.text)
                    if cat:
                        categories.append(cat)
                    if tag:
                        tags.append(tag)
        # TODO: parse the dates, retrieve the times ()
        dates = [o.get("value") for o in soup.select("select.datedleb_resa option")]
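
        # The booking codes are only exposed in inline JavaScript (gsw_vars
        # variables and the Resa.init_moteur_resa call), so they are extracted
        # from the script tags with regular expressions.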
        patternCodeSite = re.compile(r'.*gsw_vars\["CODEPRESTATAIRE"\] = "(.*?)";.*', flags=re.DOTALL)
        patternCodeObject = re.compile(r'.*gsw_vars\["CODEPRESTATION"\] = "(.*?)";.*', flags=re.DOTALL)
        patternCodeMoteur = re.compile(r'.*Resa.init_moteur_resa\(\'([0-9]+)\'\);.*', flags=re.DOTALL)
        scripts = soup.find_all("script")
        codeSite = ""
        idObject = ""
        moteur = ""
        for script in scripts:
            text = str(script.string)
            data = patternCodeSite.match(text)
            if data:
                codeSite = data.groups()[0]
            data = patternCodeObject.match(text)
            if data:
                idObject = data.groups()[0]
            data = patternCodeMoteur.match(text)
            if data:
                moteur = data.groups()[0]
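
        # temporarily disable the pause between requests while fetching the
        # schedule pages; it is restored afterwards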
        pause = self.downloader.pause
        self.downloader.pause = False

        # getting the exact schedule requires two extra requests per date
        datetimes = []
        if codeSite != "" and idObject != "" and moteur != "":
            for date in dates:
                # the first request is required so that the server registers the selected date
                page1 = self.downloader.get_content(self.root_address + "booking?action=searchAjax&cid=" + moteur + "&afficheDirectDispo=" + date + "&type_prestataire=V&cle_fiche=PRESTATION-V-" + codeSite + "-" + idObject + "&datedeb=" + date)
                # the second request returns the form that lists the available times
                page2 = self.downloader.get_content(self.root_address + "booking?action=detailTarifsPrestationAjax&prestation=V-" + codeSite + "-" + idObject)
                soup2 = BeautifulSoup(page2, "html.parser")
                times = [o.text for o in soup2.select("#quart_en_cours_spec option")]
                for t in times:
                    startdate = Extractor.parse_french_date(date)
                    starttime = Extractor.parse_french_time(t)
                    start = datetime.datetime.combine(startdate, starttime)
                    enddate = None
                    endtime = None
                    if duration is not None:
                        end = start + timedelta(hours=duration.hour, minutes=duration.minute, seconds=duration.second)
                        enddate = end.date()
                        endtime = end.time()
                    datetimes.append((startdate, starttime, enddate, endtime))
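        # restore the downloader's pause setting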
        self.downloader.pause = pause

        category = None
        if len(categories) > 0:
            category = categories[0]
        for dt in datetimes:
            self.add_event_with_props(
                default_values,
                event_url,
                title,
                category,
                dt[0],
                location,
                description,
                tags,
                recurrences=None,
                uuids=[event_url],
                url_human=url_human,
                start_time=dt[1],
                end_day=dt[2],
                end_time=dt[3],
                published=published,
            )