@@ -0,0 +1,72 @@
 | 
			
		||||
import logging
 | 
			
		||||
import re
 | 
			
		||||
from urllib.parse import urlparse
 | 
			
		||||
 | 
			
		||||
from bs4 import BeautifulSoup
 | 
			
		||||
 | 
			
		||||
from ..extractor import Extractor
 | 
			
		||||
 | 
			
		||||
logger = logging.getLogger(__name__)
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
class CExtractor(Extractor):
    """Event extractor for pages hosted on associations.clermont-ferrand.fr."""

    def __init__(self):
        super().__init__()

    def extract(
        self,
        content,
        url,
        url_human=None,
        default_values=None,
        published=False,
    ):
        """Parse an event page and register the event it describes.

        :param content: raw HTML of the fetched page
        :param url: URL the content came from; also used as the event uuid
        :param url_human: optional human-facing URL forwarded to add_event
        :param default_values: default event fields forwarded to add_event
        :param published: unused here; kept for interface compatibility
        :return: the structure accumulated by the extractor
        """
        self.set_header(url)
        u = urlparse(url)
        # Only pages from the expected host are handled; any other URL
        # yields the (empty) accumulated structure.
        if u.netloc == "associations.clermont-ferrand.fr":
            soup = BeautifulSoup(content, "html.parser")

            # Guard every select_one: page layout variations must not
            # abort the whole extraction with an AttributeError.
            title_tag = soup.select_one("h1")
            title = title_tag.text.strip() if title_tag else None

            image = None
            image_alt = None
            img = soup.select_one("img.image-style-event")
            if img:
                # src/alt may be absent; .get avoids a KeyError.
                image = img.get("src")
                if image and not image.startswith("http"):
                    # Relative path: make it absolute on the same host.
                    image = "https://" + u.netloc + image
                image_alt = img.get("alt")

            body = soup.select_one("div.field--name-body")
            description = body.get_text(separator="\n") if body else None

            location_tag = soup.select_one("div.c-location__holder .c-desc")
            location = location_tag.text if location_tag else None

            start_day = soup.select_one("div.o-date")
            if start_day is not None:
                # Collapse newline-separated date fragments into a single
                # space-separated string before parsing the French date.
                start_day = Extractor.parse_french_date(
                    re.sub(r"[ ]*\n[ ]*", " ", start_day.get_text(separator=" "))
                )

            start_time = soup.select_one("div.c-hours__holder .c-desc")
            if start_time is not None:
                start_time = Extractor.parse_french_time(start_time.text)

            category = None
            tags = []
            # The source URL uniquely identifies the event.
            uuids = [url]

            self.add_event(
                default_values,
                title,
                category,
                start_day,
                location,
                description,
                tags,
                uuids,
                url_human=url_human,
                start_time=start_time,
                image=image,
                image_alt=image_alt,
            )

        return self.get_structure()
 | 
			
		||||
@@ -315,11 +315,13 @@ class Extractor(ABC):
 | 
			
		||||
            CExtractor as GoogleCalendarLinkEventExtractor,
 | 
			
		||||
        )
 | 
			
		||||
        from .generic_extractors.ical import ICALExtractor
 | 
			
		||||
        from .custom_extractors.associations_cf import CExtractor as AssociationsCF
 | 
			
		||||
 | 
			
		||||
        if single_event:
 | 
			
		||||
            return [
 | 
			
		||||
                FacebookEventExtractor(),
 | 
			
		||||
                GoogleCalendarLinkEventExtractor(),
 | 
			
		||||
                AssociationsCF(),
 | 
			
		||||
                EventNotFoundExtractor(),
 | 
			
		||||
            ]
 | 
			
		||||
        else:
 | 
			
		||||
 
 | 
			
		||||
		Reference in New Issue
	
	Block a user