Fix nouveau site puce à l'oreille
This commit is contained in:
		@@ -29,8 +29,8 @@ from src.agenda_culturel.import_tasks.custom_extractors import *
 | 
			
		||||
if __name__ == "__main__":
 | 
			
		||||
 | 
			
		||||
    u2e = URL2Events(SimpleDownloader(), lapucealoreille.CExtractor())
 | 
			
		||||
-    url = "https://www.lapucealoreille63.fr/programmation/"
 | 
			
		||||
-    url_human = "https://www.lapucealoreille63.fr/programmation/"
 | 
			
		||||
+    url = "https://www.lapucealoreille63.fr/agenda"
 | 
			
		||||
+    url_human = "https://www.lapucealoreille63.fr/agenda"
 | 
			
		||||
 | 
			
		||||
    try:
 | 
			
		||||
        events = u2e.process(url, url_human, cache = "cache-lapucealoreille.xml", default_values = {}, published = True)
 | 
			
		||||
 
 | 
			
		||||
@@ -15,11 +15,6 @@ class CExtractor(TwoStepsExtractor):
 | 
			
		||||
            e_url = e.find("a")
 | 
			
		||||
            if e_url:
 | 
			
		||||
                if self.add_event_url(e_url["href"]):
 | 
			
		||||
                    title = e.select("div[data-testid=richTextElement] h1.font_0 span")
 | 
			
		||||
                    if title:
 | 
			
		||||
                        title = title[0].contents[0].get_text().replace("\n", " ")
 | 
			
		||||
                        title = re.sub(" +", " ", title)
 | 
			
		||||
                        self.add_event_title(e_url["href"], title)
 | 
			
		||||
 | 
			
		||||
    def add_event_from_content(
 | 
			
		||||
        self,
 | 
			
		||||
@@ -31,9 +26,12 @@ class CExtractor(TwoStepsExtractor):
 | 
			
		||||
    ):
 | 
			
		||||
        soup = BeautifulSoup(event_content, "html.parser")
 | 
			
		||||
 | 
			
		||||
+        title = soup.select("h2")[0].get_text()
 | 
			
		||||
 | 
			
		||||
        start_day = self.parse_french_date(
 | 
			
		||||
-            soup.find("h2").get_text()
 | 
			
		||||
+            soup.select("h2")[1].get_text()
 | 
			
		||||
        )  # pas parfait, mais bordel que ce site est mal construit
 | 
			
		||||
+        print(soup.select("h2")[1].get_text())
 | 
			
		||||
 | 
			
		||||
        spans = soup.select("div[data-testid=richTextElement] span")
 | 
			
		||||
        start_time = None
 | 
			
		||||
@@ -79,7 +77,7 @@ class CExtractor(TwoStepsExtractor):
 | 
			
		||||
 | 
			
		||||
        self.add_event_with_props(
 | 
			
		||||
            event_url,
 | 
			
		||||
            None,
 | 
			
		||||
            title,
 | 
			
		||||
            "Concert",
 | 
			
		||||
            start_day,
 | 
			
		||||
            location,
 | 
			
		||||
 
 | 
			
		||||
		Reference in New Issue
	
	Block a user