@@ -1,5 +1,6 @@
 | 
			
		||||
from bs4 import BeautifulSoup
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
from ..extractor import Extractor
 | 
			
		||||
from ..twosteps_extractor import TwoStepsExtractor
 | 
			
		||||
 | 
			
		||||
@@ -33,7 +34,6 @@ class CExtractor(TwoStepsExtractor):
 | 
			
		||||
        start_day = Extractor.parse_french_date(
 | 
			
		||||
            soup.select("h2")[1].get_text()
 | 
			
		||||
        )  # pas parfait, mais bordel que ce site est mal construit
 | 
			
		||||
        print(soup.select("h2")[1].get_text())
 | 
			
		||||
 | 
			
		||||
        spans = soup.select("div[data-testid=richTextElement] span")
 | 
			
		||||
        start_time = None
 | 
			
		||||
@@ -62,18 +62,21 @@ class CExtractor(TwoStepsExtractor):
 | 
			
		||||
        url_human = event_url
 | 
			
		||||
        tags = ["🎵 concert"]
 | 
			
		||||
 | 
			
		||||
        image = soup.select("wow-image img[fetchpriority=high]")
 | 
			
		||||
        image = soup.select("section wow-image img[fetchpriority=high]")
 | 
			
		||||
        if image:
 | 
			
		||||
            image = image[0]["src"]
 | 
			
		||||
        else:
 | 
			
		||||
            image = None
 | 
			
		||||
 | 
			
		||||
        descriptions = soup.select(
 | 
			
		||||
            "div[data-testid=mesh-container-content] div[data-testid=inline-content] div[data-testid=mesh-container-content] div[data-testid=richTextElement]"
 | 
			
		||||
            "main div[data-testid=mesh-container-content] div[data-testid=inline-content] div[data-testid=inline-content] div[data-testid=mesh-container-content] div[data-testid=richTextElement]"
 | 
			
		||||
        )
 | 
			
		||||
        if descriptions:
 | 
			
		||||
            prefixes = ["TARIF", "OUVER", "SPECT", "HORAI", "LIEU\n", "conce"]
 | 
			
		||||
            descriptions = [d.get_text() for d in descriptions]
 | 
			
		||||
            description = max(descriptions, key=len)
 | 
			
		||||
            main_descs = [d for d in descriptions if d[:5] not in prefixes]
 | 
			
		||||
            other_descs = [d for d in descriptions if d[:5] in prefixes]
 | 
			
		||||
            description = "\n".join(main_descs + ["\n"] + other_descs)
 | 
			
		||||
        else:
 | 
			
		||||
            description = None
 | 
			
		||||
 | 
			
		||||
 
 | 
			
		||||
		Reference in New Issue
	
	Block a user