parent
a885d1a064
commit
55e8c1a323
@ -1,5 +1,6 @@
|
||||
from bs4 import BeautifulSoup
|
||||
|
||||
|
||||
from ..extractor import Extractor
|
||||
from ..twosteps_extractor import TwoStepsExtractor
|
||||
|
||||
@ -33,7 +34,6 @@ class CExtractor(TwoStepsExtractor):
|
||||
start_day = Extractor.parse_french_date(
|
||||
soup.select("h2")[1].get_text()
|
||||
) # pas parfait, mais bordel que ce site est mal construit
|
||||
print(soup.select("h2")[1].get_text())
|
||||
|
||||
spans = soup.select("div[data-testid=richTextElement] span")
|
||||
start_time = None
|
||||
@ -62,18 +62,21 @@ class CExtractor(TwoStepsExtractor):
|
||||
url_human = event_url
|
||||
tags = ["🎵 concert"]
|
||||
|
||||
image = soup.select("wow-image img[fetchpriority=high]")
|
||||
image = soup.select("section wow-image img[fetchpriority=high]")
|
||||
if image:
|
||||
image = image[0]["src"]
|
||||
else:
|
||||
image = None
|
||||
|
||||
descriptions = soup.select(
|
||||
"div[data-testid=mesh-container-content] div[data-testid=inline-content] div[data-testid=mesh-container-content] div[data-testid=richTextElement]"
|
||||
"main div[data-testid=mesh-container-content] div[data-testid=inline-content] div[data-testid=inline-content] div[data-testid=mesh-container-content] div[data-testid=richTextElement]"
|
||||
)
|
||||
if descriptions:
|
||||
prefixes = ["TARIF", "OUVER", "SPECT", "HORAI", "LIEU\n", "conce"]
|
||||
descriptions = [d.get_text() for d in descriptions]
|
||||
description = max(descriptions, key=len)
|
||||
main_descs = [d for d in descriptions if d[:5] not in prefixes]
|
||||
other_descs = [d for d in descriptions if d[:5] in prefixes]
|
||||
description = "\n".join(main_descs + ["\n"] + other_descs)
|
||||
else:
|
||||
description = None
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user