parent
9dada3e6c2
commit
629a8509cb
@ -1,3 +1,4 @@
|
||||
import re
|
||||
from bs4 import BeautifulSoup
|
||||
|
||||
from ..extractor import Extractor
|
||||
@ -33,17 +34,16 @@ class CExtractor(TwoStepsExtractor):
|
||||
return None, None
|
||||
|
||||
def build_event_url_list(self, content):
|
||||
soup = BeautifulSoup(content, "xml")
|
||||
soup = BeautifulSoup(content, "html.parser")
|
||||
|
||||
events = soup.select("div.mec-tile-event-content")
|
||||
for e in events:
|
||||
link = e.select("h4.mec-event-title a")
|
||||
if len(link) == 1:
|
||||
url = link[0]["href"]
|
||||
title = link[0].get_text()
|
||||
title = link[0].get_text().replace("’", "'")
|
||||
|
||||
if self.add_event_url(url):
|
||||
print(url, title)
|
||||
self.add_event_title(url, title)
|
||||
|
||||
categories = e.select(".mec-label-normal")
|
||||
@ -64,7 +64,7 @@ class CExtractor(TwoStepsExtractor):
|
||||
default_values=None,
|
||||
published=False,
|
||||
):
|
||||
soup = BeautifulSoup(event_content, "xml")
|
||||
soup = BeautifulSoup(event_content, "html.parser")
|
||||
|
||||
start_day = soup.select(".mec-start-date-label")
|
||||
if start_day and len(start_day) > 0:
|
||||
@ -91,6 +91,7 @@ class CExtractor(TwoStepsExtractor):
|
||||
description = soup.select(".mec-event-content .mec-single-event-description")[
|
||||
0
|
||||
].get_text(separator=" ")
|
||||
description = re.sub(r"([ ]*\n)+", "\n", description).strip().replace("’", "'")
|
||||
|
||||
url_human = event_url
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user