Les fonctions sont des fonctions de classe
This commit is contained in:
parent
de4b54baa4
commit
5f1f5fd003
@ -50,7 +50,7 @@ class CExtractor(TwoStepsExtractor):
|
|||||||
description = soup.select_one(".presentation").get_text()
|
description = soup.select_one(".presentation").get_text()
|
||||||
duration = soup.select_one("#criteres .DUREE-V .valeur-critere li")
|
duration = soup.select_one("#criteres .DUREE-V .valeur-critere li")
|
||||||
if duration is not None:
|
if duration is not None:
|
||||||
duration = self.parse_french_time(duration.text)
|
duration = Extractor.parse_french_time(duration.text)
|
||||||
|
|
||||||
location = self.nom_lieu
|
location = self.nom_lieu
|
||||||
categories = []
|
categories = []
|
||||||
@ -94,8 +94,8 @@ class CExtractor(TwoStepsExtractor):
|
|||||||
soup2 = BeautifulSoup(page2, "html.parser")
|
soup2 = BeautifulSoup(page2, "html.parser")
|
||||||
times = [o.text for o in soup2.select("#quart_en_cours_spec option")]
|
times = [o.text for o in soup2.select("#quart_en_cours_spec option")]
|
||||||
for t in times:
|
for t in times:
|
||||||
startdate = self.parse_french_date(date)
|
startdate = Extractor.parse_french_date(date)
|
||||||
starttime = self.parse_french_time(t)
|
starttime = Extractor.parse_french_time(t)
|
||||||
start = datetime.datetime.combine(startdate, starttime)
|
start = datetime.datetime.combine(startdate, starttime)
|
||||||
enddate = None
|
enddate = None
|
||||||
endtime = None
|
endtime = None
|
||||||
|
@ -41,13 +41,13 @@ class CExtractor(TwoStepsExtractor):
|
|||||||
for span in spans:
|
for span in spans:
|
||||||
txt = span.get_text()
|
txt = span.get_text()
|
||||||
if txt.lstrip().startswith("DÉBUT"):
|
if txt.lstrip().startswith("DÉBUT"):
|
||||||
start_time = self.parse_french_time(txt.split(":")[-1])
|
start_time = Extractor.parse_french_time(txt.split(":")[-1])
|
||||||
end_time = None
|
end_time = None
|
||||||
elif txt.lstrip().startswith("HORAIRES :"):
|
elif txt.lstrip().startswith("HORAIRES :"):
|
||||||
hs = txt.split(":")[-1].split("-")
|
hs = txt.split(":")[-1].split("-")
|
||||||
start_time = self.parse_french_time(hs[0])
|
start_time = Extractor.parse_french_time(hs[0])
|
||||||
if len(hs) > 1:
|
if len(hs) > 1:
|
||||||
end_time = self.parse_french_time(hs[1])
|
end_time = Extractor.parse_french_time(hs[1])
|
||||||
else:
|
else:
|
||||||
end_time = None
|
end_time = None
|
||||||
elif txt.lstrip().startswith("LIEU :") and not location:
|
elif txt.lstrip().startswith("LIEU :") and not location:
|
||||||
@ -55,7 +55,7 @@ class CExtractor(TwoStepsExtractor):
|
|||||||
|
|
||||||
if not location:
|
if not location:
|
||||||
location = self.nom_lieu
|
location = self.nom_lieu
|
||||||
end_day = self.guess_end_day(start_day, start_time, end_time)
|
end_day = Extractor.guess_end_day(start_day, start_time, end_time)
|
||||||
|
|
||||||
url_human = event_url
|
url_human = event_url
|
||||||
tags = []
|
tags = []
|
||||||
|
@ -56,15 +56,15 @@ class CExtractor(TwoStepsExtractor):
|
|||||||
|
|
||||||
start_day = soup.select(".mec-start-date-label")
|
start_day = soup.select(".mec-start-date-label")
|
||||||
if start_day and len(start_day) > 0:
|
if start_day and len(start_day) > 0:
|
||||||
start_day = self.parse_french_date(start_day[0].get_text())
|
start_day = Extractor.parse_french_date(start_day[0].get_text())
|
||||||
else:
|
else:
|
||||||
start_day = None
|
start_day = None
|
||||||
t = soup.select(".mec-single-event-time .mec-events-abbr")
|
t = soup.select(".mec-single-event-time .mec-events-abbr")
|
||||||
if t:
|
if t:
|
||||||
t = t[0].get_text().split("-")
|
t = t[0].get_text().split("-")
|
||||||
start_time = self.parse_french_time(t[0])
|
start_time = Extractor.parse_french_time(t[0])
|
||||||
if len(t) > 1:
|
if len(t) > 1:
|
||||||
end_time = self.parse_french_time(t[1])
|
end_time = Extractor.parse_french_time(t[1])
|
||||||
else:
|
else:
|
||||||
end_time = None
|
end_time = None
|
||||||
else:
|
else:
|
||||||
|
@ -18,7 +18,7 @@ class Extractor(ABC):
|
|||||||
self.downloader = None
|
self.downloader = None
|
||||||
self.referer = ""
|
self.referer = ""
|
||||||
|
|
||||||
def guess_end_day(self, start_day, start_time, end_time):
|
def guess_end_day(start_day, start_time, end_time):
|
||||||
if end_time:
|
if end_time:
|
||||||
if end_time > start_time:
|
if end_time > start_time:
|
||||||
return start_day
|
return start_day
|
||||||
@ -48,7 +48,7 @@ class Extractor(ABC):
|
|||||||
return i + 1
|
return i + 1
|
||||||
return None
|
return None
|
||||||
|
|
||||||
def parse_french_date(self, text):
|
def parse_french_date(text):
|
||||||
# format NomJour Numero Mois Année
|
# format NomJour Numero Mois Année
|
||||||
m = re.search(
|
m = re.search(
|
||||||
"[a-zA-ZéÉûÛ:.]+[ ]*([0-9]+)[er]*[ ]*([a-zA-ZéÉûÛ:.]+)[ ]*([0-9]+)", text
|
"[a-zA-ZéÉûÛ:.]+[ ]*([0-9]+)[er]*[ ]*([a-zA-ZéÉûÛ:.]+)[ ]*([0-9]+)", text
|
||||||
@ -88,7 +88,7 @@ class Extractor(ABC):
|
|||||||
return None
|
return None
|
||||||
return date(year, month, day)
|
return date(year, month, day)
|
||||||
|
|
||||||
def parse_french_time(self, text):
|
def parse_french_time(text):
|
||||||
# format heures minutes secondes
|
# format heures minutes secondes
|
||||||
m = re.search("([0-9]+)[ a-zA-Z:.]+([0-9]+)[ a-zA-Z:.]+([0-9]+)", text)
|
m = re.search("([0-9]+)[ a-zA-Z:.]+([0-9]+)[ a-zA-Z:.]+([0-9]+)", text)
|
||||||
if m:
|
if m:
|
||||||
|
Loading…
x
Reference in New Issue
Block a user