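Les fonctions utilitaires d'Extractor (guess_end_day, parse_french_date, parse_french_time) deviennent des fonctions de classe, appelées via Extractor au lieu de self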

This commit is contained in:
Jean-Marie Favreau 2024-10-12 18:19:59 +02:00
parent de4b54baa4
commit 5f1f5fd003
4 changed files with 13 additions and 13 deletions

View File

@ -50,7 +50,7 @@ class CExtractor(TwoStepsExtractor):
description = soup.select_one(".presentation").get_text() description = soup.select_one(".presentation").get_text()
duration = soup.select_one("#criteres .DUREE-V .valeur-critere li") duration = soup.select_one("#criteres .DUREE-V .valeur-critere li")
if duration is not None: if duration is not None:
duration = self.parse_french_time(duration.text) duration = Extractor.parse_french_time(duration.text)
location = self.nom_lieu location = self.nom_lieu
categories = [] categories = []
@ -94,8 +94,8 @@ class CExtractor(TwoStepsExtractor):
soup2 = BeautifulSoup(page2, "html.parser") soup2 = BeautifulSoup(page2, "html.parser")
times = [o.text for o in soup2.select("#quart_en_cours_spec option")] times = [o.text for o in soup2.select("#quart_en_cours_spec option")]
for t in times: for t in times:
startdate = self.parse_french_date(date) startdate = Extractor.parse_french_date(date)
starttime = self.parse_french_time(t) starttime = Extractor.parse_french_time(t)
start = datetime.datetime.combine(startdate, starttime) start = datetime.datetime.combine(startdate, starttime)
enddate = None enddate = None
endtime = None endtime = None

View File

@ -41,13 +41,13 @@ class CExtractor(TwoStepsExtractor):
for span in spans: for span in spans:
txt = span.get_text() txt = span.get_text()
if txt.lstrip().startswith("DÉBUT"): if txt.lstrip().startswith("DÉBUT"):
start_time = self.parse_french_time(txt.split(":")[-1]) start_time = Extractor.parse_french_time(txt.split(":")[-1])
end_time = None end_time = None
elif txt.lstrip().startswith("HORAIRES :"): elif txt.lstrip().startswith("HORAIRES :"):
hs = txt.split(":")[-1].split("-") hs = txt.split(":")[-1].split("-")
start_time = self.parse_french_time(hs[0]) start_time = Extractor.parse_french_time(hs[0])
if len(hs) > 1: if len(hs) > 1:
end_time = self.parse_french_time(hs[1]) end_time = Extractor.parse_french_time(hs[1])
else: else:
end_time = None end_time = None
elif txt.lstrip().startswith("LIEU :") and not location: elif txt.lstrip().startswith("LIEU :") and not location:
@ -55,7 +55,7 @@ class CExtractor(TwoStepsExtractor):
if not location: if not location:
location = self.nom_lieu location = self.nom_lieu
end_day = self.guess_end_day(start_day, start_time, end_time) end_day = Extractor.guess_end_day(start_day, start_time, end_time)
url_human = event_url url_human = event_url
tags = [] tags = []

View File

@ -56,15 +56,15 @@ class CExtractor(TwoStepsExtractor):
start_day = soup.select(".mec-start-date-label") start_day = soup.select(".mec-start-date-label")
if start_day and len(start_day) > 0: if start_day and len(start_day) > 0:
start_day = self.parse_french_date(start_day[0].get_text()) start_day = Extractor.parse_french_date(start_day[0].get_text())
else: else:
start_day = None start_day = None
t = soup.select(".mec-single-event-time .mec-events-abbr") t = soup.select(".mec-single-event-time .mec-events-abbr")
if t: if t:
t = t[0].get_text().split("-") t = t[0].get_text().split("-")
start_time = self.parse_french_time(t[0]) start_time = Extractor.parse_french_time(t[0])
if len(t) > 1: if len(t) > 1:
end_time = self.parse_french_time(t[1]) end_time = Extractor.parse_french_time(t[1])
else: else:
end_time = None end_time = None
else: else:

View File

@ -18,7 +18,7 @@ class Extractor(ABC):
self.downloader = None self.downloader = None
self.referer = "" self.referer = ""
def guess_end_day(self, start_day, start_time, end_time): def guess_end_day(start_day, start_time, end_time):
if end_time: if end_time:
if end_time > start_time: if end_time > start_time:
return start_day return start_day
@ -48,7 +48,7 @@ class Extractor(ABC):
return i + 1 return i + 1
return None return None
def parse_french_date(self, text): def parse_french_date(text):
# format NomJour Numero Mois Année # format NomJour Numero Mois Année
m = re.search( m = re.search(
"[a-zA-ZéÉûÛ:.]+[ ]*([0-9]+)[er]*[ ]*([a-zA-ZéÉûÛ:.]+)[ ]*([0-9]+)", text "[a-zA-ZéÉûÛ:.]+[ ]*([0-9]+)[er]*[ ]*([a-zA-ZéÉûÛ:.]+)[ ]*([0-9]+)", text
@ -88,7 +88,7 @@ class Extractor(ABC):
return None return None
return date(year, month, day) return date(year, month, day)
def parse_french_time(self, text): def parse_french_time(text):
# format heures minutes secondes # format heures minutes secondes
m = re.search("([0-9]+)[ a-zA-Z:.]+([0-9]+)[ a-zA-Z:.]+([0-9]+)", text) m = re.search("([0-9]+)[ a-zA-Z:.]+([0-9]+)[ a-zA-Z:.]+([0-9]+)", text)
if m: if m: