introduction de chronostring
This commit is contained in:
parent
b72387fa5e
commit
355c56243e
@ -12,6 +12,7 @@ Parmi les outils et ressources sur lesquelles s'appuie l'agenda culturel, on peu
|
||||
- [Selenium](https://www.selenium.dev/)
|
||||
- [Feather icons](https://feathericons.com/)
|
||||
- [Pico CSS](https://picocss.com/)
|
||||
- [chronostring](https://forge.chapril.org/jmtrivial/chronostring) (des mêmes auteurs)
|
||||
|
||||
## Installation
|
||||
|
||||
|
@ -1,9 +1,10 @@
|
||||
from datetime import date
|
||||
from datetime import date, datetime
|
||||
from urllib.parse import urlparse
|
||||
from chronostring import parse_dates
|
||||
import re
|
||||
|
||||
from bs4 import BeautifulSoup
|
||||
|
||||
from ..extractor import Extractor
|
||||
from ..twosteps_extractor import TwoStepsExtractorNoPause
|
||||
|
||||
|
||||
@ -53,33 +54,6 @@ class CExtractor(TwoStepsExtractorNoPause):
|
||||
|
||||
return result
|
||||
|
||||
# this method is not perfect, but dates and hours are not structured
|
||||
def parse_dates(self, date):
|
||||
dl = date.replace(" à ", "\n").split("\n")
|
||||
result = []
|
||||
|
||||
for d in dl:
|
||||
# only lines with a digit
|
||||
if sum(c.isdigit() for c in d) != 0:
|
||||
# split subparts
|
||||
for d2 in d.replace(" et ", ", ").split(", "):
|
||||
d2 = d2.strip()
|
||||
dd = Extractor.parse_french_date(
|
||||
d2, default_year_by_proximity=self.today
|
||||
)
|
||||
if dd is None:
|
||||
hh = Extractor.parse_french_time(d2)
|
||||
for i, r in enumerate(result):
|
||||
result[i][1].append(hh)
|
||||
else:
|
||||
result.append([dd, []])
|
||||
|
||||
if "De" in date and " à " in date:
|
||||
for i, r in enumerate(result):
|
||||
result[i].append(True)
|
||||
|
||||
return result
|
||||
|
||||
def build_event_url_list(self, content, infuture_days=180):
|
||||
soup = BeautifulSoup(content, "html.parser")
|
||||
links = soup.select(".cell a.evenement")
|
||||
@ -118,104 +92,64 @@ class CExtractor(TwoStepsExtractorNoPause):
|
||||
soup.select_one(".champ.taxo-age").text
|
||||
category = self.parse_category(soup.select_one(".champ.categorie").text)
|
||||
|
||||
date = soup.select_one(".champ.date-libre").text
|
||||
dt = soup.select_one(".champ.date-libre").text
|
||||
|
||||
description = "\n\n".join(
|
||||
[x for x in [soustitre, description, date, infos] if x is not None]
|
||||
[x for x in [soustitre, description, dt, infos] if x is not None]
|
||||
)
|
||||
|
||||
if (
|
||||
" au " in date
|
||||
or date.startswith("Du")
|
||||
or date.lower().strip() == "en continu"
|
||||
or date.startswith("Les")
|
||||
" au " in dt
|
||||
or dt.startswith("Du")
|
||||
or dt.lower().strip() == "en continu"
|
||||
or dt.startswith("Les")
|
||||
):
|
||||
return
|
||||
|
||||
dates = self.parse_dates(date)
|
||||
# Collect the dates found in the free-form date text, one line at a time.
dates = []
for dl in dt.split("\n"):
    # Nothing to parse on blank lines.
    if not dl.strip():
        continue
    # A line shaped like "<...> ans : <dates>" presumably carries an
    # age-range prefix (TODO confirm); only the text after the first colon
    # is handed to the date parser. maxsplit=1 keeps any later colons.
    if re.match(r".* ans[ ]*:.*", dl):
        dates += parse_dates(dl.split(":", 1)[1])
    else:
        dates += parse_dates(dl)
|
||||
|
||||
for d in dates:
|
||||
if len(d) >= 2:
|
||||
start_day = d[0]
|
||||
start_day = None
|
||||
start_time = None
|
||||
end_day = None
|
||||
end_time = None
|
||||
if isinstance(d, datetime):
|
||||
start_day = d.date()
|
||||
start_time = d.time()
|
||||
elif isinstance(d, date):
|
||||
start_day = d
|
||||
elif isinstance(d, list) and len(d) == 2:
|
||||
start_day = d[0].date()
|
||||
start_time = d[0].time()
|
||||
end_day = d[1].date()
|
||||
end_time = d[1].time()
|
||||
|
||||
if len(d) == 3 and len(d[1]) == 2:
|
||||
start_time = d[1][0]
|
||||
end_time = d[1][1]
|
||||
uuid = (
|
||||
event_url
|
||||
+ "?date="
|
||||
+ str(start_day)
|
||||
+ "&hour="
|
||||
+ str(start_time)
|
||||
)
|
||||
self.add_event_with_props(
|
||||
default_values,
|
||||
event_url,
|
||||
title,
|
||||
category,
|
||||
start_day,
|
||||
location,
|
||||
description,
|
||||
[],
|
||||
recurrences=None,
|
||||
uuids=[uuid],
|
||||
url_human=event_url,
|
||||
start_time=start_time,
|
||||
end_day=start_day,
|
||||
end_time=end_time,
|
||||
published=published,
|
||||
image=image,
|
||||
image_alt=image_alt,
|
||||
)
|
||||
else:
|
||||
end_time = None
|
||||
if len(d[1]) == 0:
|
||||
start_time = None
|
||||
uuid = event_url + "?date=" + str(start_day)
|
||||
self.add_event_with_props(
|
||||
default_values,
|
||||
event_url,
|
||||
title,
|
||||
category,
|
||||
start_day,
|
||||
location,
|
||||
description,
|
||||
[],
|
||||
recurrences=None,
|
||||
uuids=[uuid],
|
||||
url_human=event_url,
|
||||
start_time=start_time,
|
||||
end_day=start_day,
|
||||
end_time=end_time,
|
||||
published=published,
|
||||
image=image,
|
||||
image_alt=image_alt,
|
||||
)
|
||||
for t in d[1]:
|
||||
start_time = t
|
||||
uuid = (
|
||||
event_url
|
||||
+ "?date="
|
||||
+ str(start_day)
|
||||
+ "&hour="
|
||||
+ str(start_time)
|
||||
)
|
||||
self.add_event_with_props(
|
||||
default_values,
|
||||
event_url,
|
||||
title,
|
||||
category,
|
||||
start_day,
|
||||
location,
|
||||
description,
|
||||
[],
|
||||
recurrences=None,
|
||||
uuids=[uuid],
|
||||
url_human=event_url,
|
||||
start_time=start_time,
|
||||
end_day=start_day,
|
||||
end_time=end_time,
|
||||
published=published,
|
||||
image=image,
|
||||
image_alt=image_alt,
|
||||
)
|
||||
if start_day is not None:
|
||||
uuid = event_url + "?date=" + str(start_day)
|
||||
if start_time is not None:
|
||||
uuid = uuid + "&hour=" + str(start_time)
|
||||
|
||||
self.add_event_with_props(
|
||||
default_values,
|
||||
event_url,
|
||||
title,
|
||||
category,
|
||||
start_day,
|
||||
location,
|
||||
description,
|
||||
[],
|
||||
recurrences=None,
|
||||
uuids=[uuid],
|
||||
url_human=event_url,
|
||||
start_time=start_time,
|
||||
end_day=end_day,
|
||||
end_time=end_time,
|
||||
published=published,
|
||||
image=image,
|
||||
image_alt=image_alt,
|
||||
)
|
||||
|
@ -51,3 +51,4 @@ django-cleanup==9.0.0
|
||||
django-unused-media==0.2.2
|
||||
django-resized==1.0.3
|
||||
django-solo==2.4.0
|
||||
chronostring==0.1.2
|
||||
|
Loading…
x
Reference in New Issue
Block a user