introduction de chronostring

This commit is contained in:
Jean-Marie Favreau 2025-04-25 15:18:42 +02:00
parent b72387fa5e
commit 355c56243e
3 changed files with 55 additions and 119 deletions

View File

@ -12,6 +12,7 @@ Parmi les outils et ressources sur lesquelles s'appuie l'agenda culturel, on peu
- [Selenium](https://www.selenium.dev/) - [Selenium](https://www.selenium.dev/)
- [Feather icons](https://feathericons.com/) - [Feather icons](https://feathericons.com/)
- [Pico CSS](https://picocss.com/) - [Pico CSS](https://picocss.com/)
- [chronostring](https://forge.chapril.org/jmtrivial/chronostring) (des mêmes auteurs)
## Installation ## Installation

View File

@ -1,9 +1,10 @@
from datetime import date from datetime import date, datetime
from urllib.parse import urlparse from urllib.parse import urlparse
from chronostring import parse_dates
import re
from bs4 import BeautifulSoup from bs4 import BeautifulSoup
from ..extractor import Extractor
from ..twosteps_extractor import TwoStepsExtractorNoPause from ..twosteps_extractor import TwoStepsExtractorNoPause
@ -53,33 +54,6 @@ class CExtractor(TwoStepsExtractorNoPause):
return result return result
# this method is not perfect, but dates and hours are not structured
def parse_dates(self, date):
    """Extract days and times from a free-form French date description.

    The text is not structured, so this is a best-effort heuristic. The
    text is cut on line breaks and on " à "; lines without any digit are
    ignored; each remaining line is cut on ", " and " et ". A piece for
    which ``Extractor.parse_french_date`` returns something other than
    ``None`` opens a new entry. Any other piece is handed to
    ``Extractor.parse_french_time`` and the result is attached to every
    entry collected so far.

    Args:
        date: Raw text describing when the event takes place.

    Returns:
        A list of ``[day, times]`` entries in order of appearance. When
        the text contains both "De" and " à ", every entry receives a
        third element ``True``.
    """
    result = []
    for line in date.replace(" à ", "\n").split("\n"):
        # Only lines with a digit are examined; any() stops at the first
        # digit instead of counting them all.
        if not any(c.isdigit() for c in line):
            continue
        for piece in line.replace(" et ", ", ").split(", "):
            piece = piece.strip()
            day = Extractor.parse_french_date(
                piece, default_year_by_proximity=self.today
            )
            if day is not None:
                result.append([day, []])
                continue
            # Not a date: treat the piece as a time and share it with
            # every day found so far.
            # NOTE(review): the parse result is appended unchecked —
            # confirm what parse_french_time returns for unparseable text.
            hour = Extractor.parse_french_time(piece)
            for entry in result:
                entry[1].append(hour)

    # "De ... à ..." wording: mark every entry with a trailing True.
    # NOTE(review): the caller appears to read a marked entry holding two
    # times as (start, end) — confirm against the call site.
    if "De" in date and " à " in date:
        for entry in result:
            entry.append(True)

    return result
def build_event_url_list(self, content, infuture_days=180): def build_event_url_list(self, content, infuture_days=180):
soup = BeautifulSoup(content, "html.parser") soup = BeautifulSoup(content, "html.parser")
links = soup.select(".cell a.evenement") links = soup.select(".cell a.evenement")
@ -118,104 +92,64 @@ class CExtractor(TwoStepsExtractorNoPause):
soup.select_one(".champ.taxo-age").text soup.select_one(".champ.taxo-age").text
category = self.parse_category(soup.select_one(".champ.categorie").text) category = self.parse_category(soup.select_one(".champ.categorie").text)
date = soup.select_one(".champ.date-libre").text dt = soup.select_one(".champ.date-libre").text
description = "\n\n".join( description = "\n\n".join(
[x for x in [soustitre, description, date, infos] if x is not None] [x for x in [soustitre, description, dt, infos] if x is not None]
) )
if ( if (
" au " in date " au " in dt
or date.startswith("Du") or dt.startswith("Du")
or date.lower().strip() == "en continu" or dt.lower().strip() == "en continu"
or date.startswith("Les") or dt.startswith("Les")
): ):
return return
dates = self.parse_dates(date) dates = []
# Parse the free-form text one line at a time. Each iteration must work on
# the current line `dl`: using the whole text `dt` here would parse it once
# per line and collect the same dates repeatedly.
for dl in dt.split("\n"):
    # A line may begin with an age label ending in "ans :" before the dates.
    # The spaces allowed before the colon are written as explicit escapes
    # (plain space, NBSP, narrow NBSP) rather than invisible characters.
    # NOTE(review): the original class held a plain space plus one invisible
    # space character — confirm this set covers it.
    if re.match(r".* ans[ \u00a0\u202f]*:.*", dl):
        # Keep everything after the first colon, so a later ":" on the
        # same line does not truncate the text handed to the parser.
        dates += parse_dates(dl.split(":", 1)[1])
    else:
        dates += parse_dates(dl)
for d in dates: for d in dates:
if len(d) >= 2: start_day = None
start_day = d[0] start_time = None
end_day = None
end_time = None
if isinstance(d, datetime):
start_day = d.date()
start_time = d.time()
elif isinstance(d, date):
start_day = d
elif isinstance(d, list) and len(d) == 2:
start_day = d[0].date()
start_time = d[0].time()
end_day = d[1].date()
end_time = d[1].time()
if len(d) == 3 and len(d[1]) == 2: if start_day is not None:
start_time = d[1][0] uuid = event_url + "?date=" + str(start_day)
end_time = d[1][1] if start_time is not None:
uuid = ( uuid = uuid + "&hour=" + str(start_time)
event_url
+ "?date=" self.add_event_with_props(
+ str(start_day) default_values,
+ "&hour=" event_url,
+ str(start_time) title,
) category,
self.add_event_with_props( start_day,
default_values, location,
event_url, description,
title, [],
category, recurrences=None,
start_day, uuids=[uuid],
location, url_human=event_url,
description, start_time=start_time,
[], end_day=end_day,
recurrences=None, end_time=end_time,
uuids=[uuid], published=published,
url_human=event_url, image=image,
start_time=start_time, image_alt=image_alt,
end_day=start_day, )
end_time=end_time,
published=published,
image=image,
image_alt=image_alt,
)
else:
end_time = None
if len(d[1]) == 0:
start_time = None
uuid = event_url + "?date=" + str(start_day)
self.add_event_with_props(
default_values,
event_url,
title,
category,
start_day,
location,
description,
[],
recurrences=None,
uuids=[uuid],
url_human=event_url,
start_time=start_time,
end_day=start_day,
end_time=end_time,
published=published,
image=image,
image_alt=image_alt,
)
for t in d[1]:
start_time = t
uuid = (
event_url
+ "?date="
+ str(start_day)
+ "&hour="
+ str(start_time)
)
self.add_event_with_props(
default_values,
event_url,
title,
category,
start_day,
location,
description,
[],
recurrences=None,
uuids=[uuid],
url_human=event_url,
start_time=start_time,
end_day=start_day,
end_time=end_time,
published=published,
image=image,
image_alt=image_alt,
)

View File

@ -51,3 +51,4 @@ django-cleanup==9.0.0
django-unused-media==0.2.2 django-unused-media==0.2.2
django-resized==1.0.3 django-resized==1.0.3
django-solo==2.4.0 django-solo==2.4.0
chronostring==0.1.2