introduction de chronostring

This commit is contained in:
Jean-Marie Favreau 2025-04-25 15:18:42 +02:00
parent b72387fa5e
commit 355c56243e
3 changed files with 55 additions and 119 deletions

View File

@ -12,6 +12,7 @@ Parmi les outils et ressources sur lesquelles s'appuie l'agenda culturel, on peu
- [Selenium](https://www.selenium.dev/)
- [Feather icons](https://feathericons.com/)
- [Pico CSS](https://picocss.com/)
- [chronostring](https://forge.chapril.org/jmtrivial/chronostring) (des mêmes auteurs)
## Installation

View File

@ -1,9 +1,10 @@
from datetime import date
from datetime import date, datetime
from urllib.parse import urlparse
from chronostring import parse_dates
import re
from bs4 import BeautifulSoup
from ..extractor import Extractor
from ..twosteps_extractor import TwoStepsExtractorNoPause
@ -53,33 +54,6 @@ class CExtractor(TwoStepsExtractorNoPause):
return result
# this method is not perfect, but dates and hours are not structured
def parse_dates(self, date):
    """Extract days and hours from a free-form date string.

    The text is cut on newlines and on " à "; chunks that contain no digit
    are ignored. Every remaining chunk is split on ", " (with " et "
    treated as a separator too) and each piece is first tried as a date
    with ``Extractor.parse_french_date``. A piece that is not a date is
    handed to ``Extractor.parse_french_time`` and the result is attached
    to every day collected so far.

    Returns a list of ``[day, [times, ...]]`` entries. When the text
    contains both "De" and " à ", ``True`` is appended to each entry.
    """
    entries = []
    for chunk in date.replace(" à ", "\n").split("\n"):
        # a chunk without any digit cannot hold a day or an hour
        if not any(ch.isdigit() for ch in chunk):
            continue
        # split subparts
        for piece in chunk.replace(" et ", ", ").split(", "):
            piece = piece.strip()
            day = Extractor.parse_french_date(
                piece, default_year_by_proximity=self.today
            )
            if day is not None:
                entries.append([day, []])
                continue
            # not a date: read it as a time shared by all known days
            hour = Extractor.parse_french_time(piece)
            for entry in entries:
                entry[1].append(hour)
    if "De" in date and " à " in date:
        for entry in entries:
            entry.append(True)
    return entries
def build_event_url_list(self, content, infuture_days=180):
soup = BeautifulSoup(content, "html.parser")
links = soup.select(".cell a.evenement")
@ -118,60 +92,48 @@ class CExtractor(TwoStepsExtractorNoPause):
soup.select_one(".champ.taxo-age").text
category = self.parse_category(soup.select_one(".champ.categorie").text)
date = soup.select_one(".champ.date-libre").text
dt = soup.select_one(".champ.date-libre").text
description = "\n\n".join(
[x for x in [soustitre, description, date, infos] if x is not None]
[x for x in [soustitre, description, dt, infos] if x is not None]
)
if (
" au " in date
or date.startswith("Du")
or date.lower().strip() == "en continu"
or date.startswith("Les")
" au " in dt
or dt.startswith("Du")
or dt.lower().strip() == "en continu"
or dt.startswith("Les")
):
return
dates = self.parse_dates(date)
# Collect the dates found in the free-form text, one line at a time.
dates = []
for dl in dt.split("\n"):
    # nothing to parse on a blank line
    if not dl.strip():
        continue
    # Work on the current line (dl), not on the whole text (dt): otherwise
    # a multi-line text is parsed once per line and its dates are duplicated.
    if re.match(r".* ans[  ]*:.*", dl):
        # The line starts with a "... ans :" prefix: parse only what follows
        # the first colon. maxsplit=1 keeps any later colon in the parsed
        # part instead of truncating at it.
        dates += parse_dates(dl.split(":", 1)[1])
    else:
        dates += parse_dates(dl)
for d in dates:
if len(d) >= 2:
start_day = d[0]
if len(d) == 3 and len(d[1]) == 2:
start_time = d[1][0]
end_time = d[1][1]
uuid = (
event_url
+ "?date="
+ str(start_day)
+ "&hour="
+ str(start_time)
)
self.add_event_with_props(
default_values,
event_url,
title,
category,
start_day,
location,
description,
[],
recurrences=None,
uuids=[uuid],
url_human=event_url,
start_time=start_time,
end_day=start_day,
end_time=end_time,
published=published,
image=image,
image_alt=image_alt,
)
else:
end_time = None
if len(d[1]) == 0:
start_day = None
start_time = None
end_day = None
end_time = None
if isinstance(d, datetime):
start_day = d.date()
start_time = d.time()
elif isinstance(d, date):
start_day = d
elif isinstance(d, list) and len(d) == 2:
start_day = d[0].date()
start_time = d[0].time()
end_day = d[1].date()
end_time = d[1].time()
if start_day is not None:
uuid = event_url + "?date=" + str(start_day)
if start_time is not None:
uuid = uuid + "&hour=" + str(start_time)
self.add_event_with_props(
default_values,
event_url,
@ -185,35 +147,7 @@ class CExtractor(TwoStepsExtractorNoPause):
uuids=[uuid],
url_human=event_url,
start_time=start_time,
end_day=start_day,
end_time=end_time,
published=published,
image=image,
image_alt=image_alt,
)
for t in d[1]:
start_time = t
uuid = (
event_url
+ "?date="
+ str(start_day)
+ "&hour="
+ str(start_time)
)
self.add_event_with_props(
default_values,
event_url,
title,
category,
start_day,
location,
description,
[],
recurrences=None,
uuids=[uuid],
url_human=event_url,
start_time=start_time,
end_day=start_day,
end_day=end_day,
end_time=end_time,
published=published,
image=image,

View File

@ -51,3 +51,4 @@ django-cleanup==9.0.0
django-unused-media==0.2.2
django-resized==1.0.3
django-solo==2.4.0
chronostring==0.1.2