introduction de chronostring
This commit is contained in:
parent
b72387fa5e
commit
355c56243e
@ -12,6 +12,7 @@ Parmi les outils et ressources sur lesquelles s'appuie l'agenda culturel, on peu
|
|||||||
- [Selenium](https://www.selenium.dev/)
|
- [Selenium](https://www.selenium.dev/)
|
||||||
- [Feather icons](https://feathericons.com/)
|
- [Feather icons](https://feathericons.com/)
|
||||||
- [Pico CSS](https://picocss.com/)
|
- [Pico CSS](https://picocss.com/)
|
||||||
|
- [chronostring](https://forge.chapril.org/jmtrivial/chronostring) (des mêmes auteurs)
|
||||||
|
|
||||||
## Installation
|
## Installation
|
||||||
|
|
||||||
|
@ -1,9 +1,10 @@
|
|||||||
from datetime import date
|
from datetime import date, datetime
|
||||||
from urllib.parse import urlparse
|
from urllib.parse import urlparse
|
||||||
|
from chronostring import parse_dates
|
||||||
|
import re
|
||||||
|
|
||||||
from bs4 import BeautifulSoup
|
from bs4 import BeautifulSoup
|
||||||
|
|
||||||
from ..extractor import Extractor
|
|
||||||
from ..twosteps_extractor import TwoStepsExtractorNoPause
|
from ..twosteps_extractor import TwoStepsExtractorNoPause
|
||||||
|
|
||||||
|
|
||||||
@ -53,33 +54,6 @@ class CExtractor(TwoStepsExtractorNoPause):
|
|||||||
|
|
||||||
return result
|
return result
|
||||||
|
|
||||||
# this method is not perfect, but dates and hours are not structured
|
|
||||||
def parse_dates(self, date):
|
|
||||||
dl = date.replace(" à ", "\n").split("\n")
|
|
||||||
result = []
|
|
||||||
|
|
||||||
for d in dl:
|
|
||||||
# only lines with a digit
|
|
||||||
if sum(c.isdigit() for c in d) != 0:
|
|
||||||
# split subparts
|
|
||||||
for d2 in d.replace(" et ", ", ").split(", "):
|
|
||||||
d2 = d2.strip()
|
|
||||||
dd = Extractor.parse_french_date(
|
|
||||||
d2, default_year_by_proximity=self.today
|
|
||||||
)
|
|
||||||
if dd is None:
|
|
||||||
hh = Extractor.parse_french_time(d2)
|
|
||||||
for i, r in enumerate(result):
|
|
||||||
result[i][1].append(hh)
|
|
||||||
else:
|
|
||||||
result.append([dd, []])
|
|
||||||
|
|
||||||
if "De" in date and " à " in date:
|
|
||||||
for i, r in enumerate(result):
|
|
||||||
result[i].append(True)
|
|
||||||
|
|
||||||
return result
|
|
||||||
|
|
||||||
def build_event_url_list(self, content, infuture_days=180):
|
def build_event_url_list(self, content, infuture_days=180):
|
||||||
soup = BeautifulSoup(content, "html.parser")
|
soup = BeautifulSoup(content, "html.parser")
|
||||||
links = soup.select(".cell a.evenement")
|
links = soup.select(".cell a.evenement")
|
||||||
@ -118,104 +92,64 @@ class CExtractor(TwoStepsExtractorNoPause):
|
|||||||
soup.select_one(".champ.taxo-age").text
|
soup.select_one(".champ.taxo-age").text
|
||||||
category = self.parse_category(soup.select_one(".champ.categorie").text)
|
category = self.parse_category(soup.select_one(".champ.categorie").text)
|
||||||
|
|
||||||
date = soup.select_one(".champ.date-libre").text
|
dt = soup.select_one(".champ.date-libre").text
|
||||||
|
|
||||||
description = "\n\n".join(
|
description = "\n\n".join(
|
||||||
[x for x in [soustitre, description, date, infos] if x is not None]
|
[x for x in [soustitre, description, dt, infos] if x is not None]
|
||||||
)
|
)
|
||||||
|
|
||||||
if (
|
if (
|
||||||
" au " in date
|
" au " in dt
|
||||||
or date.startswith("Du")
|
or dt.startswith("Du")
|
||||||
or date.lower().strip() == "en continu"
|
or dt.lower().strip() == "en continu"
|
||||||
or date.startswith("Les")
|
or dt.startswith("Les")
|
||||||
):
|
):
|
||||||
return
|
return
|
||||||
|
|
||||||
dates = self.parse_dates(date)
|
dates = []
|
||||||
|
# Parse the free-text date field one line at a time and accumulate every
# date found. Each line is handled on its own so that a multi-line field
# is not re-parsed in full on every iteration (which duplicated results).
for dl in dt.split("\n"):
    # Skip blank lines: they carry no date information.
    if not dl.strip():
        continue
    # Lines shaped like "<...> ans : <dates>" start with an age prefix;
    # only the text after the FIRST colon is handed to the parser, so a
    # later colon (e.g. in a time such as "14:30") is preserved.
    if re.match(r".* ans[ ]*:.*", dl):
        dates += parse_dates(dl.split(":", 1)[1])
    else:
        dates += parse_dates(dl)
|
||||||
|
|
||||||
for d in dates:
|
for d in dates:
|
||||||
if len(d) >= 2:
|
start_day = None
|
||||||
start_day = d[0]
|
start_time = None
|
||||||
|
end_day = None
|
||||||
|
end_time = None
|
||||||
|
if isinstance(d, datetime):
|
||||||
|
start_day = d.date()
|
||||||
|
start_time = d.time()
|
||||||
|
elif isinstance(d, date):
|
||||||
|
start_day = d
|
||||||
|
elif isinstance(d, list) and len(d) == 2:
|
||||||
|
start_day = d[0].date()
|
||||||
|
start_time = d[0].time()
|
||||||
|
end_day = d[1].date()
|
||||||
|
end_time = d[1].time()
|
||||||
|
|
||||||
if len(d) == 3 and len(d[1]) == 2:
|
if start_day is not None:
|
||||||
start_time = d[1][0]
|
uuid = event_url + "?date=" + str(start_day)
|
||||||
end_time = d[1][1]
|
if start_time is not None:
|
||||||
uuid = (
|
uuid = uuid + "&hour=" + str(start_time)
|
||||||
event_url
|
|
||||||
+ "?date="
|
self.add_event_with_props(
|
||||||
+ str(start_day)
|
default_values,
|
||||||
+ "&hour="
|
event_url,
|
||||||
+ str(start_time)
|
title,
|
||||||
)
|
category,
|
||||||
self.add_event_with_props(
|
start_day,
|
||||||
default_values,
|
location,
|
||||||
event_url,
|
description,
|
||||||
title,
|
[],
|
||||||
category,
|
recurrences=None,
|
||||||
start_day,
|
uuids=[uuid],
|
||||||
location,
|
url_human=event_url,
|
||||||
description,
|
start_time=start_time,
|
||||||
[],
|
end_day=end_day,
|
||||||
recurrences=None,
|
end_time=end_time,
|
||||||
uuids=[uuid],
|
published=published,
|
||||||
url_human=event_url,
|
image=image,
|
||||||
start_time=start_time,
|
image_alt=image_alt,
|
||||||
end_day=start_day,
|
)
|
||||||
end_time=end_time,
|
|
||||||
published=published,
|
|
||||||
image=image,
|
|
||||||
image_alt=image_alt,
|
|
||||||
)
|
|
||||||
else:
|
|
||||||
end_time = None
|
|
||||||
if len(d[1]) == 0:
|
|
||||||
start_time = None
|
|
||||||
uuid = event_url + "?date=" + str(start_day)
|
|
||||||
self.add_event_with_props(
|
|
||||||
default_values,
|
|
||||||
event_url,
|
|
||||||
title,
|
|
||||||
category,
|
|
||||||
start_day,
|
|
||||||
location,
|
|
||||||
description,
|
|
||||||
[],
|
|
||||||
recurrences=None,
|
|
||||||
uuids=[uuid],
|
|
||||||
url_human=event_url,
|
|
||||||
start_time=start_time,
|
|
||||||
end_day=start_day,
|
|
||||||
end_time=end_time,
|
|
||||||
published=published,
|
|
||||||
image=image,
|
|
||||||
image_alt=image_alt,
|
|
||||||
)
|
|
||||||
for t in d[1]:
|
|
||||||
start_time = t
|
|
||||||
uuid = (
|
|
||||||
event_url
|
|
||||||
+ "?date="
|
|
||||||
+ str(start_day)
|
|
||||||
+ "&hour="
|
|
||||||
+ str(start_time)
|
|
||||||
)
|
|
||||||
self.add_event_with_props(
|
|
||||||
default_values,
|
|
||||||
event_url,
|
|
||||||
title,
|
|
||||||
category,
|
|
||||||
start_day,
|
|
||||||
location,
|
|
||||||
description,
|
|
||||||
[],
|
|
||||||
recurrences=None,
|
|
||||||
uuids=[uuid],
|
|
||||||
url_human=event_url,
|
|
||||||
start_time=start_time,
|
|
||||||
end_day=start_day,
|
|
||||||
end_time=end_time,
|
|
||||||
published=published,
|
|
||||||
image=image,
|
|
||||||
image_alt=image_alt,
|
|
||||||
)
|
|
||||||
|
@ -51,3 +51,4 @@ django-cleanup==9.0.0
|
|||||||
django-unused-media==0.2.2
|
django-unused-media==0.2.2
|
||||||
django-resized==1.0.3
|
django-resized==1.0.3
|
||||||
django-solo==2.4.0
|
django-solo==2.4.0
|
||||||
|
chronostring==0.1.2
|
||||||
|
Loading…
x
Reference in New Issue
Block a user