Amélioration de la détection des catégories (avec accents)

This commit is contained in:
Jean-Marie Favreau 2024-10-19 18:51:41 +02:00
parent 6704d30ef1
commit 97be0db3d1
2 changed files with 9 additions and 7 deletions

View File

@@ -66,16 +66,18 @@ class CExtractor(TwoStepsExtractorNoPause):
description = None description = None
category = soup.select_one(".event_category").text category = soup.select_one(".event_category").text
first_cat = Extractor.remove_accents(category.split(",")[0].lower())
print(first_cat)
tags = [] tags = []
if category in ["Grand Spectacle"]: if first_cat in ["grand spectacle"]:
category = "Danse" category = "Danse"
elif category in ["Théâtre"]: elif first_cat in ["theatre", "humour / one man show"]:
category = "Théâtre" category = "Theatre"
elif category in ["Chanson française", "Musique du monde", "Pop / Rock", "Rap, RnB", "Raggae", "Variété"]: elif first_cat in ["chanson francaise", "musique du monde", "pop / rock", "rap", "rnb", "raggae", "variete"]:
category = "Concert" category = "Concert"
elif category in ["Comédie Musicale", "Humour / One Man Show", "Spectacle équestre"]: elif first_cat in ["comedie musicale", "humour / one man show", "spectacle equestre"]:
category = "Art du spectacle" category = "Art du spectacle"
elif category in ["Spectacle pour enfant"]: elif first_cat in ["spectacle pour enfant"]:
tags = ["jeune public"] tags = ["jeune public"]
category = None category = None
else: else:

View File

@@ -738,7 +738,7 @@ class Event(models.Model):
if "category" in event_structure and event_structure["category"] is not None: if "category" in event_structure and event_structure["category"] is not None:
try: try:
event_structure["category"] = Category.objects.get( event_structure["category"] = Category.objects.get(
name=event_structure["category"] name__unaccent__icontains=remove_accents(event_structure["category"].lower())
) )
except Category.DoesNotExist: except Category.DoesNotExist:
event_structure["category"] = Category.get_default_category() event_structure["category"] = Category.get_default_category()