diff --git a/experimentations/get_ical_events.py b/experimentations/get_ical_events.py index 1ec6129..741ffd7 100755 --- a/experimentations/get_ical_events.py +++ b/experimentations/get_ical_events.py @@ -12,6 +12,7 @@ from selenium.webdriver.chrome.options import Options import icalendar from datetime import datetime, date import json +from bs4 import BeautifulSoup @@ -76,7 +77,7 @@ class Extractor(ABC): def clear_events(self): self.events = [] - def add_event(self, title, category, start_day, location, description, tags, url=None, url_human=None, start_time=None, end_day=None, end_time=None, last_modified=None, published=False): + def add_event(self, title, category, start_day, location, description, tags, uuid, url_human=None, start_time=None, end_day=None, end_time=None, last_modified=None, published=False): if title is None: print("ERROR: cannot import an event without name") return @@ -88,13 +89,12 @@ class Extractor(ABC): "title": title, "category": category, "start_day": start_day, + "uuid": uuid, "location": location, - "descritpion": description, + "description": description, "tags": tags, "published": published } - if url is not None: - event["url"] = url if url_human is not None: event["url_human"] = url_human if start_time is not None: @@ -167,6 +167,12 @@ class ICALExtractor(Extractor): location = self.default_value_if_exists(default_values, "location") description = self.get_item_from_vevent(event, "DESCRIPTION") + if description is not None: + soup = BeautifulSoup(description, "html.parser") + delimiter = '\n' + for line_break in soup.findAll('br'): + line_break.replaceWith(delimiter) + description = soup.get_text() last_modified = self.get_item_from_vevent(event, "LAST_MODIFIED") @@ -183,7 +189,7 @@ class ICALExtractor(Extractor): if rrule is not None: print("Recurrent event not yet supported", rrule) - self.add_event(title, category, start_day, location, description, tags, url=event_url, url_human=url_human, start_time=start_time, end_day=end_day, 
end_time=end_time, last_modified=last_modified, published=published) + self.add_event(title, category, start_day, location, description, tags, uuid=event_url, url_human=url_human, start_time=start_time, end_day=end_day, end_time=end_time, last_modified=last_modified, published=published) return self.get_structure() diff --git a/src/agenda_culturel/admin.py b/src/agenda_culturel/admin.py index 810ac8f..fce2f6d 100644 --- a/src/agenda_culturel/admin.py +++ b/src/agenda_culturel/admin.py @@ -1,6 +1,6 @@ from django.contrib import admin from django import forms -from .models import Event, Category, StaticContent +from .models import Event, Category, StaticContent, DuplicatedEvents, BatchImportation from django_better_admin_arrayfield.admin.mixins import DynamicArrayMixin from django_better_admin_arrayfield.forms.widgets import DynamicArrayWidget from django_better_admin_arrayfield.models.fields import DynamicArrayField @@ -8,6 +8,8 @@ from django_better_admin_arrayfield.models.fields import DynamicArrayField admin.site.register(Category) admin.site.register(StaticContent) +admin.site.register(DuplicatedEvents) +admin.site.register(BatchImportation) class URLWidget(DynamicArrayWidget): diff --git a/src/agenda_culturel/celery.py b/src/agenda_culturel/celery.py index 247bf52..cb130c2 100644 --- a/src/agenda_culturel/celery.py +++ b/src/agenda_culturel/celery.py @@ -25,12 +25,17 @@ app.config_from_object("django.conf:settings", namespace="CELERY") # Load task modules from all registered Django apps. 
app.autodiscover_tasks() -def close_import_task(taskid, success, error_message): +def close_import_task(taskid, success, error_message, importer): from agenda_culturel.models import BatchImportation task = BatchImportation.objects.get(celery_id=taskid) task.status = BatchImportation.STATUS.SUCCESS if success else BatchImportation.STATUS.FAILED - fields = ["status"] + task.nb_initial = importer.get_nb_events() + task.nb_imported = importer.get_nb_imported_events() + task.nb_updated = importer.get_nb_updated_events() + task.nb_removed = importer.get_nb_removed_events() + + fields = ["status", "nb_initial", "nb_updated", "nb_imported", "nb_removed"] if not success: task.error_message = error_message fields.append("error_message") @@ -46,10 +51,14 @@ def import_events_from_json(self, json): importer = EventsImporter(self.request.id) - success, error_message = importer.import_events(json) + try: + success, error_message = importer.import_events(json) - # finally, close task - close_import_task(self.request.id, success, error_message) + # finally, close task + close_import_task(self.request.id, success, error_message, importer) + except Exception as e: + logger.error(e) + close_import_task(self.request.id, False, e, importer) @app.task(bind=True) diff --git a/src/agenda_culturel/forms.py b/src/agenda_culturel/forms.py index ab27976..a72d36d 100644 --- a/src/agenda_culturel/forms.py +++ b/src/agenda_culturel/forms.py @@ -1,8 +1,8 @@ -from django.forms import ModelForm, ValidationError, TextInput, Form, URLField, MultipleHiddenInput +from django.forms import ModelForm, ValidationError, TextInput, Form, URLField, MultipleHiddenInput, Textarea, CharField from datetime import date -from .models import Event +from .models import Event, BatchImportation from django.utils.translation import gettext_lazy as _ class EventSubmissionForm(Form): @@ -13,7 +13,7 @@ class EventForm(ModelForm): class Meta: model = Event - fields = '__all__' + exclude = ["possibly_duplicated"] widgets = { 
'start_day': TextInput(attrs={'type': 'date', 'onchange': 'update_datetimes(event);', "onfocus": "this.oldvalue = this.value;"}), 'start_time': TextInput(attrs={'type': 'time', 'onchange': 'update_datetimes(event);', "onfocus": "this.oldvalue = this.value;"}), @@ -54,3 +54,25 @@ class EventForm(ModelForm): return end_time + + +class BatchImportationForm(ModelForm): + + json = CharField(label="JSON (facultatif)", widget=Textarea(attrs={"rows":"10"}), help_text=_("JSON in the format expected for the import. If the JSON is provided here, we will ignore the URLs given above, and use the information provided by the json without importing any additional events from the URL."), required=False) + + class Meta: + model = BatchImportation + fields = ['source', 'browsable_url'] + + def clean(self): + cleaned_data = super().clean() + json = cleaned_data.get("json") + source = cleaned_data.get("source") + browsable_url = cleaned_data.get("browsable_url") + + if (not json or json == "") and (not source or source == "") and (not browsable_url or browsable_url == ""): + raise ValidationError(_("You need to fill in either the json or the source possibly supplemented by the navigable URL.")) + + # Always return a value to use as the new cleaned data, even if + # this method didn't change it. 
+ return cleaned_data diff --git a/src/agenda_culturel/importation.py b/src/agenda_culturel/importation.py index d4da385..2760438 100644 --- a/src/agenda_culturel/importation.py +++ b/src/agenda_culturel/importation.py @@ -1,14 +1,37 @@ from agenda_culturel.models import Event import json +from datetime import datetime class EventsImporter: def __init__(self, celery_id): self.celery_id = celery_id + self.error_message = "" + self.init_result_properties() + def init_result_properties(self): + self.event_objects = [] + self.db_event_objects = [] + self.nb_updated = 0 + self.nb_removed = 0 + self.date = None + self.url = None + + def get_nb_events(self): + return len(self.event_objects) + + def get_nb_imported_events(self): + return len(self.db_event_objects) + + def get_nb_updated_events(self): + return self.nb_updated + + def get_nb_removed_events(self): + return self.nb_removed def import_events(self, json_structure): + self.init_result_properties() try: structure = json.loads(json_structure) @@ -17,32 +40,53 @@ class EventsImporter: if not "header" in structure: return (False, "JSON is not correctly structured: missing header") + if not "events" in structure: + return (False, "JSON is not correctly structured: missing events") if "url" in structure["header"]: self.url = structure["header"]["url"] else: return (False, "JSON is not correctly structured: missing url in header") - if "url" in structure["header"]: + if "date" in structure["header"]: self.date = structure["header"]["date"] - # load events + # get events for event in structure["events"]: - self.import_event(event) + if "created_date" not in event: + if self.date is not None: + event["created_date"] = self.date + else: + event["created_date"] = datetime.now() - # update object with infos from header, and with the list of imported objects - # TODO - - # events that are missing from the import but in database are turned into drafts - # TODO + if not self.import_event(event): + return (False, 
self.error_message) + + # import them + self.save_imported() return (True, "") + def save_imported(self): + self.db_event_objects, self.nb_updated, self.nb_removed = Event.import_events(self.event_objects, remove_missing=True) + + def is_valid_event_structure(self, event): + if "title" not in event: + self.error_message = "JSON is not correctly structured: one event without title" + return False + if "start_day" not in event: + self.error_message = "JSON is not correctly structured: one event without start_day" + return False + return True + + def import_event(self, event): - # TODO - - pass - + if self.is_valid_event_structure(event): + event_obj = Event.from_structure(event) + self.event_objects.append(event_obj) + return True + else: + return False diff --git a/src/agenda_culturel/locale/fr/LC_MESSAGES/django.po b/src/agenda_culturel/locale/fr/LC_MESSAGES/django.po index 3ff3e29..ac3f050 100644 --- a/src/agenda_culturel/locale/fr/LC_MESSAGES/django.po +++ b/src/agenda_culturel/locale/fr/LC_MESSAGES/django.po @@ -8,7 +8,7 @@ msgid "" msgstr "" "Project-Id-Version: agenda_culturel\n" "Report-Msgid-Bugs-To: \n" -"POT-Creation-Date: 2023-12-23 08:38+0000\n" +"POT-Creation-Date: 2023-12-29 15:35+0000\n" "PO-Revision-Date: 2023-10-29 14:16+0000\n" "Last-Translator: Jean-Marie Favreau \n" "Language-Team: Jean-Marie Favreau \n" @@ -17,261 +17,311 @@ msgstr "" "Content-Type: text/plain; charset=UTF-8\n" "Content-Transfer-Encoding: 8bit\n" -#: agenda_culturel/forms.py:37 +#: agenda_culturel/forms.py:38 msgid "The end date must be after the start date." msgstr "La date de fin doit être après la date de début." -#: agenda_culturel/forms.py:52 +#: agenda_culturel/forms.py:53 msgid "The end time cannot be earlier than the start time." msgstr "L'heure de fin ne peut pas être avant l'heure de début." 
-#: agenda_culturel/models.py:23 agenda_culturel/models.py:52 -#: agenda_culturel/models.py:211 +#: agenda_culturel/forms.py:61 +msgid "" +"JSON in the format expected for the import. If the JSON is provided here, we " +"will ignore the URLs given above, and use the information provided by the " +"json without importing any additional events from the URL." +msgstr "" +"JSON au format attendu pour l'import. Si le JSON est fourni ici, on ignorera " +"les URL données au dessus, et on utilisera les informations fournies par le " +"json sans réaliser d'importation supplémentaire d'événements depuis l'URL." + +#: agenda_culturel/forms.py:74 +msgid "" +"You need to fill in either the json or the source possibly supplemented by " +"the navigable URL." +msgstr "" +"Vous devez renseigner soit le json soit la source éventuellement complétée " +"de l'URL navigable." + +#: agenda_culturel/models.py:26 agenda_culturel/models.py:55 +#: agenda_culturel/models.py:465 msgid "Name" msgstr "Nom" -#: agenda_culturel/models.py:23 agenda_culturel/models.py:52 +#: agenda_culturel/models.py:26 agenda_culturel/models.py:55 msgid "Category name" msgstr "Nom de la catégorie" -#: agenda_culturel/models.py:24 +#: agenda_culturel/models.py:27 msgid "Content" msgstr "Contenu" -#: agenda_culturel/models.py:24 +#: agenda_culturel/models.py:27 msgid "Text as shown to the visitors" msgstr "Text tel que présenté aux visiteureuses" -#: agenda_culturel/models.py:25 +#: agenda_culturel/models.py:28 msgid "URL path" msgstr "" -#: agenda_culturel/models.py:25 +#: agenda_culturel/models.py:28 msgid "URL path where the content is included." 
msgstr "" -#: agenda_culturel/models.py:53 +#: agenda_culturel/models.py:56 msgid "Alternative Name" msgstr "Nom alternatif" -#: agenda_culturel/models.py:53 +#: agenda_culturel/models.py:56 msgid "Alternative name used with a time period" msgstr "Nom alternatif utilisé avec une période de temps" -#: agenda_culturel/models.py:54 +#: agenda_culturel/models.py:57 msgid "Short name" msgstr "Nom court" -#: agenda_culturel/models.py:54 +#: agenda_culturel/models.py:57 msgid "Short name of the category" msgstr "Nom court de la catégorie" -#: agenda_culturel/models.py:55 +#: agenda_culturel/models.py:58 msgid "Color" msgstr "Couleur" -#: agenda_culturel/models.py:55 +#: agenda_culturel/models.py:58 msgid "Color used as background for the category" msgstr "Couleur utilisée comme fond de la catégorie" -#: agenda_culturel/models.py:92 agenda_culturel/models.py:109 +#: agenda_culturel/models.py:95 agenda_culturel/models.py:142 msgid "Category" msgstr "Catégorie" -#: agenda_culturel/models.py:93 +#: agenda_culturel/models.py:96 msgid "Categories" msgstr "Catégories" -#: agenda_culturel/models.py:98 +#: agenda_culturel/models.py:130 msgid "Published" msgstr "Publié" -#: agenda_culturel/models.py:99 +#: agenda_culturel/models.py:131 msgid "Draft" msgstr "Brouillon" -#: agenda_culturel/models.py:100 +#: agenda_culturel/models.py:132 msgid "Trash" msgstr "Corbeille" -#: agenda_culturel/models.py:105 +#: agenda_culturel/models.py:138 msgid "Title" msgstr "Titre" -#: agenda_culturel/models.py:105 +#: agenda_culturel/models.py:138 msgid "Short title" msgstr "Titre court" -#: agenda_culturel/models.py:107 agenda_culturel/models.py:238 +#: agenda_culturel/models.py:140 agenda_culturel/models.py:492 msgid "Status" msgstr "Status" -#: agenda_culturel/models.py:109 +#: agenda_culturel/models.py:142 msgid "Category of the event" msgstr "Catégorie de l'événement" -#: agenda_culturel/models.py:111 +#: agenda_culturel/models.py:144 msgid "Day of the event" msgstr "Date de l'événement" -#: 
agenda_culturel/models.py:112 +#: agenda_culturel/models.py:145 msgid "Starting time" msgstr "Heure de début" -#: agenda_culturel/models.py:114 +#: agenda_culturel/models.py:147 msgid "End day of the event" msgstr "Fin de l'événement" -#: agenda_culturel/models.py:114 +#: agenda_culturel/models.py:147 msgid "End day of the event, only required if different from the start day." msgstr "" "Date de fin de l'événement, uniquement nécessaire s'il est différent du " "premier jour de l'événement" -#: agenda_culturel/models.py:115 +#: agenda_culturel/models.py:148 msgid "Final time" msgstr "Heure de fin" -#: agenda_culturel/models.py:117 +#: agenda_culturel/models.py:150 msgid "Location" msgstr "Localisation" -#: agenda_culturel/models.py:117 +#: agenda_culturel/models.py:150 msgid "Address of the event" msgstr "Adresse de l'événement" -#: agenda_culturel/models.py:119 +#: agenda_culturel/models.py:152 msgid "Description" msgstr "Description" -#: agenda_culturel/models.py:119 +#: agenda_culturel/models.py:152 msgid "General description of the event" msgstr "Description générale de l'événement" -#: agenda_culturel/models.py:121 +#: agenda_culturel/models.py:154 msgid "Illustration (local image)" msgstr "Illustration (image locale)" -#: agenda_culturel/models.py:121 +#: agenda_culturel/models.py:154 msgid "Illustration image stored in the agenda server" msgstr "Image d'illustration stockée sur le serveur de l'agenda" -#: agenda_culturel/models.py:123 +#: agenda_culturel/models.py:156 msgid "Illustration" msgstr "Illustration" -#: agenda_culturel/models.py:123 +#: agenda_culturel/models.py:156 msgid "URL of the illustration image" msgstr "URL de l'image illustrative" -#: agenda_culturel/models.py:124 +#: agenda_culturel/models.py:157 msgid "Illustration description" msgstr "Description de l'illustration" -#: agenda_culturel/models.py:124 +#: agenda_culturel/models.py:157 msgid "Alternative text used by screen readers for the image" msgstr "Texte alternatif utiliser par les 
lecteurs d'écrans pour l'image" -#: agenda_culturel/models.py:126 +#: agenda_culturel/models.py:159 +msgid "UUIDs" +msgstr "UUIDs" + +#: agenda_culturel/models.py:159 +msgid "UUIDs from import to detect duplicated entries." +msgstr "UUIDs utilisés pendant l'import pour détecter les entrées dupliquées" + +#: agenda_culturel/models.py:160 msgid "URLs" msgstr "URLs" -#: agenda_culturel/models.py:126 +#: agenda_culturel/models.py:160 msgid "List of all the urls where this event can be found." msgstr "Liste de toutes les urls où l'événement peut être trouvé." -#: agenda_culturel/models.py:128 +#: agenda_culturel/models.py:162 msgid "Tags" msgstr "Étiquettes" -#: agenda_culturel/models.py:128 +#: agenda_culturel/models.py:162 msgid "A list of tags that describe the event." msgstr "Une liste d'étiquettes décrivant l'événement" -#: agenda_culturel/models.py:158 +#: agenda_culturel/models.py:164 +msgid "Possibly duplicated" +msgstr "Possibles doublons" + +#: agenda_culturel/models.py:194 msgid "Event" msgstr "Événement" -#: agenda_culturel/models.py:159 +#: agenda_culturel/models.py:195 msgid "Events" msgstr "Événements" -#: agenda_culturel/models.py:210 +#: agenda_culturel/models.py:464 msgid "Subject" msgstr "Sujet" -#: agenda_culturel/models.py:210 +#: agenda_culturel/models.py:464 msgid "The subject of your message" msgstr "Sujet de votre message" -#: agenda_culturel/models.py:211 +#: agenda_culturel/models.py:465 msgid "Your name" msgstr "Votre nom" -#: agenda_culturel/models.py:212 +#: agenda_culturel/models.py:466 msgid "Email address" msgstr "Adresse email" -#: agenda_culturel/models.py:212 +#: agenda_culturel/models.py:466 msgid "Your email address" msgstr "Votre adresse email" -#: agenda_culturel/models.py:213 +#: agenda_culturel/models.py:467 msgid "Message" msgstr "Message" -#: agenda_culturel/models.py:213 +#: agenda_culturel/models.py:467 msgid "Your message" msgstr "Votre message" -#: agenda_culturel/models.py:217 agenda_culturel/views.py:341 +#: 
agenda_culturel/models.py:471 agenda_culturel/views.py:343 msgid "Closed" msgstr "Fermé" -#: agenda_culturel/models.py:217 +#: agenda_culturel/models.py:471 msgid "this message has been processed and no longer needs to be handled" msgstr "Ce message a été traité et ne nécessite plus d'être pris en charge" -#: agenda_culturel/models.py:218 +#: agenda_culturel/models.py:472 msgid "Comments" msgstr "Commentaires" -#: agenda_culturel/models.py:218 +#: agenda_culturel/models.py:472 msgid "Comments on the message from the moderation team" msgstr "Commentaires sur ce message par l'équipe de modération" -#: agenda_culturel/models.py:227 +#: agenda_culturel/models.py:481 msgid "Running" msgstr "" -#: agenda_culturel/models.py:228 +#: agenda_culturel/models.py:482 msgid "Canceled" msgstr "Annulé" -#: agenda_culturel/models.py:229 +#: agenda_culturel/models.py:483 msgid "Success" msgstr "Succès" -#: agenda_culturel/models.py:230 +#: agenda_culturel/models.py:484 msgid "Failed" msgstr "Erreur" -#: agenda_culturel/models.py:235 +#: agenda_culturel/models.py:489 msgid "Source" msgstr "Source" -#: agenda_culturel/models.py:235 +#: agenda_culturel/models.py:489 msgid "URL of the source document" msgstr "URL du document source" -#: agenda_culturel/models.py:236 +#: agenda_culturel/models.py:490 msgid "Browsable url" msgstr "URL navigable" -#: agenda_culturel/models.py:236 +#: agenda_culturel/models.py:490 msgid "URL of the corresponding document that will be shown to visitors." 
msgstr "URL correspondant au document et qui sera montrée aux visiteurs" +#: agenda_culturel/models.py:494 +msgid "Error message" +msgstr "Message d'erreur" + +#: agenda_culturel/models.py:496 +msgid "Number of collected events" +msgstr "Nombre d'événements collectés" + +#: agenda_culturel/models.py:497 +msgid "Number of imported events" +msgstr "Nombre d'événements importés" + +#: agenda_culturel/models.py:498 +msgid "Number of updated events" +msgstr "Nombre d'événements mis à jour" + +#: agenda_culturel/models.py:499 +msgid "Number of removed events" +msgstr "Nombre d'événements supprimés" + #: agenda_culturel/settings/base.py:134 msgid "English" msgstr "anglais" @@ -280,27 +330,27 @@ msgstr "anglais" msgid "French" msgstr "français" -#: agenda_culturel/views.py:188 +#: agenda_culturel/views.py:190 msgid "The static content has been successfully updated." msgstr "Le contenu statique a été modifié avec succès." -#: agenda_culturel/views.py:194 +#: agenda_culturel/views.py:196 msgid "The event has been successfully modified." msgstr "L'événement a été modifié avec succès." -#: agenda_culturel/views.py:205 +#: agenda_culturel/views.py:207 msgid "The event has been successfully deleted." msgstr "L'événement a été supprimé avec succès" -#: agenda_culturel/views.py:222 +#: agenda_culturel/views.py:224 msgid "The status has been successfully modified." msgstr "Le status a été modifié avec succès." -#: agenda_culturel/views.py:244 +#: agenda_culturel/views.py:246 msgid "The event is saved." msgstr "L'événement est enregistré." -#: agenda_culturel/views.py:247 +#: agenda_culturel/views.py:249 msgid "" "The event has been submitted and will be published as soon as it has been " "validated by the moderation team." @@ -308,7 +358,7 @@ msgstr "" "L'événement a été soumis et sera publié dès qu'il aura été validé par " "l'équipe de modération." 
-#: agenda_culturel/views.py:277 +#: agenda_culturel/views.py:279 msgid "" "The event has been successfully extracted, and you can now submit it after " "modifying it if necessary." @@ -316,7 +366,7 @@ msgstr "" "L'événement a été extrait avec succès, vous pouvez maintenant le soumettre " "après l'avoir modifié au besoin." -#: agenda_culturel/views.py:281 +#: agenda_culturel/views.py:283 msgid "" "Unable to extract an event from the proposed URL. Please use the form below " "to submit the event." @@ -324,12 +374,12 @@ msgstr "" "Impossible d'extraire un événement depuis l'URL proposée. Veuillez utiliser " "le formulaire ci-dessous pour soumettre l'événement." -#: agenda_culturel/views.py:290 +#: agenda_culturel/views.py:292 msgid "This URL has already been submitted, and you can find the event below." msgstr "" "Cette URL a déjà été soumise, et vous trouverez l'événement ci-dessous." -#: agenda_culturel/views.py:294 +#: agenda_culturel/views.py:296 msgid "" "This URL has already been submitted, but has not been selected for " "publication by the moderation team." @@ -337,23 +387,23 @@ msgstr "" "Cette URL a déjà été soumise, mais n'a pas été retenue par l'équipe de " "modération pour la publication." -#: agenda_culturel/views.py:296 +#: agenda_culturel/views.py:298 msgid "This URL has already been submitted and is awaiting moderation." msgstr "Cette URL a déjà été soumise, et est en attente de modération" -#: agenda_culturel/views.py:318 +#: agenda_culturel/views.py:320 msgid "Your message has been sent successfully." msgstr "L'événement a été supprimé avec succès" -#: agenda_culturel/views.py:326 +#: agenda_culturel/views.py:328 msgid "The contact message properties has been successfully modified." msgstr "Les propriétés du message de contact ont été modifié avec succès." 
-#: agenda_culturel/views.py:341 +#: agenda_culturel/views.py:343 msgid "Open" msgstr "Ouvert" -#: agenda_culturel/views.py:381 +#: agenda_culturel/views.py:383 msgid "Search" msgstr "Rechercher" @@ -361,7 +411,7 @@ msgstr "Rechercher" msgid "The import has been run successfully." msgstr "L'import a été lancé avec succès" -#: agenda_culturel/views.py:507 +#: agenda_culturel/views.py:521 msgid "The import has been canceled." msgstr "L'import a été annulé" diff --git a/src/agenda_culturel/migrations/0016_batchimportation_nb_removed.py b/src/agenda_culturel/migrations/0016_batchimportation_nb_removed.py new file mode 100644 index 0000000..0fc171f --- /dev/null +++ b/src/agenda_culturel/migrations/0016_batchimportation_nb_removed.py @@ -0,0 +1,18 @@ +# Generated by Django 4.2.7 on 2023-12-23 12:51 + +from django.db import migrations, models + + +class Migration(migrations.Migration): + + dependencies = [ + ('agenda_culturel', '0015_event_uuids'), + ] + + operations = [ + migrations.AddField( + model_name='batchimportation', + name='nb_removed', + field=models.PositiveIntegerField(default=0, verbose_name='Number of removed events'), + ), + ] diff --git a/src/agenda_culturel/migrations/0017_batchimportation_nb_updated.py b/src/agenda_culturel/migrations/0017_batchimportation_nb_updated.py new file mode 100644 index 0000000..9fd4d2c --- /dev/null +++ b/src/agenda_culturel/migrations/0017_batchimportation_nb_updated.py @@ -0,0 +1,18 @@ +# Generated by Django 4.2.7 on 2023-12-23 13:01 + +from django.db import migrations, models + + +class Migration(migrations.Migration): + + dependencies = [ + ('agenda_culturel', '0016_batchimportation_nb_removed'), + ] + + operations = [ + migrations.AddField( + model_name='batchimportation', + name='nb_updated', + field=models.PositiveIntegerField(default=0, verbose_name='Number of updated events'), + ), + ] diff --git a/src/agenda_culturel/migrations/0018_event_imported_date_alter_event_created_date_and_more.py 
b/src/agenda_culturel/migrations/0018_event_imported_date_alter_event_created_date_and_more.py new file mode 100644 index 0000000..62a8ace --- /dev/null +++ b/src/agenda_culturel/migrations/0018_event_imported_date_alter_event_created_date_and_more.py @@ -0,0 +1,28 @@ +# Generated by Django 4.2.7 on 2023-12-23 13:31 + +from django.db import migrations, models + + +class Migration(migrations.Migration): + + dependencies = [ + ('agenda_culturel', '0017_batchimportation_nb_updated'), + ] + + operations = [ + migrations.AddField( + model_name='event', + name='imported_date', + field=models.DateTimeField(blank=True, null=True), + ), + migrations.AlterField( + model_name='event', + name='created_date', + field=models.DateTimeField(editable=False), + ), + migrations.AlterField( + model_name='event', + name='modified_date', + field=models.DateTimeField(blank=True, null=True), + ), + ] diff --git a/src/agenda_culturel/migrations/0019_duplicatedevents_event_possibly_duplicated.py b/src/agenda_culturel/migrations/0019_duplicatedevents_event_possibly_duplicated.py new file mode 100644 index 0000000..46004d2 --- /dev/null +++ b/src/agenda_culturel/migrations/0019_duplicatedevents_event_possibly_duplicated.py @@ -0,0 +1,25 @@ +# Generated by Django 4.2.7 on 2023-12-29 11:44 + +from django.db import migrations, models +import django.db.models.deletion + + +class Migration(migrations.Migration): + + dependencies = [ + ('agenda_culturel', '0018_event_imported_date_alter_event_created_date_and_more'), + ] + + operations = [ + migrations.CreateModel( + name='DuplicatedEvents', + fields=[ + ('id', models.BigAutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')), + ], + ), + migrations.AddField( + model_name='event', + name='possibly_duplicated', + field=models.ForeignKey(blank=True, null=True, on_delete=django.db.models.deletion.SET_NULL, to='agenda_culturel.duplicatedevents'), + ), + ] diff --git a/src/agenda_culturel/migrations/0020_trigram_extension.py 
b/src/agenda_culturel/migrations/0020_trigram_extension.py new file mode 100644 index 0000000..5b72be3 --- /dev/null +++ b/src/agenda_culturel/migrations/0020_trigram_extension.py @@ -0,0 +1,11 @@ +from django.db import migrations +from django.contrib.postgres.operations import TrigramExtension + + +class Migration(migrations.Migration): + + dependencies = [ + ('agenda_culturel', '0019_duplicatedevents_event_possibly_duplicated'), + ] + + operations = [TrigramExtension()] diff --git a/src/agenda_culturel/models.py b/src/agenda_culturel/models.py index 933e4ff..97d61e9 100644 --- a/src/agenda_culturel/models.py +++ b/src/agenda_culturel/models.py @@ -9,6 +9,9 @@ from urllib.parse import urlparse import urllib.request import os from django.core.files import File +from django.utils import timezone +from django.contrib.postgres.search import TrigramSimilarity +from django.db.models import Q from django.template.defaultfilters import date as _date @@ -92,6 +95,35 @@ class Category(models.Model): verbose_name = _('Category') verbose_name_plural = _('Categories') + +class DuplicatedEvents(models.Model): + + def nb_duplicated(self): + return Event.objects.filter(possibly_duplicated=self).count() + + def merge_into(self, other): + # for all objects associated to this group + for e in Event.objects.filter(possibly_duplicated=self): + # change their group membership + e.possibly_duplicated = other + # save them + e.save() + # then delete the empty group + self.delete() + + def merge_groups(groups): + if len(groups) == 0: + return None + elif len(groups) == 1: + return groups[0] + else: + result = groups[0] + for g in groups[1:]: + g.merge_into(result) + return result + + + class Event(models.Model): class STATUS(models.TextChoices): @@ -99,8 +131,9 @@ class Event(models.Model): DRAFT = "draft", _("Draft") TRASH = "trash", _("Trash") - created_date = models.DateTimeField(auto_now_add=True) - modified_date = models.DateTimeField(auto_now=True) + created_date = 
models.DateTimeField(editable=False) + imported_date = models.DateTimeField(blank=True, null=True) + modified_date = models.DateTimeField(blank=True, null=True) title = models.CharField(verbose_name=_('Title'), help_text=_('Short title'), max_length=512) @@ -128,6 +161,8 @@ class Event(models.Model): tags = ArrayField(models.CharField(max_length=64), verbose_name=_('Tags'), help_text=_("A list of tags that describe the event."), blank=True, null=True) + possibly_duplicated = models.ForeignKey(DuplicatedEvents, verbose_name=_('Possibly duplicated'), on_delete=models.SET_NULL, null=True, blank=True) + def get_consolidated_end_day(self, intuitive=True): if intuitive: end_day = self.get_consolidated_end_day(False) @@ -180,30 +215,248 @@ class Event(models.Model): return self.status == Event.STATUS.TRASH def modified(self): - return abs((self.modified_date - self.created_date).total_seconds()) > 1 + return self.modified_date is None or abs((self.modified_date - self.created_date).total_seconds()) > 1 def nb_draft_events(): return Event.objects.filter(status=Event.STATUS.DRAFT).count() - def save(self, *args, **kwargs): + + def download_image(self): + # first download file + + a = urlparse(self.image) + basename = os.path.basename(a.path) + + try: + tmpfile, _ = urllib.request.urlretrieve(self.image) + except: + return None + + # if the download is ok, then create the corresponding file object + self.local_image = File(name=basename, file=open(tmpfile, "rb")) + + + def update_dates(self): + now = timezone.now() + if not self.id: + self.created_date = now + if hasattr(self, "require_imported_date"): + self.imported_date = now + self.modified_date = now + + + def prepare_save(self): + self.update_dates() + # if the image is defined but not locally downloaded if self.image and not self.local_image: - # first download file - - a = urlparse(self.image) - basename = os.path.basename(a.path) + self.download_image() - try: - tmpfile, _ = urllib.request.urlretrieve(self.image) - 
# NOTE(review): residue of the surrounding diff, kept verbatim as comments. It is
# the tail of a method that starts before this excerpt (the "-" lines are the
# ones removed by the commit):
#         except:
# -           return None
# -       # if the download is ok, then create the corresponding file object
# -       self.local_image = File(name=basename, file=open(tmpfile, "rb"))

# The functions below are methods of the Event model: elsewhere in this diff
# they are called as ``event.xxx(...)`` or ``Event.xxx(...)``. The class header
# is outside this excerpt. Functions without ``self`` are called through the
# class (e.g. ``Event.data_fields()``).


def save(self, *args, **kwargs):
    """Prepare the event, drop a one-element duplicate group, then save it.

    ``prepare_save()`` (defined outside this excerpt) is always called first.
    All arguments are forwarded unchanged to the parent ``save()``.
    """
    self.prepare_save()

    # a "possibly duplicated" group with a single element is meaningless:
    # delete the group and detach the event from it
    group = self.possibly_duplicated
    if group is not None and group.nb_duplicated() == 1:
        group.delete()
        self.possibly_duplicated = None

    super().save(*args, **kwargs)


def from_structure(event_structure):
    """Build an unsaved Event from the dictionary produced by an importer.

    Importer-side keys are translated to model fields:

    * ``category`` (a name) -> the Category with that name,
    * ``uuid`` -> ``uuids`` (one-element list),
    * ``published`` -> ``status`` (PUBLISHED only if the flag is truthy,
      DRAFT otherwise),
    * ``url_human`` -> ``reference_urls`` (one-element list),
    * ``last_modified`` -> ``created_date`` (now, when missing),
    * ``start_time`` / ``end_time`` ISO strings -> ``datetime.time``,
    * a ``None`` ``description`` -> ``""``.

    The given dictionary is not modified. ``Category.objects.get`` raises if
    no category has the given name.
    """
    # work on a shallow copy so that the caller's dictionary is left untouched
    data = dict(event_structure)

    if data.get("category") is not None:
        data["category"] = Category.objects.get(name=data["category"])

    # The importer-side keys below are not Event fields: always remove them,
    # even when their value is None, so they never reach Event(**data).
    uuid_value = data.pop("uuid", None)
    if uuid_value is not None:
        data["uuids"] = [uuid_value]

    # only a truthy flag publishes the event (False must not publish it)
    published = data.pop("published", None)
    data["status"] = Event.STATUS.PUBLISHED if published else Event.STATUS.DRAFT

    url_human = data.pop("url_human", None)
    if url_human is not None:
        data["reference_urls"] = [url_human]

    last_modified = data.pop("last_modified", None)
    data["created_date"] = last_modified if last_modified is not None else timezone.now()

    # times come as ISO strings; leave None (or already parsed values) alone
    for key in ("start_time", "end_time"):
        if isinstance(data.get(key), str):
            data[key] = time.fromisoformat(data[key])

    if "description" in data and data["description"] is None:
        data["description"] = ""

    return Event(**data)


def find_similar_events(self):
    """Return the events that look like this one.

    An event is similar when it happens the same day, starts at the same
    time or within one hour around it (clamped to 00:00 - 23:59), and has a
    close title (trigram similarity > 0.5) and a close location (trigram
    similarity > 0.3).

    Side effect: a ``start_time`` given as an ISO string is converted to
    ``datetime.time`` on the instance.
    """
    start_time_test = Q(start_time=self.start_time)

    if self.start_time is not None:
        # convert str start_time to time
        if isinstance(self.start_time, str):
            self.start_time = time.fromisoformat(self.start_time)
        start = self.start_time
        earliest = time(start.hour - 1, start.minute) if start.hour >= 1 else time(0, 0)
        latest = time(start.hour + 1, start.minute) if start.hour < 23 else time(23, 59)
        start_time_test = start_time_test | Q(start_time__range=(earliest, latest))

    return (
        Event.objects
        .annotate(similarity_title=TrigramSimilarity("title", self.title))
        .annotate(similarity_location=TrigramSimilarity("location", self.location))
        .filter(
            Q(start_day=self.start_day)
            & start_time_test
            & Q(similarity_title__gt=0.5)
            # the location annotation was computed but the title was tested twice
            & Q(similarity_location__gt=0.3)
        )
    )


def find_same_events_by_uuid(self):
    """Return the events whose ``uuids`` contain all the uuids of this one.

    Returns ``None`` (not an empty queryset) when this event has no uuid.
    """
    if not self.uuids:
        return None
    return Event.objects.filter(uuids__contains=self.uuids)


def get_possibly_duplicated(self):
    """Return the other events of this event's duplicate group.

    Returns an empty list when the event belongs to no group.
    """
    if self.possibly_duplicated is None:
        return []
    return Event.objects.filter(possibly_duplicated=self.possibly_duplicated).exclude(pk=self.pk)


def set_possibly_duplicated(self, events):
    """Put this event and ``events`` in one common duplicate group.

    Existing groups of the involved events are merged; a new group is
    created when none of them has one. The other events are saved with a
    bulk update; the current event is only modified in memory.
    """
    # get existing groups
    candidates = {e.possibly_duplicated for e in events}
    candidates.add(self.possibly_duplicated)
    groups = [g for g in candidates if g is not None]

    if not groups:
        # no group yet: create a new one
        group = DuplicatedEvents.objects.create()
        logger.warning("set possibily duplicated 0 {}".format(group))
    else:
        # otherwise merge existing groups
        group = DuplicatedEvents.merge_groups(groups)
        logger.warning("set possibily duplicated not 0 {}".format(group))
        group.save()

    # set the possibly duplicated group for the current object
    self.possibly_duplicated = group

    # and for the other events
    for e in events:
        e.possibly_duplicated = group
    # finally save the other events
    Event.objects.bulk_update(events, fields=["possibly_duplicated"])


def data_fields():
    """Return the names of the fields that carry the content of an event.

    These are the fields compared by ``same_event_by_data`` and copied by
    ``update``.
    """
    return [
        "title",
        "location",
        "start_day",
        "start_time",
        "end_day",
        "end_time",
        "description",
        "image",
        "image_alt",
        "reference_urls",
    ]


def same_event_by_data(self, other):
    """Tell whether ``other`` has the same content as this event.

    Values of every data field are compared through their ``str()`` form.
    The first difference found is logged.
    """
    for attr in Event.data_fields():
        mine = getattr(self, attr)
        theirs = getattr(other, attr)
        if str(mine) != str(theirs):
            logger.warning("on trouve une différence dans {}: {} vs {}".format(attr, mine, theirs))
            return False
    return True


def find_same_event_by_data_in_list(self, events):
    """Return the events of ``events`` that have the same content as this one."""
    return [e for e in events if self.same_event_by_data(e)]


def find_last_imported_not_modified(events):
    """Return the most recently imported event that was not edited since.

    An event qualifies when it has an ``imported_date`` and no later
    ``modified_date``. Returns ``None`` when no event qualifies.
    """
    untouched = [
        e
        for e in events
        if e.imported_date is not None
        and (e.modified_date is None or e.modified_date <= e.imported_date)
    ]
    # on equal dates, the first event of the list wins
    return max(untouched, key=lambda e: e.imported_date, default=None)


def update(self, other):
    """Copy the content of ``other`` into this event (in memory only).

    Every data field is overwritten, and the uuids of ``other`` that are
    missing here are appended. A ``None`` ``other.uuids`` is treated as empty.
    """
    # TODO: what about category, tags?
    # set attributes
    for attr in Event.data_fields():
        setattr(self, attr, getattr(other, attr))
    # add a possible missing uuid
    if self.uuids is None:
        self.uuids = []
    for other_uuid in other.uuids or []:
        if other_uuid not in self.uuids:
            self.uuids.append(other_uuid)


# Limitation (applies to import_events, which follows): the given events should
# not be considered similar one to another...
def import_events(events, remove_missing=False):
    """Import a batch of (unsaved) events, updating the ones already known.

    For each event:

    * if events with the same uuid exist, the last one that was imported
      and not modified since is updated with the new data; when there is
      none, the event is imported and grouped with them as a possible
      duplicate;
    * otherwise similar events are searched: one with exactly the same data
      is updated, else the event is imported and grouped with the similar
      ones as a possible duplicate;
    * an event with no match at all is simply imported.

    Limitation: the given events should not be considered similar one to
    another.

    ``remove_missing`` is not implemented yet (see TODO below).

    Returns ``(imported, nb_updated, nb_removed)``: the result of
    ``bulk_create``, the result of ``bulk_update`` and ``0``.
    """
    to_import = []
    to_update = []

    def queue_update(existing, imported):
        # refresh an event already in database with the imported data
        existing.update(imported)
        existing.require_imported_date = True
        existing.prepare_save()
        to_update.append(existing)

    # for each event, check if it's a new one, or one to be updated
    for event in events:
        # imported events should be updated
        event.require_imported_date = True
        event.prepare_save()

        # check if the event has already been imported (using uuid);
        # the result is None when the event has no uuid
        same_events = event.find_same_events_by_uuid()

        if same_events:
            # check if one event has been imported and not modified in this list
            same_imported = Event.find_last_imported_not_modified(same_events)
            if same_imported:
                # if this event exists, it will be updated with new data
                queue_update(same_imported, event)
            else:
                # otherwise, the new event is possibly a duplicate of the others
                event.set_possibly_duplicated(same_events)
                # it will be imported
                to_import.append(event)
            continue

        # uuid is unique (or not available): check for similar events
        similar_events = event.find_similar_events()
        if not similar_events:
            # import this new event
            to_import.append(event)
            continue

        # check if an event from the list is exactly the same as the new one (using data)
        same_events = event.find_same_event_by_data_in_list(similar_events)
        if same_events:
            # merge with the first one
            queue_update(same_events[0], event)
        else:
            # the event is possibly a duplicate of the others
            event.set_possibly_duplicated(similar_events)
            to_import.append(event)

    # then import all the new events
    imported = Event.objects.bulk_create(to_import)
    nb_updated = Event.objects.bulk_update(
        to_update,
        fields=Event.data_fields() + ["imported_date", "modified_date", "uuids"],
    )

    nb_removed = 0
    if remove_missing:
        # events that are missing from the import but in database are turned into drafts
        # TODO
        # TODO: add self.source, or refer to the BatchImportation object
        pass

    return imported, nb_updated, nb_removed


# NOTE(review): remainder of the original line, kept verbatim as comments. It is
# the end of the models.py hunk followed by the diffs of other files:
#
#  class ContactMessage(models.Model):
# @@ -242,5 +495,7 @@ class BatchImportation(models.Model):
#      nb_initial = models.PositiveIntegerField(verbose_name=_('Number of collected events'), default=0)
#      nb_imported = models.PositiveIntegerField(verbose_name=_('Number of imported events'), default=0)
# +    nb_updated = models.PositiveIntegerField(verbose_name=_('Number of updated events'), default=0)
# +    nb_removed = models.PositiveIntegerField(verbose_name=_('Number of removed events'), default=0)
#      celery_id = models.CharField(max_length=128, default="")
# diff --git a/src/agenda_culturel/settings/base.py b/src/agenda_culturel/settings/base.py
# index 18a0edb..e6e0e17 100644
# --- a/src/agenda_culturel/settings/base.py
# +++ b/src/agenda_culturel/settings/base.py
# @@ -196,3 +196,6 @@ if os_getenv("EMAIL_BACKEND"):
#  EMAIL_USE_TLS = os_getenv("EMAIL_USE_TLS", False)
#  EMAIL_USE_SSL = os_getenv("EMAIL_USE_SSL", False)
#  DEFAULT_FROM_EMAIL = os_getenv("DEFAULT_FROM_EMAIL")
# +
# +# increase upload size for debug experiments
# +DATA_UPLOAD_MAX_MEMORY_SIZE = 10 * 2621440
# \ No newline at end of file
# diff --git a/src/agenda_culturel/templates/agenda_culturel/batchimportation_form.html b/src/agenda_culturel/templates/agenda_culturel/batchimportation_form.html
# index 5e8761c..903a165 100644
# --- a/src/agenda_culturel/templates/agenda_culturel/batchimportation_form.html
# +++ b/src/agenda_culturel/templates/agenda_culturel/batchimportation_form.html
# @@ -11,12 +11,6 @@
{% csrf_token %} {{ form.as_p }} -

- - - JSON au format attendu pour l'import. Si le JSON est fourni ici, on ignorera les URL données au dessus, et on utilisera les informations fournies par le json sans réaliser d'importation supplémentaire d'événements depuis l'URL. -

-
diff --git a/src/agenda_culturel/templates/agenda_culturel/imports.html b/src/agenda_culturel/templates/agenda_culturel/imports.html index a894c2b..45dee0c 100644 --- a/src/agenda_culturel/templates/agenda_culturel/imports.html +++ b/src/agenda_culturel/templates/agenda_culturel/imports.html @@ -1,4 +1,4 @@ -!<{% extends "agenda_culturel/page.html" %} +{% extends "agenda_culturel/page.html" %} {% block title %}Importations par lot{% endblock %} @@ -19,10 +19,17 @@ - - - - + + + + + + + + + + + @@ -32,6 +39,10 @@ + + + + {% endfor %} diff --git a/src/agenda_culturel/templates/agenda_culturel/page-event.html b/src/agenda_culturel/templates/agenda_culturel/page-event.html index db80e83..4188fa7 100644 --- a/src/agenda_culturel/templates/agenda_culturel/page-event.html +++ b/src/agenda_culturel/templates/agenda_culturel/page-event.html @@ -16,6 +16,7 @@ {% include "agenda_culturel/single-event/event-single-inc.html" with event=event filter=filter %} diff --git a/src/agenda_culturel/views.py b/src/agenda_culturel/views.py index 4d24df3..37453ae 100644 --- a/src/agenda_culturel/views.py +++ b/src/agenda_culturel/views.py @@ -11,7 +11,7 @@ from django.http import HttpResponseRedirect from django.urls import reverse import urllib -from .forms import EventSubmissionForm, EventForm +from .forms import EventSubmissionForm, EventForm, BatchImportationForm from .models import Event, Category, StaticContent, ContactMessage, BatchImportation from django.utils import timezone @@ -484,21 +484,22 @@ def imports(request): return render(request, 'agenda_culturel/imports.html', {'paginator_filter': response} ) - class BatchImportationCreateView(SuccessMessageMixin, LoginRequiredMixin, CreateView): model = BatchImportation - fields = ['source', 'browsable_url'] success_url = reverse_lazy('imports') success_message = _('The import has been run successfully.') + form_class = BatchImportationForm + + def form_valid(self, form): # run import if "json" in form.data and form.data["json"] is 
not None and form.data["json"].strip() != "": result = import_events_from_json.delay(form.data["json"]) else: - result = import_events_from_url.delay(self.object.source, self.object.browsable_url) + result = import_events_from_url.delay(form.data["source"], form.data["browsable_url"]) # update the object with celery_id form.instance.celery_id = result.id
IdentifiantDateStatusActionIdentifiantDateStatusActionévénements
initialimportésmis à joursupprimés
{{ obj.created_date }} {{ obj.status }} {% if obj.status == "running" %}Annuler{% endif %}{% if obj.status == "success" %}{{ obj.nb_initial }}{% endif %}{% if obj.status == "success" %}{{ obj.nb_imported }}{% endif %}{% if obj.status == "success" %}{{ obj.nb_updated }}{% endif %}{% if obj.status == "success" %}{{ obj.nb_removed }}{% endif %}