diff --git a/experimentations/get_ical_events.py b/experimentations/get_ical_events.py index 741ffd7..ff83786 100755 --- a/experimentations/get_ical_events.py +++ b/experimentations/get_ical_events.py @@ -151,6 +151,7 @@ class ICALExtractor(Extractor): print("Extracting ical events from {}".format(url)) self.set_header(url) self.clear_events() + self.uuids = {} calendar = icalendar.Calendar.from_ical(content) @@ -179,6 +180,11 @@ class ICALExtractor(Extractor): uuid = self.get_item_from_vevent(event, "UID") if uuid is not None: + if uuid in self.uuids: + self.uuids[uuid] += 1 + uuid += ":{:04}".format(self.uuids[uuid] - 1) + else: + self.uuids[uuid] = 1 event_url = url + "#" + uuid tags = self.default_value_if_exists(default_values, "tags") diff --git a/src/agenda_culturel/forms.py b/src/agenda_culturel/forms.py index 210e2c7..260c4ee 100644 --- a/src/agenda_culturel/forms.py +++ b/src/agenda_culturel/forms.py @@ -20,6 +20,7 @@ class EventForm(ModelForm): 'end_day': TextInput(attrs={'type': 'date'}), 'end_time': TextInput(attrs={'type': 'time'}), 'uuids': MultipleHiddenInput(), + 'import_sources': MultipleHiddenInput(), } diff --git a/src/agenda_culturel/importation.py b/src/agenda_culturel/importation.py index 9678de9..fba2a9e 100644 --- a/src/agenda_culturel/importation.py +++ b/src/agenda_culturel/importation.py @@ -82,7 +82,7 @@ class EventsImporter: return event["end_day"] >= self.today def save_imported(self): - self.db_event_objects, self.nb_updated, self.nb_removed = Event.import_events(self.event_objects, remove_missing=True) + self.db_event_objects, self.nb_updated, self.nb_removed = Event.import_events(self.event_objects, remove_missing_from_source=self.url) def is_valid_event_structure(self, event): @@ -97,7 +97,7 @@ class EventsImporter: def load_event(self, event): if self.is_valid_event_structure(event): - event_obj = Event.from_structure(event) + event_obj = Event.from_structure(event, self.url) self.event_objects.append(event_obj) return True else: diff --git a/src/agenda_culturel/migrations/0022_event_import_sources.py b/src/agenda_culturel/migrations/0022_event_import_sources.py new file mode 100644 index 0000000..3d84f2b --- /dev/null +++ b/src/agenda_culturel/migrations/0022_event_import_sources.py @@ -0,0 +1,19 @@ +# Generated by Django 4.2.7 on 2023-12-31 20:06 + +from django.db import migrations, models +import django_better_admin_arrayfield.models.fields + + +class Migration(migrations.Migration): + + dependencies = [ + ('agenda_culturel', '0021_alter_event_possibly_duplicated'), + ] + + operations = [ + migrations.AddField( + model_name='event', + name='import_sources', + field=django_better_admin_arrayfield.models.fields.ArrayField(base_field=models.CharField(max_length=512), blank=True, help_text='Importation source used to detect removed entries.', null=True, size=None, verbose_name='Importation source'), + ), + ] diff --git a/src/agenda_culturel/models.py b/src/agenda_culturel/models.py index 77154f1..c71343e 100644 --- a/src/agenda_culturel/models.py +++ b/src/agenda_culturel/models.py @@ -15,7 +15,7 @@ from django.db.models import Q from django.template.defaultfilters import date as _date -from datetime import time, timedelta +from datetime import time, timedelta, date from django.utils.timezone import datetime from django.utils import timezone @@ -160,6 +160,7 @@ class Event(models.Model): image = models.URLField(verbose_name=_('Illustration'), help_text=_("URL of the illustration image"), max_length=1024, blank=True, null=True) image_alt = models.CharField(verbose_name=_('Illustration description'), help_text=_('Alternative text used by screen readers for the image'), blank=True, null=True, max_length=1024) + import_sources = ArrayField(models.CharField(max_length=512), verbose_name=_('Importation source'), help_text=_("Importation source used to detect removed entries."), blank=True, null=True) uuids = ArrayField(models.CharField(max_length=512), verbose_name=_('UUIDs'), help_text=_("UUIDs from import to detect duplicated entries."), blank=True, null=True) reference_urls = ArrayField(models.URLField(max_length=512), verbose_name=_('URLs'), help_text=_("List of all the urls where this event can be found."), blank=True, null=True) @@ -295,7 +296,7 @@ class Event(models.Model): super().save(*args, **kwargs) - def from_structure(event_structure): + def from_structure(event_structure, import_source = None): if "category" in event_structure and event_structure["category"] is not None: event_structure["category"] = Category.objects.get(name=event_structure["category"]) @@ -331,6 +332,9 @@ class Event(models.Model): if "description" in event_structure and event_structure["description"] is None: event_structure["description"] = "" + if import_source is not None: + event_structure["import_sources"] = [import_source] + return Event(**event_structure) @@ -369,11 +373,11 @@ class Event(models.Model): # do we have to create a new group? if len(groups) == 0: group = DuplicatedEvents.objects.create() - logger.warning("set possibily duplicated 0 {}".format(group)) + logger.warning("set possibly duplicated 0 {}".format(group)) else: # otherwise merge existing groups group = DuplicatedEvents.merge_groups(groups) - logger.warning("set possibily duplicated not 0 {}".format(group)) + logger.warning("set possibly duplicated not 0 {}".format(group)) group.save() # set the possibly duplicated group for the current object @@ -392,7 +396,6 @@ class Event(models.Model): def same_event_by_data(self, other): for attr in Event.data_fields(): if str(getattr(self, attr)) != str(getattr(other, attr)): - logger.warning("on trouve une différence dans {}: {} vs {}".format(attr, getattr(self, attr), getattr(other, attr))) return False return True @@ -414,6 +417,11 @@ class Event(models.Model): # set attributes for attr in Event.data_fields(): setattr(self, attr, getattr(other, attr)) + + # set status according to the input status + if other.status is not None: + self.status = other.status + # add a possible missing uuid if self.uuids is None: self.uuids = [] @@ -423,12 +431,27 @@ class Event(models.Model): # Limitation: the given events should not be considered similar one to another... - def import_events(events, remove_missing=False): + def import_events(events, remove_missing_from_source=None): to_import = [] to_update = [] + min_date = timezone.now().date() + max_date = None + uuids = set() + # for each event, check if it's a new one, or a one to be updated for event in events: + sdate = date.fromisoformat(event.start_day) + edate = date.fromisoformat(event.end_day) + if min_date is None or min_date > sdate: + min_date = sdate + if max_date is None or max_date < sdate: + max_date = sdate + if max_date is None or (event.end_day is not None and max_date < edate): + max_date = edate + if len(event.uuids) > 0: + uuids.add(event.uuids[0]) + # imported events should be updated event.set_in_importation_process() event.prepare_save() @@ -442,8 +465,7 @@ class Event(models.Model): if same_imported: # if this event exists, it will be updated with new data only if the data is fresher - logger.warning("{} vs {}".format(same_imported.modified_date, event.modified_date)) - if same_imported.modified_date < event.modified_date: + if same_imported.modified_date < event.modified_date or event.status != same_imported.status: same_imported.update(event) same_imported.set_in_importation_process() same_imported.prepare_save() @@ -478,17 +500,26 @@ class Event(models.Model): # then import all the new events imported = Event.objects.bulk_create(to_import) - nb_updated = Event.objects.bulk_update(to_update, fields = Event.data_fields() + ["imported_date", "modified_date", "uuids"]) + nb_updated = Event.objects.bulk_update(to_update, fields = Event.data_fields() + ["imported_date", "modified_date", "uuids", "status"]) - nb_removed = 0 - if remove_missing: + nb_draft = 0 + if remove_missing_from_source is not None: # events that are missing from the import but in database are turned into drafts # only if they are in the future - # TODO - # TODO: ajouter self.source, ou faire référence à l'objet BatchImportation - pass - return imported, nb_updated, nb_removed + in_interval = Event.objects.filter(((Q(end_day__isnull=True) & Q(start_day__gte=min_date) & Q(start_day__lte=max_date)) | + (Q(end_day__isnull=False) & ~(Q(start_day__gt=max_date) | Q(end_day__lt=min_date)))) & Q(import_sources__contains=[remove_missing_from_source]) & Q(status=Event.STATUS.PUBLISHED) & Q(uuids__len__gt=0)) + + to_draft = [] + for e in in_interval: + if len(uuids.intersection(e.uuids)) == 0: + e.status = Event.STATUS.TRASH + e.prepare_save() + to_draft.append(e) + + nb_draft = Event.objects.bulk_update(to_draft, fields = ["status"]) + + return imported, nb_updated, nb_draft diff --git a/src/agenda_culturel/templates/agenda_culturel/imports.html b/src/agenda_culturel/templates/agenda_culturel/imports.html index 45dee0c..eadb3cb 100644 --- a/src/agenda_culturel/templates/agenda_culturel/imports.html +++ b/src/agenda_culturel/templates/agenda_culturel/imports.html @@ -29,7 +29,7 @@