diff --git a/src/agenda_culturel/db_importer.py b/src/agenda_culturel/db_importer.py
index 6dd3ed6..4b00652 100644
--- a/src/agenda_culturel/db_importer.py
+++ b/src/agenda_culturel/db_importer.py
@@ -1,10 +1,18 @@
 import json
 import logging
-from datetime import date
+from datetime import date, datetime, time
+
+from django.db.models import Q
+from django.contrib.auth.models import User
+from .import_tasks.extractor import Extractor
+from django.core.files.storage import default_storage
+from django.utils.translation import gettext_lazy as _
+import recurrence
+import recurrence.fields
 
 from django.utils import timezone
 
-from agenda_culturel.models import Event
+from agenda_culturel.models import Event, Message, Category, remove_accents
 
 logger = logging.getLogger(__name__)
 
@@ -106,12 +114,228 @@ class DBImporterEvents:
         return event["end_day"] >= self.today
 
     def save_imported(self):
-        self.db_event_objects, self.nb_updated, self.nb_removed = Event.import_events(
-            self.event_objects,
-            remove_missing_from_source=self.url,
-            user_id=self.user_id,
+        self.db_event_objects, self.nb_updated, self.nb_removed = (
+            DBImporterEvents.import_in_db(
+                self.event_objects,
+                remove_missing_from_source=self.url,
+                user_id=self.user_id,
+            )
         )
 
+    # Limitation: the given events should not be considered similar to one another...
+    @staticmethod
+    def import_in_db(events, remove_missing_from_source=None, user_id=None):
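+        """Create, update or trash database events from a list of imported events.
+
+        Arguments:
+        - events: unsaved Event objects built by from_structure().
+        - remove_missing_from_source: when set, published events carrying this
+          import source but absent from the current import are moved to trash.
+        - user_id: primary key of the user triggering the import, if any.
+
+        Returns a tuple (imported, nb_updated, nb_draft): the newly created
+        events, the number of updated events, and the number of events moved
+        to trash because they disappeared from the source.
+        """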
+        user = None
+        if user_id:
+            user = User.objects.filter(pk=user_id).first()
+
+        to_import = []
+        to_update = []
+
+        min_date = timezone.now().date()
+        max_date = None
+        uuids = set()
+
+        # for each event, check if it's a new one or one to be updated
+        for event in events:
+            sdate = date.fromisoformat(event.start_day)
+            if event.end_day:
+                edate = date.fromisoformat(event.end_day)
+            else:
+                edate = sdate
+            if min_date is None or min_date > sdate:
+                min_date = sdate
+            if max_date is None or max_date < sdate:
+                max_date = sdate
+            if max_date is None or (event.end_day is not None and max_date < edate):
+                max_date = edate
+            if event.uuids and len(event.uuids) > 0:
+                uuids |= set(event.uuids)
+
+            # mark the event as coming from an import process
+            event.set_in_importation_process()
+            event.set_processing_user(user)
+            event.prepare_save()
+
+            # check if the event has already been imported (using uuid)
+            same_events = event.find_same_events_by_uuid()
+
+            if same_events is not None and len(same_events) != 0:
+                # check if one event in this list has been imported and not modified since
+                same_imported = Event.find_last_pure_import(same_events)
+                pure = True
+                # if not, check whether the new event exactly matches another one
+                if not same_imported:
+                    for e in same_events:
+                        if event.similar(e, False):
+                            same_imported = e
+                            pure = False
+                            break
+
+                if same_imported:
+                    if not event.similar(same_imported, False):
+                        # reopen DuplicatedEvents if required
+                        if same_imported.other_versions:
+                            if same_imported.status != Event.STATUS.TRASH:
+                                if same_imported.other_versions.is_published():
+                                    if (
+                                        same_imported.other_versions.representative
+                                        != same_imported
+                                    ):
+                                        same_imported.other_versions.representative = (
+                                            None
+                                        )
+                                        same_imported.other_versions.save()
+                        # add a message to explain the update
+                        if not event.is_not_found_import():
+                            res = [
+                                r
+                                for r in Event.get_comparison(
+                                    [event, same_imported], all
+                                )
+                                if not r["similar"]
+                            ]
+                            if len(res) > 0:
+                                txt = _("Updated field(s): ") + ", ".join(
+                                    [r["key"] for r in res]
+                                )
+                                msg = Message(
+                                    subject=_("Update"),
+                                    name=_("update process"),
+                                    related_event=same_imported,
+                                    message=txt,
+                                    message_type=Message.TYPE.UPDATE_PROCESS,
+                                )
+                                msg.save()
+
+                    new_image = same_imported.image != event.image
+
+                    # if the import process was not able to find any content, change the status to draft
+                    if event.is_not_found_import():
+                        if same_imported.status == Event.STATUS.PUBLISHED:
+                            same_imported.status = Event.STATUS.TRASH
+                    else:
+                        # only update local information if it's a pure import with no moderated_date
+                        same_imported.update(
+                            event,
+                            pure and same_imported.moderated_date is None,
+                        )
+                    # save messages
+                    if event.has_message():
+                        for msg in event.get_messages():
+                            msg.related_event = same_imported
+                            msg.save()
+                    same_imported.set_in_importation_process()
+                    same_imported.prepare_save()
+                    # fix missing or updated files
+                    if same_imported.local_image and (
+                        not default_storage.exists(same_imported.local_image.name)
+                        or new_image
+                    ):
+                        same_imported.download_image()
+                        same_imported.save(
+                            update_fields=["local_image"],
+                            noclean_other_versions=True,
+                        )
+
+                    to_update.append(same_imported)
+                else:
+                    # otherwise, the new event is possibly a duplicate of the remaining ones
+
+                    # trash the new event if all its known versions are already trashed
+                    trash = (
+                        len([e for e in same_events if e.status != Event.STATUS.TRASH])
+                        == 0
+                    )
+                    if trash:
+                        event.status = Event.STATUS.TRASH
+                    event.set_other_versions(same_events, force_non_fixed=not trash)
+                    # it will be imported
+                    to_import.append(event)
+            else:
+                # if the uuid is unique (or not available), check for similar events
+                similar_events = event.find_similar_events()
+
+                # if similar events exist, add this relation to the event
+                if len(similar_events) != 0:
+                    # the event is possibly a duplicate of the others
+                    event.set_other_versions(similar_events, force_non_fixed=True)
+                    to_import.append(event)
+                else:
+                    # import this new event
+                    to_import.append(event)
+
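+        # long-lasting events are kept as drafts with an explanatory message,
+        # leaving the publication decision to the moderators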
+        for e in to_import:
+            if e.is_event_long_duration():
+                e.status = Event.STATUS.DRAFT
+                e.add_message(
+                    Message(
+                        subject=_("Import"),
+                        name=_("import process"),
+                        message=_(
+                            "The duration of the event is a little too long for direct publication. Moderators can choose to publish it or not."
+                        ),
+                        message_type=Message.TYPE.IMPORT_PROCESS,
+                    )
+                )
+
+        # then import all the new events
+        imported = Event.objects.bulk_create(to_import)
+        # update organisers (m2m relation)
+        for i, ti in zip(imported, to_import):
+            if ti.has_pending_organisers() and ti.pending_organisers is not None:
+                i.organisers.set(ti.pending_organisers)
+            if ti.has_message():
+                for msg in ti.get_messages():
+                    msg.related_event = i
+                    msg.save()
+
+        nb_updated = Event.objects.bulk_update(
+            to_update,
+            fields=Event.data_fields(no_m2m=True)
+            + [
+                "imported_date",
+                "modified_date",
+                "uuids",
+                "status",
+                "import_sources",
+            ],
+        )
+
+        nb_draft = 0
+        if remove_missing_from_source is not None and max_date is not None:
+            # published events from this source that are in the database but
+            # missing from the import are moved to trash, provided they fall
+            # within the imported date range
+
+            in_interval = Event.objects.filter(
+                (
+                    (
+                        Q(end_day__isnull=True)
+                        & Q(start_day__gte=min_date)
+                        & Q(start_day__lte=max_date)
+                    )
+                    | (
+                        Q(end_day__isnull=False)
+                        & ~(Q(start_day__gt=max_date) | Q(end_day__lt=min_date))
+                    )
+                )
+                & Q(import_sources__contains=[remove_missing_from_source])
+                & Q(status=Event.STATUS.PUBLISHED)
+                & Q(uuids__len__gt=0)
+            )
+
+            to_draft = []
+            for e in in_interval:
+                if len(uuids.intersection(e.uuids)) == 0:
+                    e.status = Event.STATUS.TRASH
+                    # save them without updating the modified date
+                    e.set_no_modification_date_changed()
+                    e.prepare_save()
+                    to_draft.append(e)
+
+            nb_draft = Event.objects.bulk_update(to_draft, fields=["status"])
+
+        return imported, nb_updated, nb_draft
+
     def is_valid_event_structure(self, event):
         if "title" not in event:
             self.error_message = (
@@ -130,9 +354,159 @@ class DBImporterEvents:
             logger.info(
                 "Valid event: {} {}".format(event["last_modified"], event["title"])
             )
-            event_obj = Event.from_structure(event, self.url)
+            event_obj = DBImporterEvents.from_structure(event, self.url)
             self.event_objects.append(event_obj)
             return True
         else:
             logger.warning("Not valid event: {}".format(event))
             return False
+
+    @staticmethod
+    def from_structure(event_structure, import_source=None):
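+        """Build an unsaved Event from a dictionary produced by an extractor.
+
+        Normalises category, status, dates, times, location, description and
+        recurrences, attaches contributor messages and extractor warnings, and
+        records import_source in the import_sources of the new event.
+        """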
event_structure["last_modified"] is not None + ): + d = datetime.fromisoformat(event_structure["last_modified"]) + if d.tzinfo is None or d.tzinfo.utcoffset(d) is None: + d = timezone.make_aware(d, timezone.get_default_timezone()) + event_structure["modified_date"] = d + del event_structure["last_modified"] + else: + event_structure["modified_date"] = None + + if "start_time" in event_structure and event_structure["start_time"] != "": + event_structure["start_time"] = time.fromisoformat( + event_structure["start_time"] + ) + else: + event_structure["start_time"] = None + + if "end_time" in event_structure and event_structure["end_time"] != "": + event_structure["end_time"] = time.fromisoformat( + event_structure["end_time"] + ) + else: + event_structure["end_time"] = None + + if "location" not in event_structure or event_structure["location"] is None: + event_structure["location"] = "" + + if "description" in event_structure and event_structure["description"] is None: + event_structure["description"] = "" + + if ( + "recurrences" in event_structure + and event_structure["recurrences"] is not None + ): + event_structure["recurrences"] = recurrence.deserialize( + event_structure["recurrences"] + ) + event_structure["recurrences"].exdates = [ + e.replace(hour=0, minute=0, second=0) + for e in event_structure["recurrences"].exdates + ] + event_structure["recurrences"].rdates = [ + e.replace(hour=0, minute=0, second=0) + for e in event_structure["recurrences"].rdates + ] + + else: + event_structure["recurrences"] = None + + if import_source is not None: + event_structure["import_sources"] = [import_source] + + result = Event(**event_structure) + result.add_pending_organisers(organisers) + if email or comments: + has_comments = comments not in ["", None] + result.add_message( + Message( + subject=_("during import process"), + email=email, + message=comments, + closed=False, + message_type=( + Message.TYPE.FROM_CONTRIBUTOR + if has_comments + else Message.TYPE.FROM_CONTRIBUTOR_NO_MSG + ), + ) + ) + for w in warnings: + if w == Extractor.Warning.NO_START_DATE: + result.set_invalid_start_date() + result.add_message( + Message( + subject=_("warning"), + closed=False, + message=_("the date has not been imported correctly."), + message_type=Message.TYPE.WARNING, + ) + ) + if w == Extractor.Warning.NO_TITLE: + result.set_invalid_title() + result.add_message( + Message( + subject=_("warning"), + closed=False, + message=_("the title has not been imported correctly."), + message_type=Message.TYPE.WARNING, + ) + ) + if w == Extractor.Warning.NOT_FOUND: + result.status = Event.STATUS.DRAFT + result.set_is_not_found_import() + result.add_message( + Message( + subject=_("warning"), + closed=False, + message=_( + "The import was unable to find an event in the page." 
+        if (
+            "recurrences" in event_structure
+            and event_structure["recurrences"] is not None
+        ):
+            event_structure["recurrences"] = recurrence.deserialize(
+                event_structure["recurrences"]
+            )
+            event_structure["recurrences"].exdates = [
+                e.replace(hour=0, minute=0, second=0)
+                for e in event_structure["recurrences"].exdates
+            ]
+            event_structure["recurrences"].rdates = [
+                e.replace(hour=0, minute=0, second=0)
+                for e in event_structure["recurrences"].rdates
+            ]
+        else:
+            event_structure["recurrences"] = None
+
+        if import_source is not None:
+            event_structure["import_sources"] = [import_source]
+
+        result = Event(**event_structure)
+        result.add_pending_organisers(organisers)
+        if email or comments:
+            has_comments = comments not in ["", None]
+            result.add_message(
+                Message(
+                    subject=_("during import process"),
+                    email=email,
+                    message=comments,
+                    closed=False,
+                    message_type=(
+                        Message.TYPE.FROM_CONTRIBUTOR
+                        if has_comments
+                        else Message.TYPE.FROM_CONTRIBUTOR_NO_MSG
+                    ),
+                )
+            )
+        for w in warnings:
+            if w == Extractor.Warning.NO_START_DATE:
+                result.set_invalid_start_date()
+                result.add_message(
+                    Message(
+                        subject=_("warning"),
+                        closed=False,
+                        message=_("the date has not been imported correctly."),
+                        message_type=Message.TYPE.WARNING,
+                    )
+                )
+            if w == Extractor.Warning.NO_TITLE:
+                result.set_invalid_title()
+                result.add_message(
+                    Message(
+                        subject=_("warning"),
+                        closed=False,
+                        message=_("the title has not been imported correctly."),
+                        message_type=Message.TYPE.WARNING,
+                    )
+                )
+            if w == Extractor.Warning.NOT_FOUND:
+                result.status = Event.STATUS.DRAFT
+                result.set_is_not_found_import()
+                result.add_message(
+                    Message(
+                        subject=_("warning"),
+                        closed=False,
+                        message=_(
+                            "The import was unable to find an event in the page."
+                        ),
+                        message_type=Message.TYPE.WARNING,
+                    )
+                )
+
+        return result
diff --git a/src/agenda_culturel/models.py b/src/agenda_culturel/models.py
index 81cf1c0..c472506 100644
--- a/src/agenda_culturel/models.py
+++ b/src/agenda_culturel/models.py
@@ -1880,156 +1880,6 @@ class Event(models.Model):
             e.set_no_modification_date_changed()
             e.save()
 
-    def from_structure(event_structure, import_source=None):
-        # organisers is a manytomany relation thus cannot be initialised before creation of the event
-        organisers = event_structure.pop("organisers", None)
-        # supplementary information
-        email = event_structure.pop("email", None)
-        comments = event_structure.pop("comments", None)
-        warnings = event_structure.pop("warnings", [])
-
-        for w in warnings:
-            if w == Extractor.Warning.NO_START_DATE:
-                event_structure["title"] += (
-                    " - "
-                    + _("Warning")
-                    + ": "
-                    + _("the date has not been imported correctly.")
-                )
-
-        if "category" in event_structure and event_structure["category"] is not None:
-            try:
-                event_structure["category"] = Category.objects.get(
-                    name__unaccent__icontains=remove_accents(
-                        event_structure["category"].lower()
-                    )
-                )
-            except Category.DoesNotExist:
-                event_structure["category"] = Category.get_default_category()
-        else:
-            event_structure["category"] = Category.get_default_category()
-
-        if "published" in event_structure and event_structure["published"] is not None:
-            if event_structure["published"]:
-                event_structure["status"] = Event.STATUS.PUBLISHED
-            else:
-                event_structure["status"] = Event.STATUS.DRAFT
-            del event_structure["published"]
-        else:
-            event_structure["status"] = Event.STATUS.DRAFT
-
-        if "url_human" in event_structure and event_structure["url_human"] is not None:
-            event_structure["reference_urls"] = [event_structure["url_human"]]
-            del event_structure["url_human"]
-
-        if (
-            "last_modified" in event_structure
-            and event_structure["last_modified"] is not None
-        ):
-            d = datetime.fromisoformat(event_structure["last_modified"])
-            if d.tzinfo is None or d.tzinfo.utcoffset(d) is None:
-                d = timezone.make_aware(d, timezone.get_default_timezone())
-            event_structure["modified_date"] = d
-            del event_structure["last_modified"]
-        else:
-            event_structure["modified_date"] = None
-
-        if "start_time" in event_structure and event_structure["start_time"] != "":
-            event_structure["start_time"] = time.fromisoformat(
-                event_structure["start_time"]
-            )
-        else:
-            event_structure["start_time"] = None
-
-        if "end_time" in event_structure and event_structure["end_time"] != "":
-            event_structure["end_time"] = time.fromisoformat(
-                event_structure["end_time"]
-            )
-        else:
-            event_structure["end_time"] = None
-
-        if "location" not in event_structure or event_structure["location"] is None:
-            event_structure["location"] = ""
-
-        if "description" in event_structure and event_structure["description"] is None:
-            event_structure["description"] = ""
-
-        if (
-            "recurrences" in event_structure
-            and event_structure["recurrences"] is not None
-        ):
-            event_structure["recurrences"] = recurrence.deserialize(
-                event_structure["recurrences"]
-            )
-            event_structure["recurrences"].exdates = [
-                e.replace(hour=0, minute=0, second=0)
-                for e in event_structure["recurrences"].exdates
-            ]
-            event_structure["recurrences"].rdates = [
-                e.replace(hour=0, minute=0, second=0)
-                for e in event_structure["recurrences"].rdates
-            ]
-
-        else:
-            event_structure["recurrences"] = None
-
-        if import_source is not None:
-            event_structure["import_sources"] = [import_source]
-
-        result = Event(**event_structure)
-        result.add_pending_organisers(organisers)
-        if email or comments:
-            has_comments = comments not in ["", None]
-            result.add_message(
-                Message(
-                    subject=_("during import process"),
-                    email=email,
-                    message=comments,
-                    closed=False,
-                    message_type=(
-                        Message.TYPE.FROM_CONTRIBUTOR
-                        if has_comments
-                        else Message.TYPE.FROM_CONTRIBUTOR_NO_MSG
-                    ),
-                )
-            )
-        for w in warnings:
-            if w == Extractor.Warning.NO_START_DATE:
-                result.set_invalid_start_date()
-                result.add_message(
-                    Message(
-                        subject=_("warning"),
-                        closed=False,
-                        message=_("the date has not been imported correctly."),
-                        message_type=Message.TYPE.WARNING,
-                    )
-                )
-            if w == Extractor.Warning.NO_TITLE:
-                result.set_invalid_title()
-                result.add_message(
-                    Message(
-                        subject=_("warning"),
-                        closed=False,
-                        message=_("the title has not been imported correctly."),
-                        message_type=Message.TYPE.WARNING,
-                    )
-                )
-            if w == Extractor.Warning.NOT_FOUND:
-                result.status = Event.STATUS.DRAFT
-                result.set_is_not_found_import()
-                result.add_message(
-                    Message(
-                        subject=_("warning"),
-                        closed=False,
-                        message=_(
-                            "The import was unable to find an event in the page."
-                        ),
-                        message_type=Message.TYPE.WARNING,
-                    )
-                )
-
-        return result
-
     def find_similar_events(self):
         start_time_test = Q(start_time=self.start_time)
 
@@ -2303,220 +2153,6 @@ class Event(models.Model):
         if source not in self.import_sources:
             self.import_sources.append(source)
 
-    # Limitation: the given events should not be considered similar one to another...
-    def import_events(events, remove_missing_from_source=None, user_id=None):
-        user = None
-        if user_id:
-            user = User.objects.filter(pk=user_id).first()
-
-        to_import = []
-        to_update = []
-
-        min_date = timezone.now().date()
-        max_date = None
-        uuids = set()
-
-        # for each event, check if it's a new one, or a one to be updated
-        for event in events:
-            sdate = date.fromisoformat(event.start_day)
-            if event.end_day:
-                edate = date.fromisoformat(event.end_day)
-            else:
-                edate = sdate
-            if min_date is None or min_date > sdate:
-                min_date = sdate
-            if max_date is None or max_date < sdate:
-                max_date = sdate
-            if max_date is None or (event.end_day is not None and max_date < edate):
-                max_date = edate
-            if event.uuids and len(event.uuids) > 0:
-                uuids |= set(event.uuids)
-
-            # imported events should be updated
-            event.set_in_importation_process()
-            event.set_processing_user(user)
-            event.prepare_save()
-
-            # check if the event has already be imported (using uuid)
-            same_events = event.find_same_events_by_uuid()
-
-            if same_events is not None and len(same_events) != 0:
-                # check if one event has been imported and not modified in this list
-                same_imported = Event.find_last_pure_import(same_events)
-                pure = True
-                # if not, we check if it does not match exactly with another
-                if not same_imported:
-                    for e in same_events:
-                        if event.similar(e, False):
-                            same_imported = e
-                            pure = False
-                            break
-
-                if same_imported:
-                    if not event.similar(same_imported, False):
-                        # reopen DuplicatedEvents if required
-                        if same_imported.other_versions:
-                            if same_imported.status != Event.STATUS.TRASH:
-                                if same_imported.other_versions.is_published():
-                                    if (
-                                        same_imported.other_versions.representative
-                                        != same_imported
-                                    ):
-                                        same_imported.other_versions.representative = (
-                                            None
-                                        )
-                                        same_imported.other_versions.save()
-                        # add a message to explain the update
-                        if not event.is_not_found_import():
-                            res = [
-                                r
-                                for r in Event.get_comparison(
-                                    [event, same_imported], all
-                                )
-                                if not r["similar"]
-                            ]
-                            if len(res) > 0:
-                                txt = _("Updated field(s): ") + ", ".join(
".join( - [r["key"] for r in res] - ) - msg = Message( - subject=_("Update"), - name=_("update process"), - related_event=same_imported, - message=txt, - message_type=Message.TYPE.UPDATE_PROCESS, - ) - msg.save() - - new_image = same_imported.image != event.image - - # if the import process was not able to found any content, change the status as draft - if event.is_not_found_import(): - if same_imported.status == Event.STATUS.PUBLISHED: - same_imported.status = Event.STATUS.TRASH - else: - # we only update local information if it's a pure import and has no moderated_date - same_imported.update( - event, - pure and same_imported.moderated_date is None, - ) - # save messages - if event.has_message(): - for msg in event.get_messages(): - msg.related_event = same_imported - msg.save() - same_imported.set_in_importation_process() - same_imported.prepare_save() - # fix missing or updated files - if same_imported.local_image and ( - not default_storage.exists(same_imported.local_image.name) - or new_image - ): - same_imported.download_image() - same_imported.save( - update_fields=["local_image"], - noclean_other_versions=True, - ) - - to_update.append(same_imported) - else: - # otherwise, the new event possibly a duplication of the remaining others. - - # check if it should be published - trash = ( - len([e for e in same_events if e.status != Event.STATUS.TRASH]) - == 0 - ) - if trash: - event.status = Event.STATUS.TRASH - event.set_other_versions(same_events, force_non_fixed=not trash) - # it will be imported - to_import.append(event) - else: - # if uuid is unique (or not available), check for similar events - similar_events = event.find_similar_events() - - # if it exists similar events, add this relation to the event - if len(similar_events) != 0: - # the event is possibly a duplication of the others - event.set_other_versions(similar_events, force_non_fixed=True) - to_import.append(event) - else: - # import this new event - to_import.append(event) - - for e in to_import: - if e.is_event_long_duration(): - e.status = Event.STATUS.DRAFT - e.add_message( - Message( - subject=_("Import"), - name=_("import process"), - message=_( - "The duration of the event is a little too long for direct publication. Moderators can choose to publish it or not." 
-                        ),
-                        message_type=Message.TYPE.IMPORT_PROCESS,
-                    )
-                )
-
-        # then import all the new events
-        imported = Event.objects.bulk_create(to_import)
-        # update organisers (m2m relation)
-        for i, ti in zip(imported, to_import):
-            if ti.has_pending_organisers() and ti.pending_organisers is not None:
-                i.organisers.set(ti.pending_organisers)
-            if ti.has_message():
-                for msg in ti.get_messages():
-                    msg.related_event = i
-                    msg.save()
-
-        nb_updated = Event.objects.bulk_update(
-            to_update,
-            fields=Event.data_fields(no_m2m=True)
-            + [
-                "imported_date",
-                "modified_date",
-                "uuids",
-                "status",
-                "import_sources",
-            ],
-        )
-
-        nb_draft = 0
-        if remove_missing_from_source is not None and max_date is not None:
-            # events that are missing from the import but in database are turned into drafts
-            # only if they are in the future
-
-            in_interval = Event.objects.filter(
-                (
-                    (
-                        Q(end_day__isnull=True)
-                        & Q(start_day__gte=min_date)
-                        & Q(start_day__lte=max_date)
-                    )
-                    | (
-                        Q(end_day__isnull=False)
-                        & ~(Q(start_day__gt=max_date) | Q(end_day__lt=min_date))
-                    )
-                )
-                & Q(import_sources__contains=[remove_missing_from_source])
-                & Q(status=Event.STATUS.PUBLISHED)
-                & Q(uuids__len__gt=0)
-            )
-
-            to_draft = []
-            for e in in_interval:
-                if len(uuids.intersection(e.uuids)) == 0:
-                    e.status = Event.STATUS.TRASH
-                    # save them without updating modified date
-                    e.set_no_modification_date_changed()
-                    e.prepare_save()
-                    to_draft.append(e)
-
-            nb_draft = Event.objects.bulk_update(to_draft, fields=["status"])
-
-        return imported, nb_updated, nb_draft
-
     def set_current_date(self, date):
         self.current_date = date
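
Usage sketch: a minimal, hypothetical call sequence for the two relocated helpers. The structure dict is illustrative (its field names are the ones consumed by from_structure above), not a real extractor payload:

    from agenda_culturel.db_importer import DBImporterEvents

    structure = {
        "title": "Concert au parc",
        "start_day": "2025-06-01",
        "last_modified": "2025-05-01T10:00:00+02:00",
        "uuids": ["https://example.org/events/42"],
    }
    # build an unsaved Event, remembering where it was imported from
    event = DBImporterEvents.from_structure(structure, "https://example.org")
    # create or update events in database; published events from the same
    # source that are missing from this import are moved to trash
    imported, nb_updated, nb_removed = DBImporterEvents.import_in_db(
        [event], remove_missing_from_source="https://example.org"
    )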