Code reorganization

Jean-Marie Favreau 2025-04-26 11:05:58 +02:00
parent bff6631754
commit 573584cf43
2 changed files with 381 additions and 371 deletions


@@ -1,10 +1,18 @@
import json
import logging
from datetime import date, datetime, time
from django.db.models import Q
from django.contrib.auth.models import User
from .import_tasks.extractor import Extractor
from django.core.files.storage import default_storage
from django.utils.translation import gettext_lazy as _
import recurrence
import recurrence.fields
from django.utils import timezone

from agenda_culturel.models import Event, Message, Category, remove_accents

logger = logging.getLogger(__name__)
@@ -106,11 +114,227 @@ class DBImporterEvents:
return event["end_day"] >= self.today return event["end_day"] >= self.today
def save_imported(self): def save_imported(self):
self.db_event_objects, self.nb_updated, self.nb_removed = Event.import_events( self.db_event_objects, self.nb_updated, self.nb_removed = (
DBImporterEvents.import_in_db(
self.event_objects, self.event_objects,
remove_missing_from_source=self.url, remove_missing_from_source=self.url,
user_id=self.user_id, user_id=self.user_id,
) )
)
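For orientation, the triple returned by the relocated function maps directly onto the importer's counters. A minimal sketch of the call, assuming a configured Django project with agenda_culturel installed (parsed_events is a hypothetical list of unsaved Event instances, as produced by from_structure below):

imported, nb_updated, nb_draft = DBImporterEvents.import_in_db(
    parsed_events,  # Event instances built by from_structure(), not yet saved
    remove_missing_from_source="https://example.org/agenda",  # illustrative source URL
    user_id=None,  # no moderating user
)
# imported   - Event rows created through bulk_create()
# nb_updated - count returned by bulk_update() for already-known events
# nb_draft   - published events from this source that vanished upstream
#              (bound to self.nb_removed in save_imported above)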
    # Limitation: the given events should not be similar to one another...
    def import_in_db(events, remove_missing_from_source=None, user_id=None):
        user = None
        if user_id:
            user = User.objects.filter(pk=user_id).first()

        to_import = []
        to_update = []

        min_date = timezone.now().date()
        max_date = None
        uuids = set()

        # for each event, check whether it is a new one or one to be updated
        for event in events:
            sdate = date.fromisoformat(event.start_day)
            if event.end_day:
                edate = date.fromisoformat(event.end_day)
            else:
                edate = sdate
            if min_date is None or min_date > sdate:
                min_date = sdate
            if max_date is None or max_date < sdate:
                max_date = sdate
            if max_date is None or (event.end_day is not None and max_date < edate):
                max_date = edate

            if event.uuids and len(event.uuids) > 0:
                uuids |= set(event.uuids)

            # imported events should be updated
            event.set_in_importation_process()
            event.set_processing_user(user)
            event.prepare_save()

            # check whether the event has already been imported (using uuid)
            same_events = event.find_same_events_by_uuid()

            if same_events is not None and len(same_events) != 0:
                # check whether one event in this list has been imported and not modified
                same_imported = Event.find_last_pure_import(same_events)
                pure = True
                # if not, check whether it matches exactly one of the others
                if not same_imported:
                    for e in same_events:
                        if event.similar(e, False):
                            same_imported = e
                            pure = False
                            break

                if same_imported:
                    if not event.similar(same_imported, False):
                        # reopen DuplicatedEvents if required
                        if same_imported.other_versions:
                            if same_imported.status != Event.STATUS.TRASH:
                                if same_imported.other_versions.is_published():
                                    if (
                                        same_imported.other_versions.representative
                                        != same_imported
                                    ):
                                        same_imported.other_versions.representative = (
                                            None
                                        )
                                        same_imported.other_versions.save()

                        # add a message to explain the update
                        if not event.is_not_found_import():
                            res = [
                                r
                                for r in Event.get_comparison(
                                    [event, same_imported], all
                                )
                                if not r["similar"]
                            ]
                            if len(res) > 0:
                                txt = _("Updated field(s): ") + ", ".join(
                                    [r["key"] for r in res]
                                )
                                msg = Message(
                                    subject=_("Update"),
                                    name=_("update process"),
                                    related_event=same_imported,
                                    message=txt,
                                    message_type=Message.TYPE.UPDATE_PROCESS,
                                )
                                msg.save()

                    new_image = same_imported.image != event.image

                    # if the import process was unable to find any content, unpublish the event
                    if event.is_not_found_import():
                        if same_imported.status == Event.STATUS.PUBLISHED:
                            same_imported.status = Event.STATUS.TRASH
                    else:
                        # only update local information for a pure import with no moderated_date
                        same_imported.update(
                            event,
                            pure and same_imported.moderated_date is None,
                        )

                    # save messages
                    if event.has_message():
                        for msg in event.get_messages():
                            msg.related_event = same_imported
                            msg.save()

                    same_imported.set_in_importation_process()
                    same_imported.prepare_save()
                    # fix missing or updated files
                    if same_imported.local_image and (
                        not default_storage.exists(same_imported.local_image.name)
                        or new_image
                    ):
                        same_imported.download_image()
                        same_imported.save(
                            update_fields=["local_image"],
                            noclean_other_versions=True,
                        )
                    to_update.append(same_imported)
                else:
                    # otherwise, the new event is possibly a duplicate of the remaining ones;
                    # check whether it should be published
                    trash = (
                        len([e for e in same_events if e.status != Event.STATUS.TRASH])
                        == 0
                    )
                    if trash:
                        event.status = Event.STATUS.TRASH
                    event.set_other_versions(same_events, force_non_fixed=not trash)
                    # it will be imported
                    to_import.append(event)
            else:
                # if the uuid is unique (or not available), check for similar events
                similar_events = event.find_similar_events()

                # if similar events exist, record the relation on the event
                if len(similar_events) != 0:
                    # the event is possibly a duplicate of the others
                    event.set_other_versions(similar_events, force_non_fixed=True)
                    to_import.append(event)
                else:
                    # import this new event
                    to_import.append(event)

        for e in to_import:
            if e.is_event_long_duration():
                e.status = Event.STATUS.DRAFT
                e.add_message(
                    Message(
                        subject=_("Import"),
                        name=_("import process"),
                        message=_(
                            "The duration of the event is a little too long for direct publication. Moderators can choose to publish it or not."
                        ),
                        message_type=Message.TYPE.IMPORT_PROCESS,
                    )
                )

        # then import all the new events
        imported = Event.objects.bulk_create(to_import)

        # update organisers (m2m relation)
        for i, ti in zip(imported, to_import):
            if ti.has_pending_organisers() and ti.pending_organisers is not None:
                i.organisers.set(ti.pending_organisers)
            if ti.has_message():
                for msg in ti.get_messages():
                    msg.related_event = i
                    msg.save()

        nb_updated = Event.objects.bulk_update(
            to_update,
            fields=Event.data_fields(no_m2m=True)
            + [
                "imported_date",
                "modified_date",
                "uuids",
                "status",
                "import_sources",
            ],
        )

        nb_draft = 0
        if remove_missing_from_source is not None and max_date is not None:
            # events present in the database but missing from this import are
            # unpublished, but only within the imported date window
            in_interval = Event.objects.filter(
                (
                    (
                        Q(end_day__isnull=True)
                        & Q(start_day__gte=min_date)
                        & Q(start_day__lte=max_date)
                    )
                    | (
                        Q(end_day__isnull=False)
                        & ~(Q(start_day__gt=max_date) | Q(end_day__lt=min_date))
                    )
                )
                & Q(import_sources__contains=[remove_missing_from_source])
                & Q(status=Event.STATUS.PUBLISHED)
                & Q(uuids__len__gt=0)
            )

            to_draft = []
            for e in in_interval:
                if len(uuids.intersection(e.uuids)) == 0:
                    e.status = Event.STATUS.TRASH
                    # save them without updating the modification date
                    e.set_no_modification_date_changed()
                    e.prepare_save()
                    to_draft.append(e)
            nb_draft = Event.objects.bulk_update(to_draft, fields=["status"])

        return imported, nb_updated, nb_draft
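The double negation in the Q filter above is plain interval overlap: a multi-day event is kept unless it ends before the window or starts after it. A standalone, runnable sketch of the same predicate (names are illustrative, not part of the codebase):

from datetime import date

def overlaps_window(start_day, end_day, min_date, max_date):
    # single-day events (no end_day) must fall inside [min_date, max_date]
    if end_day is None:
        return min_date <= start_day <= max_date
    # multi-day events only need to intersect the window
    return not (start_day > max_date or end_day < min_date)

# a single-day event inside the window is a candidate for unpublishing
assert overlaps_window(date(2025, 5, 2), None, date(2025, 5, 1), date(2025, 5, 10))
# a multi-day event straddling the window start is too
assert overlaps_window(date(2025, 4, 28), date(2025, 5, 3), date(2025, 5, 1), date(2025, 5, 10))
# an event entirely after the window is left alone
assert not overlaps_window(date(2025, 6, 1), date(2025, 6, 2), date(2025, 5, 1), date(2025, 5, 10))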
    def is_valid_event_structure(self, event):
        if "title" not in event:

@@ -130,9 +354,159 @@ class DBImporterEvents:
            logger.info(
                "Valid event: {} {}".format(event["last_modified"], event["title"])
            )
            event_obj = DBImporterEvents.from_structure(event, self.url)
            self.event_objects.append(event_obj)
            return True
        else:
            logger.warning("Not valid event: {}".format(event))
            return False
    def from_structure(event_structure, import_source=None):
        # organisers is a many-to-many relation, so it cannot be set before the event is created
        organisers = event_structure.pop("organisers", None)
        # supplementary information
        email = event_structure.pop("email", None)
        comments = event_structure.pop("comments", None)
        warnings = event_structure.pop("warnings", [])
        for w in warnings:
            if w == Extractor.Warning.NO_START_DATE:
                event_structure["title"] += (
                    " - "
                    + _("Warning")
                    + ": "
                    + _("the date has not been imported correctly.")
                )

        if "category" in event_structure and event_structure["category"] is not None:
            try:
                event_structure["category"] = Category.objects.get(
                    name__unaccent__icontains=remove_accents(
                        event_structure["category"].lower()
                    )
                )
            except Category.DoesNotExist:
                event_structure["category"] = Category.get_default_category()
        else:
            event_structure["category"] = Category.get_default_category()

        if "published" in event_structure and event_structure["published"] is not None:
            if event_structure["published"]:
                event_structure["status"] = Event.STATUS.PUBLISHED
            else:
                event_structure["status"] = Event.STATUS.DRAFT
            del event_structure["published"]
        else:
            event_structure["status"] = Event.STATUS.DRAFT

        if "url_human" in event_structure and event_structure["url_human"] is not None:
            event_structure["reference_urls"] = [event_structure["url_human"]]
            del event_structure["url_human"]

        if (
            "last_modified" in event_structure
            and event_structure["last_modified"] is not None
        ):
            d = datetime.fromisoformat(event_structure["last_modified"])
            if d.tzinfo is None or d.tzinfo.utcoffset(d) is None:
                d = timezone.make_aware(d, timezone.get_default_timezone())
            event_structure["modified_date"] = d
            del event_structure["last_modified"]
        else:
            event_structure["modified_date"] = None

        if "start_time" in event_structure and event_structure["start_time"] != "":
            event_structure["start_time"] = time.fromisoformat(
                event_structure["start_time"]
            )
        else:
            event_structure["start_time"] = None

        if "end_time" in event_structure and event_structure["end_time"] != "":
            event_structure["end_time"] = time.fromisoformat(
                event_structure["end_time"]
            )
        else:
            event_structure["end_time"] = None

        if "location" not in event_structure or event_structure["location"] is None:
            event_structure["location"] = ""

        if "description" in event_structure and event_structure["description"] is None:
            event_structure["description"] = ""

        if (
            "recurrences" in event_structure
            and event_structure["recurrences"] is not None
        ):
            event_structure["recurrences"] = recurrence.deserialize(
                event_structure["recurrences"]
            )
            event_structure["recurrences"].exdates = [
                e.replace(hour=0, minute=0, second=0)
                for e in event_structure["recurrences"].exdates
            ]
            event_structure["recurrences"].rdates = [
                e.replace(hour=0, minute=0, second=0)
                for e in event_structure["recurrences"].rdates
            ]
        else:
            event_structure["recurrences"] = None

        if import_source is not None:
            event_structure["import_sources"] = [import_source]

        result = Event(**event_structure)
        result.add_pending_organisers(organisers)
        if email or comments:
            has_comments = comments not in ["", None]
            result.add_message(
                Message(
                    subject=_("during import process"),
                    email=email,
                    message=comments,
                    closed=False,
                    message_type=(
                        Message.TYPE.FROM_CONTRIBUTOR
                        if has_comments
                        else Message.TYPE.FROM_CONTRIBUTOR_NO_MSG
                    ),
                )
            )

        for w in warnings:
            if w == Extractor.Warning.NO_START_DATE:
                result.set_invalid_start_date()
                result.add_message(
                    Message(
                        subject=_("warning"),
                        closed=False,
                        message=_("the date has not been imported correctly."),
                        message_type=Message.TYPE.WARNING,
                    )
                )
            if w == Extractor.Warning.NO_TITLE:
                result.set_invalid_title()
                result.add_message(
                    Message(
                        subject=_("warning"),
                        closed=False,
                        message=_("the title has not been imported correctly."),
                        message_type=Message.TYPE.WARNING,
                    )
                )
            if w == Extractor.Warning.NOT_FOUND:
                result.status = Event.STATUS.DRAFT
                result.set_is_not_found_import()
                result.add_message(
                    Message(
                        subject=_("warning"),
                        closed=False,
                        message=_(
                            "The import was unable to find an event in the page."
                        ),
                        message_type=Message.TYPE.WARNING,
                    )
                )

        return result
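To make the accepted input concrete, here is a hypothetical extractor payload exercising the branches above; the field names come from the code, the values are invented:

event_structure = {
    "title": "Concert au parc",
    "start_day": "2025-05-17",
    "start_time": "20:30",  # parsed with time.fromisoformat(); "" would become None
    "end_day": "2025-05-17",
    "end_time": "",  # empty string -> None
    "location": None,  # None -> ""
    "description": "Concert en plein air.",
    "category": "Musique",  # matched accent-insensitively, default category otherwise
    "published": True,  # -> Event.STATUS.PUBLISHED, key then removed
    "url_human": "https://example.org/concert",  # -> reference_urls
    "last_modified": "2025-05-01T10:00:00",  # naive datetimes are made timezone-aware
    "recurrences": None,
    "organisers": None,  # popped and re-attached after creation (m2m)
    "email": None,
    "comments": None,
    "warnings": [],
}
# event = DBImporterEvents.from_structure(event_structure, import_source=url)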


@@ -1880,156 +1880,6 @@ class Event(models.Model):
        e.set_no_modification_date_changed()
        e.save()
    [removed: from_structure(), moved unchanged to DBImporterEvents above]
    def find_similar_events(self):
        start_time_test = Q(start_time=self.start_time)
@@ -2303,220 +2153,6 @@ class Event(models.Model):
        if source not in self.import_sources:
            self.import_sources.append(source)
    [removed: import_events(), moved unchanged to DBImporterEvents as import_in_db above]
    def set_current_date(self, date):
        self.current_date = date