Ajout d'un import des événements orphelins

This commit is contained in:
Jean-Marie Favreau 2025-02-09 14:42:15 +01:00
parent d75fe8b05e
commit fc579998ca
10 changed files with 383 additions and 224 deletions

View File

@ -324,11 +324,18 @@ def weekly_imports(self):
run_recurrent_imports_from_list([imp.pk for imp in imports]) run_recurrent_imports_from_list([imp.pk for imp in imports])
@app.task(base=ChromiumTask, bind=True) @app.task(base=ChromiumTask, bind=True)
def import_events_from_url(self, url, cat, tags, force=False, user_id=None, email=None, comments=None): def import_events_from_url(self, urls, cat=None, tags=None, force=False, user_id=None, email=None, comments=None):
from .db_importer import DBImporterEvents from .db_importer import DBImporterEvents
from agenda_culturel.models import RecurrentImport, BatchImportation from agenda_culturel.models import RecurrentImport, BatchImportation
from agenda_culturel.models import Event, Category from agenda_culturel.models import Event, Category
if isinstance(urls, list):
url = urls[0]
is_list = True
else:
is_list = False
url = urls
with memcache_chromium_lock(self.app.oid) as acquired: with memcache_chromium_lock(self.app.oid) as acquired:
if acquired: if acquired:
@ -386,7 +393,7 @@ def import_events_from_url(self, url, cat, tags, force=False, user_id=None, emai
logger.error(e) logger.error(e)
close_import_task(self.request.id, False, e, importer) close_import_task(self.request.id, False, e, importer)
return return urls[1:] if is_list else True
# if chromium is locked, we wait 30 seconds before retrying # if chromium is locked, we wait 30 seconds before retrying
raise self.retry(countdown=30) raise self.retry(countdown=30)
@ -403,7 +410,36 @@ def import_events_from_urls(self, urls_cat_tags, user_id=None, email=None, comme
import_events_from_url.delay(url, cat, tags, user_id=user_id, email=email, comments=comments) import_events_from_url.delay(url, cat, tags, user_id=user_id, email=email, comments=comments)
@app.task(base=ChromiumTask, bind=True)
def update_orphan_pure_import_events(self):
    """Re-import upcoming "orphan" events from their first import source.

    The events selected are those that start today or later, have import
    sources, have not been modified after their import, and whose sources do
    not overlap the source of any RecurrentImport.

    The URLs are handed to a chain of ``import_events_from_url`` tasks: the
    first task receives the whole list, and every following task receives
    the value returned by the previous one.  Nothing is dispatched when no
    URL is found.
    """
    from agenda_culturel.models import RecurrentImport
    from agenda_culturel.models import Event
    from django.db.models import Q, F

    # get all recurrent sources
    srcs = RecurrentImport.objects.all().values_list("source")
    today = date.today()

    # get all events in future with a source and not related to a recurrent import
    not_modified_since_import = (
        Q(modified_date__isnull=True) | Q(modified_date__lte=F("imported_date"))
    )
    sources = Event.objects.filter(Q(start_day__gte=today)).filter(
        Q(import_sources__isnull=False)
        & not_modified_since_import
        & ~Q(import_sources__overlap=srcs)
    ).values_list("import_sources", flat=True)

    # keep the first source of each event; several events may share the same
    # URL, so drop duplicates (order preserved) to fetch each page only once
    urls = list(dict.fromkeys(url_l[0] for url_l in sources if len(url_l) > 0))
    if not urls:
        # nothing to update: do not dispatch an empty chain
        return

    # run tasks as a chain: one task per URL, only the first one gets the list
    signatures = [import_events_from_url.s(urls, force=True)]
    signatures.extend(import_events_from_url.s(force=True) for _ in urls[1:])
    chain(signatures).delay()
app.conf.beat_schedule = { app.conf.beat_schedule = {
"daily_orphans_update": {
"task": "agenda_culturel.celery.update_orphan_pure_import_events",
# Daily update of orphan events at 2:22 a.m.
"schedule": crontab(hour=2, minute=22),
},
"daily_imports": { "daily_imports": {
"task": "agenda_culturel.celery.daily_imports", "task": "agenda_culturel.celery.daily_imports",
# Daily imports at 3:14 a.m. # Daily imports at 3:14 a.m.

View File

@ -11,6 +11,7 @@ class Extractor(ABC):
class Warning(IntEnum): class Warning(IntEnum):
NO_TITLE = 1 NO_TITLE = 1
NO_START_DATE = 2 NO_START_DATE = 2
NOT_FOUND = 3
url_referer=None url_referer=None
@ -204,6 +205,7 @@ class Extractor(ABC):
published=False, published=False,
image=None, image=None,
image_alt=None, image_alt=None,
not_found=False
): ):
comments = '' comments = ''
warnings = [] warnings = []
@ -217,6 +219,8 @@ class Extractor(ABC):
published = False published = False
start_day = datetime.now().date().strftime("%Y-%m-%d") start_day = datetime.now().date().strftime("%Y-%m-%d")
warnings.append(Extractor.Warning.NO_START_DATE) warnings.append(Extractor.Warning.NO_START_DATE)
if not_found:
warnings.append(Extractor.Warning.NOT_FOUND)
tags_default = self.default_value_if_exists(default_values, "tags") tags_default = self.default_value_if_exists(default_values, "tags")
if not tags_default: if not tags_default:
@ -306,7 +310,7 @@ class EventNotFoundExtractor(Extractor):
self.add_event(default_values, "événement sans titre depuis " + url, self.add_event(default_values, "événement sans titre depuis " + url,
None, timezone.now().date(), None, None, timezone.now().date(), None,
"l'import a échoué, la saisie doit se faire manuellement à partir de l'url source " + url, "l'import a échoué, la saisie doit se faire manuellement à partir de l'url source " + url,
[], [url], published=False, url_human=url) [], [url], published=False, url_human=url, not_found=True)
return self.get_structure() return self.get_structure()

View File

@ -252,7 +252,7 @@ class FacebookEventExtractor(Extractor):
def clean_url(url): def clean_url(url):
if FacebookEventExtractor.is_known_url(url): if FacebookEventExtractor.is_known_url(url, False):
u = urlparse(url) u = urlparse(url)
result = "https://www.facebook.com" + u.path result = "https://www.facebook.com" + u.path
@ -269,9 +269,12 @@ class FacebookEventExtractor(Extractor):
return url return url
def is_known_url(url): def is_known_url(url, include_links=True):
u = urlparse(url) u = urlparse(url)
return u.netloc in ["facebook.com", "www.facebook.com", "m.facebook.com"] url_list = ["facebook.com", "www.facebook.com", "m.facebook.com"]
if include_links:
url_list.append("fb.me")
return u.netloc in url_list
def extract( def extract(
self, content, url, url_human=None, default_values=None, published=False self, content, url, url_human=None, default_values=None, published=False

File diff suppressed because it is too large Load Diff

View File

@ -1032,6 +1032,12 @@ class Event(models.Model):
def has_pending_organisers(self): def has_pending_organisers(self):
return hasattr(self, "pending_organisers") return hasattr(self, "pending_organisers")
def set_is_not_found_import(self):
    """Flag this instance as the result of an import that found no event."""
    self.not_found_import = True
def is_not_found_import(self):
    """Return True when the ``not_found_import`` attribute exists on this instance."""
    return hasattr(self, "not_found_import")
def set_skip_duplicate_check(self): def set_skip_duplicate_check(self):
self.skip_duplicate_check = True self.skip_duplicate_check = True
@ -1235,8 +1241,6 @@ class Event(models.Model):
notif = False notif = False
if self.status != Event.STATUS.DRAFT: if self.status != Event.STATUS.DRAFT:
messages = self.get_contributor_message() messages = self.get_contributor_message()
logger.warning("messages: ")
logger.warning(messages)
if messages: if messages:
for message in messages: for message in messages:
if message and not message.closed and message.email and message.email != "": if message and not message.closed and message.email and message.email != "":
@ -1427,6 +1431,13 @@ class Event(models.Model):
closed=False, closed=False,
message=_('the title has not been imported correctly.'), message=_('the title has not been imported correctly.'),
message_type=Message.TYPE.WARNING)) message_type=Message.TYPE.WARNING))
if w == Extractor.Warning.NOT_FOUND:
result.status = Event.STATUS.DRAFT
result.set_is_not_found_import()
result.add_message(Message(subject=_('warning'),
closed=False,
message=_('The import was unable to find an event in the page.'),
message_type=Message.TYPE.WARNING))
return result return result
@ -1529,10 +1540,16 @@ class Event(models.Model):
def get_organisers(self): def get_organisers(self):
if self.pk: if self.pk:
return self.organisers.all() if self.organisers is None:
return []
else:
return self.organisers.all()
else: else:
if self.has_pending_organisers(): if self.has_pending_organisers():
return self.pending_organisers if self.pending_organisers is None:
return []
else:
return self.pending_organisers
else: else:
return [] return []
@ -1647,7 +1664,7 @@ class Event(models.Model):
def update(self, other, all): def update(self, other, all):
# integrate pending organisers # integrate pending organisers
if other.has_pending_organisers(): if other.has_pending_organisers() and not other.pending_organisers is None:
self.organisers.set(other.pending_organisers) self.organisers.set(other.pending_organisers)
logger.warning("process update " + other.title + ' ' + str(other.has_invalid_start_date())) logger.warning("process update " + other.title + ' ' + str(other.has_invalid_start_date()))
@ -1737,20 +1754,31 @@ class Event(models.Model):
same_imported.other_versions.representative = None same_imported.other_versions.representative = None
same_imported.other_versions.save() same_imported.other_versions.save()
# add a message to explain the update # add a message to explain the update
res = [r for r in Event.get_comparison([event, same_imported], all) if not r["similar"]] if not event.is_not_found_import():
if len(res) > 0: res = [r for r in Event.get_comparison([event, same_imported], all) if not r["similar"]]
txt = _("Updated field(s): ") + ", ".join([r["key"] for r in res]) if len(res) > 0:
msg = Message(subject=_('Update'), txt = _("Updated field(s): ") + ", ".join([r["key"] for r in res])
name=_('update process'), msg = Message(subject=_('Update'),
related_event=same_imported, name=_('update process'),
message=txt, related_event=same_imported,
message_type=Message.TYPE.UPDATE_PROCESS) message=txt,
msg.save() message_type=Message.TYPE.UPDATE_PROCESS)
msg.save()
# we only update local information if it's a pure import and has no moderated_date
new_image = same_imported.image != event.image new_image = same_imported.image != event.image
same_imported.update(event, pure and same_imported.moderated_date is None)
# if the import process was not able to find any content, move a published event to the trash
if event.is_not_found_import():
if same_imported.status == Event.STATUS.PUBLISHED:
same_imported.status = Event.STATUS.TRASH
else:
# we only update local information if it's a pure import and has no moderated_date
same_imported.update(event, pure and same_imported.moderated_date is None)
# save messages
if event.has_message():
for msg in event.get_messages():
msg.related_event = same_imported
msg.save()
same_imported.set_in_importation_process() same_imported.set_in_importation_process()
same_imported.prepare_save() same_imported.prepare_save()
# fix missing or updated files # fix missing or updated files

View File

@ -30,6 +30,9 @@
<em>url</em> <em>url</em>
{% endif %} {% endif %}
</a> </a>
{% if obj.event_id %}
(<a href="{% url 'edit_event_pk' obj.event_id %}">événement</a>)
{% endif %}
{% endif %} {% endif %}
{% endif %} </td> {% endif %} </td>
<td><span{% if obj.status == "failed" %} data-tooltip="{{ obj.error_message }}"{% endif %}>{{ obj.status }}</span></td> <td><span{% if obj.status == "failed" %} data-tooltip="{{ obj.error_message }}"{% endif %}>{{ obj.status }}</span></td>

View File

@ -8,8 +8,6 @@
{% css_categories %} {% css_categories %}
{% endblock %} {% endblock %}
{% block ajouter-bouton %}{% block ajouter-menu %}{% endblock %}{% endblock %}
{% block sidemenu-bouton %} {% block sidemenu-bouton %}
<li><a href="#contenu-principal" aria-label="Aller au contenu">{% picto_from_name "chevron-up" %}</a></li> <li><a href="#contenu-principal" aria-label="Aller au contenu">{% picto_from_name "chevron-up" %}</a></li>
<li><a href="#sidebar" aria-label="Aller au menu latéral">{% picto_from_name "chevron-down" %}</a></li> <li><a href="#sidebar" aria-label="Aller au menu latéral">{% picto_from_name "chevron-down" %}</a></li>
@ -19,10 +17,14 @@
<div class="grid two-columns"> <div class="grid two-columns">
<article> <article>
<header> <header>
<a class="slide-buttons" href="{% url 'add_import'%}" role="button">Import manuel</a> <div class="slide-buttons">
<a href="{% url 'add_import'%}" role="button">Import manuel</a>
<a href="{% url 'update_orphan_events'%}" role="button">Mettre à jour les singletons {% picto_from_name "play-circle" %}</a>
</div>
<h1>Importations par lot</h1> <h1>Importations par lot</h1>
</header> </header>
<p>Il y a actuellement {{ nb_in_orphan_import }} événements singletons, c'est-à-dire importés depuis une source mais non inclus dans un import récurrent.</p>
{% include "agenda_culturel/batch-imports-inc.html" with objects=paginator_filter %} {% include "agenda_culturel/batch-imports-inc.html" with objects=paginator_filter %}
<footer> <footer>

View File

@ -0,0 +1,27 @@
{% extends "agenda_culturel/page-admin.html" %}
{% block fluid %}{% endblock %}
{% block content %}
<article>
<header>
<h1>{% block title %}{% block og_title %}Mettre à jour les événements singletons{% endblock %}{% endblock %}</h1>
</header>
<form method="post">{% csrf_token %}
<p>Il y a actuellement {{ nb_in_orphan_import }} événements singletons, c'est-à-dire importés depuis une source mais non inclus dans un import récurrent.
Souhaitez-vous les mettre à jour&nbsp;?
</p>
{{ form }}
<footer>
<div class="grid buttons">
<a href="{{ cancel_url }}" role="button" class="secondary">Annuler</a>
<input type="submit" value="Confirmer">
</div>
</footer>
</form>
</article>
{% endblock %}

View File

@ -78,6 +78,7 @@ urlpatterns = [
EventDetailView.as_view(), EventDetailView.as_view(),
name="view_event", name="view_event",
), ),
path("event/<int:pk>/", EventDetailView.as_view(), name="edit_event_pk"),
path("event/<int:pk>/edit", EventUpdateView.as_view(), name="edit_event"), path("event/<int:pk>/edit", EventUpdateView.as_view(), name="edit_event"),
path("event/<int:pk>/moderate", EventModerateView.as_view(), name="moderate_event"), path("event/<int:pk>/moderate", EventModerateView.as_view(), name="moderate_event"),
path("event/<int:pk>/moderate/after/<int:pred>", EventModerateView.as_view(), name="moderate_event_step"), path("event/<int:pk>/moderate/after/<int:pred>", EventModerateView.as_view(), name="moderate_event_step"),
@ -136,6 +137,7 @@ urlpatterns = [
), ),
path("imports/", imports, name="imports"), path("imports/", imports, name="imports"),
path("imports/add", add_import, name="add_import"), path("imports/add", add_import, name="add_import"),
path("imports/orphans/run", update_orphan_events, name="update_orphan_events"),
path("imports/<int:pk>/cancel", cancel_import, name="cancel_import"), path("imports/<int:pk>/cancel", cancel_import, name="cancel_import"),
path("rimports/", recurrent_imports, name="recurrent_imports"), path("rimports/", recurrent_imports, name="recurrent_imports"),
path("rimports/run", run_all_rimports, name="run_all_rimports"), path("rimports/run", run_all_rimports, name="run_all_rimports"),

View File

@ -78,7 +78,7 @@ from django.utils import timezone
from django.utils.html import escape from django.utils.html import escape
from datetime import date, timedelta from datetime import date, timedelta
from django.utils.timezone import datetime from django.utils.timezone import datetime
from django.db.models import Q, Subquery, OuterRef, Count, F, Func, BooleanField, ExpressionWrapper from django.db.models import Q, Subquery, OuterRef, Count, F, Func, BooleanField, ExpressionWrapper, When
from django.urls import reverse_lazy from django.urls import reverse_lazy
from django.utils.translation import gettext_lazy as _ from django.utils.translation import gettext_lazy as _
@ -103,6 +103,7 @@ from .celery import (
run_all_recurrent_imports_canceled, run_all_recurrent_imports_canceled,
import_events_from_url, import_events_from_url,
import_events_from_urls, import_events_from_urls,
update_orphan_pure_import_events,
) )
import urllib import urllib
@ -551,12 +552,15 @@ class EventDetailView(UserPassesTestMixin, DetailView, ModelFormMixin):
def get_object(self): def get_object(self):
o = super().get_object() o = super().get_object()
o.download_missing_image() o.download_missing_image()
y = self.kwargs["year"] if "year" in self.kwargs:
m = self.kwargs["month"] y = self.kwargs["year"]
d = self.kwargs["day"] m = self.kwargs["month"]
obj = o.get_recurrence_at_date(y, m, d) d = self.kwargs["day"]
obj.set_current_date(date(y, m, d)) obj = o.get_recurrence_at_date(y, m, d)
return obj obj.set_current_date(date(y, m, d))
return obj
else:
return o
def get_success_url(self): def get_success_url(self):
return self.get_object().get_absolute_url() + "#chronology" return self.get_object().get_absolute_url() + "#chronology"
@ -1213,9 +1217,21 @@ def event_search_full(request):
@login_required(login_url="/accounts/login/") @login_required(login_url="/accounts/login/")
@permission_required("agenda_culturel.view_batchimportation") @permission_required("agenda_culturel.view_batchimportation")
def imports(request): def imports(request):
paginator = Paginator(BatchImportation.objects.all().order_by("-created_date"), 30) rel_event = Event.objects.filter(import_sources__contains=[OuterRef('url_source')]).values("pk")[:1]
paginator = Paginator(BatchImportation.objects.all().order_by("-created_date").annotate(event_id=Subquery(rel_event)),
30)
page = request.GET.get("page") page = request.GET.get("page")
today = date.today()
srcs = RecurrentImport.objects.all().values_list("source")
in_future = Event.objects.filter(Q(start_day__gte=today))
nb_in_orphan_import = in_future.filter(
(Q(import_sources__isnull=False) &
(Q(modified_date__isnull=True) |
Q(modified_date__lte=F('imported_date'))))
& ~Q(import_sources__overlap=srcs)).count()
try: try:
response = paginator.page(page) response = paginator.page(page)
except PageNotAnInteger: except PageNotAnInteger:
@ -1224,7 +1240,7 @@ def imports(request):
response = paginator.page(paginator.num_pages) response = paginator.page(paginator.num_pages)
return render( return render(
request, "agenda_culturel/imports.html", {"paginator_filter": response} request, "agenda_culturel/imports.html", {"paginator_filter": response, "nb_in_orphan_import": nb_in_orphan_import}
) )
@ -1270,6 +1286,31 @@ def cancel_import(request, pk):
{"object": import_process, "cancel_url": cancel_url}, {"object": import_process, "cancel_url": cancel_url},
) )
@login_required(login_url="/accounts/login/")
@permission_required(
    ["agenda_culturel.view_batchimportation", "agenda_culturel.run_batchimportation"]
)
def update_orphan_events(request):
    """Confirm, then launch, the update of orphan imported events.

    On POST, the ``update_orphan_pure_import_events`` task is queued and the
    user is redirected to the imports page with a success message.  On any
    other method, a confirmation page is rendered with the number of events
    concerned and the URL used by its cancel link.
    """
    if request.method == "POST":
        # launch the orphan events update as a background task
        update_orphan_pure_import_events.delay()
        messages.success(request, _("The orphan event update has been launched."))
        return HttpResponseRedirect(reverse_lazy("imports"))

    # count upcoming events that have import sources, were not modified after
    # their import, and are not covered by any recurrent import
    today = date.today()
    srcs = RecurrentImport.objects.all().values_list("source")
    in_future = Event.objects.filter(Q(start_day__gte=today))
    not_modified_since_import = (
        Q(modified_date__isnull=True) | Q(modified_date__lte=F("imported_date"))
    )
    nb_in_orphan_import = in_future.filter(
        Q(import_sources__isnull=False)
        & not_modified_since_import
        & ~Q(import_sources__overlap=srcs)
    ).count()

    return render(
        request,
        "agenda_culturel/run_orphan_imports_confirm.html",
        {
            "nb_in_orphan_import": nb_in_orphan_import,
            # the confirmation template renders {{ cancel_url }} in its cancel link
            "cancel_url": reverse_lazy("imports"),
        },
    )
######################### #########################
## recurrent importations ## recurrent importations