Ajout d'un import des événements orphelins

This commit is contained in:
Jean-Marie Favreau 2025-02-09 14:42:15 +01:00
parent d75fe8b05e
commit fc579998ca
10 changed files with 383 additions and 224 deletions

View File

@ -324,11 +324,18 @@ def weekly_imports(self):
run_recurrent_imports_from_list([imp.pk for imp in imports])
@app.task(base=ChromiumTask, bind=True)
def import_events_from_url(self, url, cat, tags, force=False, user_id=None, email=None, comments=None):
def import_events_from_url(self, urls, cat=None, tags=None, force=False, user_id=None, email=None, comments=None):
from .db_importer import DBImporterEvents
from agenda_culturel.models import RecurrentImport, BatchImportation
from agenda_culturel.models import Event, Category
if isinstance(urls, list):
url = urls[0]
is_list = True
else:
is_list = False
url = urls
with memcache_chromium_lock(self.app.oid) as acquired:
if acquired:
@ -386,7 +393,7 @@ def import_events_from_url(self, url, cat, tags, force=False, user_id=None, emai
logger.error(e)
close_import_task(self.request.id, False, e, importer)
return
return urls[1:] if is_list else True
# if chromium is locked, we wait 30 seconds before retrying
raise self.retry(countdown=30)
@ -403,7 +410,36 @@ def import_events_from_urls(self, urls_cat_tags, user_id=None, email=None, comme
import_events_from_url.delay(url, cat, tags, user_id=user_id, email=email, comments=comments)
@app.task(base=ChromiumTask, bind=True)
def update_orphan_pure_import_events(self):
    """Re-import every upcoming orphan event from its first source URL.

    An orphan event is selected when it starts today or later, has import
    sources, has not been modified after its import, and none of its
    sources overlaps with the source of a recurrent import.

    The URLs are processed sequentially through a chain of
    ``import_events_from_url`` tasks: the first task receives the whole
    list of URLs, and each following task receives what the previous one
    returned. Nothing is queued when there is no orphan event.
    """
    from agenda_culturel.models import RecurrentImport
    from agenda_culturel.models import Event
    from django.db.models import Q, F

    # get all recurrent sources
    srcs = RecurrentImport.objects.all().values_list("source")
    today = date.today()

    # get all events in future with a source and not related to a recurrent import
    source_lists = Event.objects.filter(Q(start_day__gte=today)).filter(
        (Q(import_sources__isnull=False) &
         (Q(modified_date__isnull=True) |
          Q(modified_date__lte=F('imported_date'))))
        & ~Q(import_sources__overlap=srcs)).values_list("import_sources", flat=True)

    # keep the first source of each event; dict.fromkeys removes duplicated
    # urls while preserving the original order, so a page shared by several
    # events is only imported once
    urls = list(dict.fromkeys(sources[0] for sources in source_lists if sources))

    # nothing to update: do not dispatch an empty chain
    if not urls:
        return

    # run tasks as a chain: only the first signature is given the url list,
    # the following ones get their input from the previous task
    signatures = [import_events_from_url.s(urls, force=True)]
    signatures.extend(import_events_from_url.s(force=True) for _ in urls[1:])
    chain(signatures).delay()
app.conf.beat_schedule = {
"daily_orphans_update": {
"task": "agenda_culturel.celery.update_orphan_pure_import_events",
# Daily update of orphan events at 2:22 a.m.
"schedule": crontab(hour=2, minute=22),
},
"daily_imports": {
"task": "agenda_culturel.celery.daily_imports",
# Daily imports at 3:14 a.m.

View File

@ -11,6 +11,7 @@ class Extractor(ABC):
# Warning codes that can be attached to an event built by an extractor.
class Warning(IntEnum):
# presumably raised when no title could be extracted -- TODO confirm against add_event
NO_TITLE = 1
# appended by add_event on the path where start_day is replaced by today's date
NO_START_DATE = 2
# appended by add_event when it is called with not_found=True
NOT_FOUND = 3
url_referer=None
@ -204,6 +205,7 @@ class Extractor(ABC):
published=False,
image=None,
image_alt=None,
not_found=False
):
comments = ''
warnings = []
@ -217,6 +219,8 @@ class Extractor(ABC):
published = False
start_day = datetime.now().date().strftime("%Y-%m-%d")
warnings.append(Extractor.Warning.NO_START_DATE)
if not_found:
warnings.append(Extractor.Warning.NOT_FOUND)
tags_default = self.default_value_if_exists(default_values, "tags")
if not tags_default:
@ -306,7 +310,7 @@ class EventNotFoundExtractor(Extractor):
self.add_event(default_values, "événement sans titre depuis " + url,
None, timezone.now().date(), None,
"l'import a échoué, la saisie doit se faire manuellement à partir de l'url source " + url,
[], [url], published=False, url_human=url)
[], [url], published=False, url_human=url, not_found=True)
return self.get_structure()

View File

@ -252,7 +252,7 @@ class FacebookEventExtractor(Extractor):
def clean_url(url):
if FacebookEventExtractor.is_known_url(url):
if FacebookEventExtractor.is_known_url(url, False):
u = urlparse(url)
result = "https://www.facebook.com" + u.path
@ -269,9 +269,12 @@ class FacebookEventExtractor(Extractor):
return url
def is_known_url(url, include_links=True):
    """Tell whether *url* is hosted on a known Facebook domain.

    Args:
        url: the URL to test.
        include_links: when true (the default), the ``fb.me`` short-link
            host is accepted in addition to the regular Facebook hosts.

    Returns:
        True if the network location of *url* is one of the accepted hosts.
    """
    known_hosts = ["facebook.com", "www.facebook.com", "m.facebook.com"]
    if include_links:
        known_hosts.append("fb.me")
    return urlparse(url).netloc in known_hosts
def extract(
self, content, url, url_human=None, default_values=None, published=False

File diff suppressed because it is too large Load Diff

View File

@ -1032,6 +1032,12 @@ class Event(models.Model):
def has_pending_organisers(self):
    """Return True when a ``pending_organisers`` attribute exists on this object."""
    is_present = hasattr(self, "pending_organisers")
    return is_present
def set_is_not_found_import(self):
    """Flag this object as coming from an import that found no event."""
    setattr(self, "not_found_import", True)
def is_not_found_import(self):
    """Return True when the ``not_found_import`` attribute has been set on this object."""
    flagged = hasattr(self, "not_found_import")
    return flagged
def set_skip_duplicate_check(self):
    """Set the ``skip_duplicate_check`` attribute of this object to True."""
    setattr(self, "skip_duplicate_check", True)
@ -1235,8 +1241,6 @@ class Event(models.Model):
notif = False
if self.status != Event.STATUS.DRAFT:
messages = self.get_contributor_message()
logger.warning("messages: ")
logger.warning(messages)
if messages:
for message in messages:
if message and not message.closed and message.email and message.email != "":
@ -1427,6 +1431,13 @@ class Event(models.Model):
closed=False,
message=_('the title has not been imported correctly.'),
message_type=Message.TYPE.WARNING))
if w == Extractor.Warning.NOT_FOUND:
result.status = Event.STATUS.DRAFT
result.set_is_not_found_import()
result.add_message(Message(subject=_('warning'),
closed=False,
message=_('The import was unable to find an event in the page.'),
message_type=Message.TYPE.WARNING))
return result
@ -1529,9 +1540,15 @@ class Event(models.Model):
def get_organisers(self):
    """Return the organisers of this event.

    For an object that has a primary key, the result comes from
    ``self.organisers.all()``. Otherwise the pending organisers are
    returned when they have been attached to the object. An empty list is
    returned whenever the relevant value is missing or None.
    """
    if self.pk:
        # saved object: read the stored relation
        if self.organisers is None:
            return []
        return self.organisers.all()

    # unsaved object: only pending organisers can be available
    if not self.has_pending_organisers():
        return []
    if self.pending_organisers is None:
        return []
    return self.pending_organisers
@ -1647,7 +1664,7 @@ class Event(models.Model):
def update(self, other, all):
# integrate pending organisers
if other.has_pending_organisers():
if other.has_pending_organisers() and not other.pending_organisers is None:
self.organisers.set(other.pending_organisers)
logger.warning("process update " + other.title + ' ' + str(other.has_invalid_start_date()))
@ -1737,6 +1754,7 @@ class Event(models.Model):
same_imported.other_versions.representative = None
same_imported.other_versions.save()
# add a message to explain the update
if not event.is_not_found_import():
res = [r for r in Event.get_comparison([event, same_imported], all) if not r["similar"]]
if len(res) > 0:
txt = _("Updated field(s): ") + ", ".join([r["key"] for r in res])
@ -1747,10 +1765,20 @@ class Event(models.Model):
message_type=Message.TYPE.UPDATE_PROCESS)
msg.save()
# we only update local information if it's a pure import and has no moderated_date
new_image = same_imported.image != event.image
# if the import process was not able to find any content, move a published event to the trash
if event.is_not_found_import():
if same_imported.status == Event.STATUS.PUBLISHED:
same_imported.status = Event.STATUS.TRASH
else:
# we only update local information if it's a pure import and has no moderated_date
same_imported.update(event, pure and same_imported.moderated_date is None)
# save messages
if event.has_message():
for msg in event.get_messages():
msg.related_event = same_imported
msg.save()
same_imported.set_in_importation_process()
same_imported.prepare_save()
# fix missing or updated files

View File

@ -30,6 +30,9 @@
<em>url</em>
{% endif %}
</a>
{% if obj.event_id %}
(<a href="{% url 'edit_event_pk' obj.event_id %}">événement</a>)
{% endif %}
{% endif %}
{% endif %} </td>
<td><span{% if obj.status == "failed" %} data-tooltip="{{ obj.error_message }}"{% endif %}>{{ obj.status }}</span></td>

View File

@ -8,8 +8,6 @@
{% css_categories %}
{% endblock %}
{% block ajouter-bouton %}{% block ajouter-menu %}{% endblock %}{% endblock %}
{% block sidemenu-bouton %}
<li><a href="#contenu-principal" aria-label="Aller au contenu">{% picto_from_name "chevron-up" %}</a></li>
<li><a href="#sidebar" aria-label="Aller au menu latéral">{% picto_from_name "chevron-down" %}</a></li>
@ -19,10 +17,14 @@
<div class="grid two-columns">
<article>
<header>
<a class="slide-buttons" href="{% url 'add_import'%}" role="button">Import manuel</a>
<div class="slide-buttons">
<a href="{% url 'add_import'%}" role="button">Import manuel</a>
<a href="{% url 'update_orphan_events'%}" role="button">Mettre à jour les singletons {% picto_from_name "play-circle" %}</a>
</div>
<h1>Importations par lot</h1>
</header>
<p>Il y a actuellement {{ nb_in_orphan_import }} événements singletons, c'est-à-dire importés depuis une source mais non inclus dans un import récurrent.</p>
{% include "agenda_culturel/batch-imports-inc.html" with objects=paginator_filter %}
<footer>

View File

@ -0,0 +1,27 @@
{% extends "agenda_culturel/page-admin.html" %}
{# Confirmation page shown before launching the update of singleton (orphan) events. #}
{% block fluid %}{% endblock %}
{% block content %}
<article>
    <header>
        <h1>{% block title %}{% block og_title %}Mettre à jour les événements singletons{% endblock %}{% endblock %}</h1>
    </header>
    <form method="post">{% csrf_token %}
        <p>Il y a actuellement {{ nb_in_orphan_import }} événements singletons, c'est-à-dire importés depuis une source mais non inclus dans un import récurrent.
        Souhaitez-vous les mettre à jour&nbsp;?
        </p>
        {{ form }}
        <footer>
            <div class="grid buttons">
                {# fall back to the imports page when the view does not provide cancel_url #}
                <a href="{% if cancel_url %}{{ cancel_url }}{% else %}{% url 'imports' %}{% endif %}" role="button" class="secondary">Annuler</a>
                <input type="submit" value="Confirmer">
            </div>
        </footer>
    </form>
</article>
{% endblock %}

View File

@ -78,6 +78,7 @@ urlpatterns = [
EventDetailView.as_view(),
name="view_event",
),
path("event/<int:pk>/", EventDetailView.as_view(), name="edit_event_pk"),
path("event/<int:pk>/edit", EventUpdateView.as_view(), name="edit_event"),
path("event/<int:pk>/moderate", EventModerateView.as_view(), name="moderate_event"),
path("event/<int:pk>/moderate/after/<int:pred>", EventModerateView.as_view(), name="moderate_event_step"),
@ -136,6 +137,7 @@ urlpatterns = [
),
path("imports/", imports, name="imports"),
path("imports/add", add_import, name="add_import"),
path("imports/orphans/run", update_orphan_events, name="update_orphan_events"),
path("imports/<int:pk>/cancel", cancel_import, name="cancel_import"),
path("rimports/", recurrent_imports, name="recurrent_imports"),
path("rimports/run", run_all_rimports, name="run_all_rimports"),

View File

@ -78,7 +78,7 @@ from django.utils import timezone
from django.utils.html import escape
from datetime import date, timedelta
from django.utils.timezone import datetime
from django.db.models import Q, Subquery, OuterRef, Count, F, Func, BooleanField, ExpressionWrapper
from django.db.models import Q, Subquery, OuterRef, Count, F, Func, BooleanField, ExpressionWrapper, When
from django.urls import reverse_lazy
from django.utils.translation import gettext_lazy as _
@ -103,6 +103,7 @@ from .celery import (
run_all_recurrent_imports_canceled,
import_events_from_url,
import_events_from_urls,
update_orphan_pure_import_events,
)
import urllib
@ -551,12 +552,15 @@ class EventDetailView(UserPassesTestMixin, DetailView, ModelFormMixin):
def get_object(self):
    """Return the object to display, after requesting its missing image download.

    When the URL kwargs contain a ``year``, the recurrence of the object at
    the requested year/month/day is returned instead, with that date set as
    its current date.
    """
    event = super().get_object()
    event.download_missing_image()

    # no date in the URL: the base object is what we want
    if "year" not in self.kwargs:
        return event

    year = self.kwargs["year"]
    month = self.kwargs["month"]
    day = self.kwargs["day"]
    occurrence = event.get_recurrence_at_date(year, month, day)
    occurrence.set_current_date(date(year, month, day))
    return occurrence
def get_success_url(self):
    """Return the absolute URL of the current object, pointing at its ``#chronology`` anchor."""
    target = self.get_object()
    base_url = target.get_absolute_url()
    return base_url + "#chronology"
@ -1213,9 +1217,21 @@ def event_search_full(request):
@login_required(login_url="/accounts/login/")
@permission_required("agenda_culturel.view_batchimportation")
def imports(request):
paginator = Paginator(BatchImportation.objects.all().order_by("-created_date"), 30)
rel_event = Event.objects.filter(import_sources__contains=[OuterRef('url_source')]).values("pk")[:1]
paginator = Paginator(BatchImportation.objects.all().order_by("-created_date").annotate(event_id=Subquery(rel_event)),
30)
page = request.GET.get("page")
today = date.today()
srcs = RecurrentImport.objects.all().values_list("source")
in_future = Event.objects.filter(Q(start_day__gte=today))
nb_in_orphan_import = in_future.filter(
(Q(import_sources__isnull=False) &
(Q(modified_date__isnull=True) |
Q(modified_date__lte=F('imported_date'))))
& ~Q(import_sources__overlap=srcs)).count()
try:
response = paginator.page(page)
except PageNotAnInteger:
@ -1224,7 +1240,7 @@ def imports(request):
response = paginator.page(paginator.num_pages)
return render(
request, "agenda_culturel/imports.html", {"paginator_filter": response}
request, "agenda_culturel/imports.html", {"paginator_filter": response, "nb_in_orphan_import": nb_in_orphan_import}
)
@ -1270,6 +1286,31 @@ def cancel_import(request, pk):
{"object": import_process, "cancel_url": cancel_url},
)
@login_required(login_url="/accounts/login/")
@permission_required(
    ["agenda_culturel.view_batchimportation", "agenda_culturel.run_batchimportation"]
)
def update_orphan_events(request):
    """Confirm and launch the update of orphan events.

    On POST, the ``update_orphan_pure_import_events`` task is queued and the
    user is redirected to the imports page with a success message.
    On any other method, a confirmation page is rendered with the number of
    upcoming orphan events and the URL used by its cancel button.
    """
    imports_url = reverse_lazy("imports")

    if request.method == "POST":
        # queue the update of orphan events
        update_orphan_pure_import_events.delay()
        messages.success(request, _("The orphan event update has been launched."))
        return HttpResponseRedirect(imports_url)

    # count upcoming events that have sources, were not modified after their
    # import, and share no source with a recurrent import
    today = date.today()
    srcs = RecurrentImport.objects.all().values_list("source")
    in_future = Event.objects.filter(Q(start_day__gte=today))
    nb_in_orphan_import = in_future.filter(
        (Q(import_sources__isnull=False) &
         (Q(modified_date__isnull=True) |
          Q(modified_date__lte=F('imported_date'))))
        & ~Q(import_sources__overlap=srcs)).count()

    return render(
        request,
        "agenda_culturel/run_orphan_imports_confirm.html",
        {
            "nb_in_orphan_import": nb_in_orphan_import,
            # the confirmation template links its cancel button to this url
            "cancel_url": imports_url,
        },
    )
#########################
## recurrent importations