Add an import of orphan events
parent d75fe8b05e
commit fc579998ca
@@ -324,11 +324,18 @@ def weekly_imports(self):
     run_recurrent_imports_from_list([imp.pk for imp in imports])


 @app.task(base=ChromiumTask, bind=True)
-def import_events_from_url(self, url, cat, tags, force=False, user_id=None, email=None, comments=None):
+def import_events_from_url(self, urls, cat=None, tags=None, force=False, user_id=None, email=None, comments=None):
     from .db_importer import DBImporterEvents
     from agenda_culturel.models import RecurrentImport, BatchImportation
     from agenda_culturel.models import Event, Category

+    if isinstance(urls, list):
+        url = urls[0]
+        is_list = True
+    else:
+        is_list = False
+        url = urls
+
     with memcache_chromium_lock(self.app.oid) as acquired:
         if acquired:
@@ -386,7 +393,7 @@ def import_events_from_url(self, url, cat, tags, force=False, user_id=None, emai
                 logger.error(e)
                 close_import_task(self.request.id, False, e, importer)

                 return
+            return urls[1:] if is_list else True

     # if chromium is locked, we wait 30 seconds before retrying
     raise self.retry(countdown=30)
@@ -403,7 +410,36 @@ def import_events_from_urls(self, urls_cat_tags, user_id=None, email=None, comme
         import_events_from_url.delay(url, cat, tags, user_id=user_id, email=email, comments=comments)


+@app.task(base=ChromiumTask, bind=True)
+def update_orphan_pure_import_events(self):
+    from agenda_culturel.models import RecurrentImport
+    from agenda_culturel.models import Event
+    from django.db.models import Q, F
+
+    # get all recurrent sources
+    srcs = RecurrentImport.objects.all().values_list("source")
+    today = date.today()
+    # get all future events that have a source but are not related to a recurrent import
+    urls = Event.objects.filter(Q(start_day__gte=today)).filter(
+        (Q(import_sources__isnull=False) &
+         (Q(modified_date__isnull=True) |
+          Q(modified_date__lte=F('imported_date'))))
+        & ~Q(import_sources__overlap=srcs)).values_list("import_sources", flat=True)
+    # get urls
+    urls = [url_l[0] for url_l in urls if len(url_l) > 0]
+
+    # run tasks as a chain
+    tasks = chain(import_events_from_url.s(urls, force=True) if i == 0 else import_events_from_url.s(force=True) for i in range(len(urls)))
+    tasks.delay()


 app.conf.beat_schedule = {
+    "daily_orphans_update": {
+        "task": "agenda_culturel.celery.update_orphan_pure_import_events",
+        # Daily orphan event update at 2:22 a.m.
+        "schedule": crontab(hour=2, minute=22),
+    },
     "daily_imports": {
         "task": "agenda_culturel.celery.daily_imports",
         # Daily imports at 3:14 a.m.
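Note on the chaining pattern above: import_events_from_url now returns urls[1:] when it receives a list, so each task in the chain imports the first URL and Celery passes the remaining tail as the first argument of the next signature. A minimal sketch of that pattern, outside this codebase, with a hypothetical task name and broker:

from celery import Celery, chain

app = Celery("sketch", broker="memory://")

@app.task
def import_one(urls, force=False):
    # Stand-in for import_events_from_url: handle the first URL,
    # then hand the remaining ones to the next task in the chain.
    head, *tail = urls
    print("importing", head, "force =", force)
    return tail

def run_orphan_chain(urls):
    # Only the first signature carries the URL list; every later signature
    # receives the previous task's return value (the shrinking tail).
    sig = chain(import_one.s(urls, force=True) if i == 0 else import_one.s(force=True)
                for i in range(len(urls)))
    return sig.delay()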
@@ -11,6 +11,7 @@ class Extractor(ABC):
     class Warning(IntEnum):
         NO_TITLE = 1
         NO_START_DATE = 2
+        NOT_FOUND = 3

     url_referer=None

@@ -204,6 +205,7 @@ class Extractor(ABC):
         published=False,
         image=None,
         image_alt=None,
+        not_found=False
     ):
         comments = ''
         warnings = []
@@ -217,6 +219,8 @@ class Extractor(ABC):
             published = False
             start_day = datetime.now().date().strftime("%Y-%m-%d")
             warnings.append(Extractor.Warning.NO_START_DATE)
+        if not_found:
+            warnings.append(Extractor.Warning.NOT_FOUND)

         tags_default = self.default_value_if_exists(default_values, "tags")
         if not tags_default:
@@ -306,7 +310,7 @@ class EventNotFoundExtractor(Extractor):
         self.add_event(default_values, "événement sans titre depuis " + url,
                        None, timezone.now().date(), None,
                        "l'import a échoué, la saisie doit se faire manuellement à partir de l'url source " + url,
-                       [], [url], published=False, url_human=url)
+                       [], [url], published=False, url_human=url, not_found=True)

         return self.get_structure()

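Since Extractor.Warning is an IntEnum, the NOT_FOUND member added above behaves both as an enum member and as a plain integer, which keeps comparisons such as w == Extractor.Warning.NOT_FOUND (used in Event.from_structure further down) and any integer-based storage straightforward. A small self-contained illustration, not code from this repository:

from enum import IntEnum

class Warning(IntEnum):
    NO_TITLE = 1
    NO_START_DATE = 2
    NOT_FOUND = 3

stored = int(Warning.NOT_FOUND)              # serializes as a plain 3
assert stored == Warning.NOT_FOUND           # integer comparison still matches
assert Warning(stored) is Warning.NOT_FOUND  # and round-trips back to the member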
@@ -252,7 +252,7 @@ class FacebookEventExtractor(Extractor):


     def clean_url(url):
-        if FacebookEventExtractor.is_known_url(url):
+        if FacebookEventExtractor.is_known_url(url, False):
             u = urlparse(url)
             result = "https://www.facebook.com" + u.path

@@ -269,9 +269,12 @@ class FacebookEventExtractor(Extractor):
         return url


-    def is_known_url(url):
+    def is_known_url(url, include_links=True):
         u = urlparse(url)
-        return u.netloc in ["facebook.com", "www.facebook.com", "m.facebook.com"]
+        url_list = ["facebook.com", "www.facebook.com", "m.facebook.com"]
+        if include_links:
+            url_list.append("fb.me")
+        return u.netloc in url_list

     def extract(
         self, content, url, url_human=None, default_values=None, published=False
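The new include_links flag widens detection to fb.me short links by default, while clean_url passes False, presumably because a fb.me path cannot simply be re-rooted onto www.facebook.com. A standalone restatement of the check, for illustration only (a plain module-level function rather than the class method):

from urllib.parse import urlparse

def is_known_url(url, include_links=True):
    hosts = ["facebook.com", "www.facebook.com", "m.facebook.com"]
    if include_links:
        # fb.me short links count as Facebook URLs for detection purposes
        hosts.append("fb.me")
    return urlparse(url).netloc in hosts

assert is_known_url("https://fb.me/e/abc123")
assert not is_known_url("https://fb.me/e/abc123", False)   # the clean_url path
assert is_known_url("https://www.facebook.com/events/123456/")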
File diff suppressed because it is too large
@@ -1032,6 +1032,12 @@ class Event(models.Model):
     def has_pending_organisers(self):
         return hasattr(self, "pending_organisers")

+    def set_is_not_found_import(self):
+        self.not_found_import = True
+
+    def is_not_found_import(self):
+        return hasattr(self, "not_found_import")
+
     def set_skip_duplicate_check(self):
         self.skip_duplicate_check = True

@@ -1235,8 +1241,6 @@ class Event(models.Model):
         notif = False
         if self.status != Event.STATUS.DRAFT:
             messages = self.get_contributor_message()
-            logger.warning("messages: ")
-            logger.warning(messages)
             if messages:
                 for message in messages:
                     if message and not message.closed and message.email and message.email != "":
@@ -1427,6 +1431,13 @@ class Event(models.Model):
                                            closed=False,
                                            message=_('the title has not been imported correctly.'),
                                            message_type=Message.TYPE.WARNING))
+            if w == Extractor.Warning.NOT_FOUND:
+                result.status = Event.STATUS.DRAFT
+                result.set_is_not_found_import()
+                result.add_message(Message(subject=_('warning'),
+                                           closed=False,
+                                           message=_('The import was unable to find an event in the page.'),
+                                           message_type=Message.TYPE.WARNING))

         return result

@@ -1529,9 +1540,15 @@ class Event(models.Model):

     def get_organisers(self):
         if self.pk:
+            if self.organisers is None:
+                return []
+            else:
                 return self.organisers.all()
         else:
             if self.has_pending_organisers():
+                if self.pending_organisers is None:
+                    return []
+                else:
                     return self.pending_organisers
             else:
                 return []
@@ -1647,7 +1664,7 @@ class Event(models.Model):
     def update(self, other, all):

         # integrate pending organisers
-        if other.has_pending_organisers():
+        if other.has_pending_organisers() and not other.pending_organisers is None:
             self.organisers.set(other.pending_organisers)

         logger.warning("process update " + other.title + ' ' + str(other.has_invalid_start_date()))
@@ -1737,6 +1754,7 @@ class Event(models.Model):
             same_imported.other_versions.representative = None
             same_imported.other_versions.save()
         # add a message to explain the update
+        if not event.is_not_found_import():
             res = [r for r in Event.get_comparison([event, same_imported], all) if not r["similar"]]
             if len(res) > 0:
                 txt = _("Updated field(s): ") + ", ".join([r["key"] for r in res])
@@ -1747,10 +1765,20 @@ class Event(models.Model):
                                   message_type=Message.TYPE.UPDATE_PROCESS)
                     msg.save()


-        # we only update local information if it's a pure import and has no moderated_date
         new_image = same_imported.image != event.image

+        # if the import process was not able to find any content, change the status to draft
+        if event.is_not_found_import():
+            if same_imported.status == Event.STATUS.PUBLISHED:
+                same_imported.status = Event.STATUS.TRASH
+        else:
+            # we only update local information if it's a pure import and has no moderated_date
+            same_imported.update(event, pure and same_imported.moderated_date is None)
+            # save messages
+            if event.has_message():
+                for msg in event.get_messages():
+                    msg.related_event = same_imported
+                    msg.save()
+            same_imported.set_in_importation_process()
+            same_imported.prepare_save()
         # fix missing or updated files
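The not_found_import marker added above follows the same transient-attribute pattern as pending_organisers and skip_duplicate_check: the flag is a plain instance attribute checked with hasattr, so it exists only on the in-memory object during the import run and is never persisted. A minimal standalone sketch of the pattern, using an illustrative class rather than the actual model:

class ImportedEventSketch:
    def set_is_not_found_import(self):
        # plain attribute: nothing is written to the database
        self.not_found_import = True

    def is_not_found_import(self):
        # hasattr-based check: a freshly loaded instance answers False
        return hasattr(self, "not_found_import")

e = ImportedEventSketch()
assert e.is_not_found_import() is False
e.set_is_not_found_import()
assert e.is_not_found_import() is True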
@@ -30,6 +30,9 @@
                 <em>url</em>
             {% endif %}
             </a>
+            {% if obj.event_id %}
+                (<a href="{% url 'edit_event_pk' obj.event_id %}">événement</a>)
+            {% endif %}
         {% endif %}
     {% endif %} </td>
     <td><span{% if obj.status == "failed" %} data-tooltip="{{ obj.error_message }}"{% endif %}>{{ obj.status }}</span></td>
@@ -8,8 +8,6 @@
     {% css_categories %}
 {% endblock %}

-{% block ajouter-bouton %}{% block ajouter-menu %}{% endblock %}{% endblock %}
-
 {% block sidemenu-bouton %}
     <li><a href="#contenu-principal" aria-label="Aller au contenu">{% picto_from_name "chevron-up" %}</a></li>
     <li><a href="#sidebar" aria-label="Aller au menu latéral">{% picto_from_name "chevron-down" %}</a></li>
@@ -19,10 +17,14 @@
     <div class="grid two-columns">
         <article>
             <header>
-                <a class="slide-buttons" href="{% url 'add_import'%}" role="button">Import manuel</a>
+                <div class="slide-buttons">
+                    <a href="{% url 'add_import'%}" role="button">Import manuel</a>
+                    <a href="{% url 'update_orphan_events'%}" role="button">Mettre à jour les singletons {% picto_from_name "play-circle" %}</a>
+                </div>
                 <h1>Importations par lot</h1>
             </header>

+            <p>Il y a actuellement {{ nb_in_orphan_import }} événements singletons, c'est-à-dire importés depuis une source mais non inclus dans un import récurrent.</p>
             {% include "agenda_culturel/batch-imports-inc.html" with objects=paginator_filter %}

             <footer>
@@ -0,0 +1,27 @@
+{% extends "agenda_culturel/page-admin.html" %}
+
+
+{% block fluid %}{% endblock %}
+
+{% block content %}
+
+<article>
+    <header>
+        <h1>{% block title %}{% block og_title %}Mettre à jour les événements singletons{% endblock %}{% endblock %}</h1>
+    </header>
+    <form method="post">{% csrf_token %}
+        <p>Il y a actuellement {{ nb_in_orphan_import }} événements singletons, c'est-à-dire importés depuis une source mais non inclus dans un import récurrent.
+            Souhaitez-vous les mettre à jour ?
+        </p>
+        {{ form }}
+
+        <footer>
+            <div class="grid buttons">
+                <a href="{{ cancel_url }}" role="button" class="secondary">Annuler</a>
+                <input type="submit" value="Confirmer">
+            </div>
+        </footer>
+    </form>
+</article>
+
+{% endblock %}
@@ -78,6 +78,7 @@ urlpatterns = [
         EventDetailView.as_view(),
         name="view_event",
     ),
+    path("event/<int:pk>/", EventDetailView.as_view(), name="edit_event_pk"),
     path("event/<int:pk>/edit", EventUpdateView.as_view(), name="edit_event"),
     path("event/<int:pk>/moderate", EventModerateView.as_view(), name="moderate_event"),
     path("event/<int:pk>/moderate/after/<int:pred>", EventModerateView.as_view(), name="moderate_event_step"),
@@ -136,6 +137,7 @@ urlpatterns = [
     ),
     path("imports/", imports, name="imports"),
     path("imports/add", add_import, name="add_import"),
+    path("imports/orphans/run", update_orphan_events, name="update_orphan_events"),
     path("imports/<int:pk>/cancel", cancel_import, name="cancel_import"),
     path("rimports/", recurrent_imports, name="recurrent_imports"),
     path("rimports/run", run_all_rimports, name="run_all_rimports"),
@@ -78,7 +78,7 @@ from django.utils import timezone
 from django.utils.html import escape
 from datetime import date, timedelta
 from django.utils.timezone import datetime
-from django.db.models import Q, Subquery, OuterRef, Count, F, Func, BooleanField, ExpressionWrapper
+from django.db.models import Q, Subquery, OuterRef, Count, F, Func, BooleanField, ExpressionWrapper, When

 from django.urls import reverse_lazy
 from django.utils.translation import gettext_lazy as _
@@ -103,6 +103,7 @@ from .celery import (
     run_all_recurrent_imports_canceled,
     import_events_from_url,
     import_events_from_urls,
+    update_orphan_pure_import_events,
 )

 import urllib
@@ -551,12 +552,15 @@ class EventDetailView(UserPassesTestMixin, DetailView, ModelFormMixin):
     def get_object(self):
         o = super().get_object()
         o.download_missing_image()
         if "year" in self.kwargs:
             y = self.kwargs["year"]
             m = self.kwargs["month"]
             d = self.kwargs["day"]
             obj = o.get_recurrence_at_date(y, m, d)
             obj.set_current_date(date(y, m, d))
             return obj
         else:
             return o

     def get_success_url(self):
         return self.get_object().get_absolute_url() + "#chronology"
@@ -1213,9 +1217,21 @@ def event_search_full(request):
 @login_required(login_url="/accounts/login/")
 @permission_required("agenda_culturel.view_batchimportation")
 def imports(request):
-    paginator = Paginator(BatchImportation.objects.all().order_by("-created_date"), 30)
+    rel_event = Event.objects.filter(import_sources__contains=[OuterRef('url_source')]).values("pk")[:1]
+    paginator = Paginator(BatchImportation.objects.all().order_by("-created_date").annotate(event_id=Subquery(rel_event)),
+                          30)
     page = request.GET.get("page")

+    today = date.today()
+
+    srcs = RecurrentImport.objects.all().values_list("source")
+    in_future = Event.objects.filter(Q(start_day__gte=today))
+    nb_in_orphan_import = in_future.filter(
+        (Q(import_sources__isnull=False) &
+         (Q(modified_date__isnull=True) |
+          Q(modified_date__lte=F('imported_date'))))
+        & ~Q(import_sources__overlap=srcs)).count()
+
     try:
         response = paginator.page(page)
     except PageNotAnInteger:
@@ -1224,7 +1240,7 @@ def imports(request):
         response = paginator.page(paginator.num_pages)

     return render(
-        request, "agenda_culturel/imports.html", {"paginator_filter": response}
+        request, "agenda_culturel/imports.html", {"paginator_filter": response, "nb_in_orphan_import": nb_in_orphan_import}
     )


@@ -1270,6 +1286,31 @@ def cancel_import(request, pk):
         {"object": import_process, "cancel_url": cancel_url},
     )

+@login_required(login_url="/accounts/login/")
+@permission_required(
+    ["agenda_culturel.view_batchimportation", "agenda_culturel.run_batchimportation"]
+)
+def update_orphan_events(request):
+    if request.method == "POST":
+        # run the orphan event update
+        update_orphan_pure_import_events.delay()
+
+        messages.success(request, _("The orphan event update has been launched."))
+        return HttpResponseRedirect(reverse_lazy("imports"))
+    else:
+
+        today = date.today()
+
+        srcs = RecurrentImport.objects.all().values_list("source")
+        in_future = Event.objects.filter(Q(start_day__gte=today))
+        nb_in_orphan_import = in_future.filter(
+            (Q(import_sources__isnull=False) &
+             (Q(modified_date__isnull=True) |
+              Q(modified_date__lte=F('imported_date'))))
+            & ~Q(import_sources__overlap=srcs)).count()
+        return render(
+            request, "agenda_culturel/run_orphan_imports_confirm.html", {"nb_in_orphan_import": nb_in_orphan_import}
+        )

 #########################
 ## recurrent importations
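The same "orphan" condition (future events that carry an import source, have not been edited since import, and whose source is not covered by any RecurrentImport) is spelled out three times in this commit: in update_orphan_pure_import_events and in the imports and update_orphan_events views. A rough sketch of how it could be factored into a single queryset helper; the helper name is hypothetical and not part of the commit, and the model classes are passed in only to keep the sketch import-free:

from datetime import date
from django.db.models import F, Q

def orphan_pure_import_events(Event, RecurrentImport):
    # future events imported from a source, untouched since import,
    # and whose source is not handled by any recurrent import
    srcs = RecurrentImport.objects.all().values_list("source")
    return (
        Event.objects.filter(start_day__gte=date.today())
        .filter(
            Q(import_sources__isnull=False)
            & (Q(modified_date__isnull=True) | Q(modified_date__lte=F("imported_date")))
        )
        .exclude(import_sources__overlap=srcs)
    )

# views: orphan_pure_import_events(Event, RecurrentImport).count()
# task:  orphan_pure_import_events(Event, RecurrentImport).values_list("import_sources", flat=True)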