From 8547a671c4476cc62e6eb2fe2f4c61270f80a299 Mon Sep 17 00:00:00 2001
From: Jean-Marie Favreau <j-marie.favreau@uca.fr>
Date: Sun, 2 Jun 2024 14:29:05 +0200
Subject: [PATCH] Support des imports plugin MEC de Wordpress

---
 experimentations/get_le_poulailler.py         | 43 ++++++++
 experimentations/get_les_vinzelles.py         | 43 ++++++++
 src/agenda_culturel/celery.py                 |  2 +
 .../custom_extractors/wordpress_mec.py        | 99 +++++++++++++++++++
 .../import_tasks/generic_extractors.py        | 21 +++-
 .../0064_alter_recurrentimport_processor.py   | 18 ++++
 src/agenda_culturel/models.py                 |  1 +
 7 files changed, 222 insertions(+), 5 deletions(-)
 create mode 100755 experimentations/get_le_poulailler.py
 create mode 100755 experimentations/get_les_vinzelles.py
 create mode 100644 src/agenda_culturel/import_tasks/custom_extractors/wordpress_mec.py
 create mode 100644 src/agenda_culturel/migrations/0064_alter_recurrentimport_processor.py

diff --git a/experimentations/get_le_poulailler.py b/experimentations/get_le_poulailler.py
new file mode 100755
index 0000000..e7fd697
--- /dev/null
+++ b/experimentations/get_le_poulailler.py
@@ -0,0 +1,43 @@
+#!/usr/bin/python3
+# coding: utf-8
+
+import os
+import json
+import sys
+
+# Directory that contains this script
+# (symbolic links are resolved).
+current = os.path.dirname(os.path.realpath(__file__))
+
+# Parent of that directory: the imports
+# below look up the `src` package there.
+parent = os.path.dirname(current)
+
+# Append the parent directory
+# to sys.path.
+sys.path.append(parent)
+
+from src.agenda_culturel.import_tasks.downloader import *
+from src.agenda_culturel.import_tasks.extractor import *
+from src.agenda_culturel.import_tasks.importer import *
+from src.agenda_culturel.import_tasks.custom_extractors import *
+
+
+# Experiment: run the MEC Wordpress extractor on the agenda page of
+# Le Poulailler and save the extracted events to a JSON file.
+
+if __name__ == "__main__":
+
+    u2e = URL2Events(SimpleDownloader(), wordpress_mec.CExtractor())
+    url = "https://www.cabaretlepoulailler.fr/agenda/tout-lagenda/"
+    url_human = "https://www.cabaretlepoulailler.fr/agenda/tout-lagenda/"
+
+    try:
+        events = u2e.process(url, url_human, cache = "cache-le-poulailler.html", default_values = {"location": "Le Poulailler"}, published = True)
+
+        exportfile = "events-le-poulailler.json"
+        print("Saving events to file {}".format(exportfile))
+        with open(exportfile, "w") as f:
+            json.dump(events, f, indent=4, default=str)
+    except Exception as e:
+        print("Exception: " + str(e))
diff --git a/experimentations/get_les_vinzelles.py b/experimentations/get_les_vinzelles.py
new file mode 100755
index 0000000..f7534e8
--- /dev/null
+++ b/experimentations/get_les_vinzelles.py
@@ -0,0 +1,43 @@
+#!/usr/bin/python3
+# coding: utf-8
+
+import os
+import json
+import sys
+
+# Directory that contains this script
+# (symbolic links are resolved).
+current = os.path.dirname(os.path.realpath(__file__))
+
+# Parent of that directory: the imports
+# below look up the `src` package there.
+parent = os.path.dirname(current)
+
+# Append the parent directory
+# to sys.path.
+sys.path.append(parent)
+
+from src.agenda_culturel.import_tasks.downloader import *
+from src.agenda_culturel.import_tasks.extractor import *
+from src.agenda_culturel.import_tasks.importer import *
+from src.agenda_culturel.import_tasks.custom_extractors import *
+
+
+# Experiment: run the MEC Wordpress extractor on the programme page of
+# Les Vinzelles and save the extracted events to a JSON file.
+
+if __name__ == "__main__":
+
+    u2e = URL2Events(SimpleDownloader(), wordpress_mec.CExtractor())
+    url = "https://www.lesvinzelles.com/index.php/programme/"
+    url_human = "https://www.lesvinzelles.com/index.php/programme/"
+
+    try:
+        events = u2e.process(url, url_human, cache = "cache-les-vinzelles.html", default_values = {"location": "Les Vinzelles"}, published = True)
+
+        exportfile = "events-les-vinzelles.json"
+        print("Saving events to file {}".format(exportfile))
+        with open(exportfile, "w") as f:
+            json.dump(events, f, indent=4, default=str)
+    except Exception as e:
+        print("Exception: " + str(e))
diff --git a/src/agenda_culturel/celery.py b/src/agenda_culturel/celery.py
index eec6dfa..09c0b8e 100644
--- a/src/agenda_culturel/celery.py
+++ b/src/agenda_culturel/celery.py
@@ -112,6 +112,8 @@ def run_recurrent_import(self, pk):
         extractor = lefotomat.CExtractor()
     elif rimport.processor == RecurrentImport.PROCESSOR.LAPUCEALOREILLE:
         extractor = lapucealoreille.CExtractor()
+    elif rimport.processor == RecurrentImport.PROCESSOR.MECWORDPRESS:
+        extractor = wordpress_mec.CExtractor()
     else:
         extractor = None
 
diff --git a/src/agenda_culturel/import_tasks/custom_extractors/wordpress_mec.py b/src/agenda_culturel/import_tasks/custom_extractors/wordpress_mec.py
new file mode 100644
index 0000000..e102cea
--- /dev/null
+++ b/src/agenda_culturel/import_tasks/custom_extractors/wordpress_mec.py
@@ -0,0 +1,99 @@
+from ..generic_extractors import *
+from bs4 import BeautifulSoup
+
+
+# A class dedicated to get events from MEC Wordpress plugin
+# URL: https://webnus.net/modern-events-calendar/
+class CExtractor(TwoStepsExtractor):
+    """Two-step extractor: event list page first, then one page per event."""
+
+    def local2agendaCategory(self, category):
+        """Map a site category label to an agenda category (None if unknown)."""
+        mapping = {
+            "Musique": "Concert",
+            "CONCERT": "Concert",
+            "VISITE": "Autre",
+            "Spectacle": "Théâtre",
+            "Rencontre": "Autre",
+            "Atelier": "Autre",
+            "Projection": "Autre",
+        }
+        return mapping.get(category)
+
+    def build_event_url_list(self, content):
+        """Register the URL, title and category of each event tile in content."""
+        soup = BeautifulSoup(content, "xml")
+
+        for e in soup.select("div.mec-tile-event-content"):
+            link = e.select("h4.mec-event-title a")
+            # Only tiles with exactly one title link are considered.
+            if len(link) != 1:
+                continue
+            url = link[0]["href"]
+            if self.add_event_url(url):
+                self.add_event_title(url, link[0].get_text())
+
+            # Fall back to .mec-category when there is no .mec-label-normal.
+            categories = e.select(".mec-label-normal") or e.select(".mec-category")
+            if categories:
+                category = self.local2agendaCategory(categories[0].get_text())
+                if category is not None:
+                    self.add_event_category(url, category)
+
+    def add_event_from_content(
+        self,
+        event_content,
+        event_url,
+        url_human=None,
+        default_values=None,
+        published=False,
+    ):
+        """Parse one event page and register the event under event_url.
+
+        Fields missing from the page are passed on as None. url_human is
+        ignored: event_url is always used as the human-readable URL.
+        The location comes from default_values["location"] when provided.
+        """
+        soup = BeautifulSoup(event_content, "xml")
+
+        start_day = soup.select(".mec-start-date-label")
+        if start_day:
+            start_day = self.parse_french_date(start_day[0].get_text())
+        else:
+            start_day = None
+
+        # The text is split on "-": start time, then an optional end time.
+        start_time = None
+        end_time = None
+        t = soup.select(".mec-single-event-time .mec-events-abbr")
+        if t:
+            t = t[0].get_text().split("-")
+            start_time = self.parse_french_time(t[0])
+            if len(t) > 1:
+                end_time = self.parse_french_time(t[1])
+
+        image = soup.select(".mec-events-event-image img")
+        image = image[0]["src"] if image else None
+
+        # Tolerate pages without a content block instead of raising IndexError.
+        description = soup.select(".mec-event-content")
+        description = description[0].get_text() if description else None
+
+        self.add_event_with_props(
+            event_url,
+            None,
+            None,
+            start_day,
+            # default_values may be None (it is this method's default).
+            (default_values or {}).get("location"),
+            description,
+            None,
+            recurrences=None,
+            uuids=[event_url],
+            url_human=event_url,
+            start_time=start_time,
+            end_day=None,
+            end_time=end_time,
+            published=published,
+            image=image,
+        )
diff --git a/src/agenda_culturel/import_tasks/generic_extractors.py b/src/agenda_culturel/import_tasks/generic_extractors.py
index d212624..0ba6fc8 100644
--- a/src/agenda_culturel/import_tasks/generic_extractors.py
+++ b/src/agenda_culturel/import_tasks/generic_extractors.py
@@ -2,6 +2,10 @@ from abc import abstractmethod
 from urllib.parse import urlparse
 from urllib.parse import parse_qs
 
+import logging
+
+logger = logging.getLogger(__name__)
+
 
 from .extractor import *
 from django.utils.translation import gettext_lazy as _
@@ -171,6 +175,7 @@ class TwoStepsExtractor(Extractor):
         default_values=None,
         published=False,
         only_future=True,
+        ignore_404=True
     ):
         self.only_future = only_future
         self.now = datetime.datetime.now().date()
@@ -195,10 +200,16 @@ class TwoStepsExtractor(Extractor):
             # first download the content associated with this link
             content_event = self.downloader.get_content(event_url)
             if content_event is None:
-                raise Exception(_("Cannot extract event from url {}").format(event_url))
-            # then extract event information from this html document
-            self.add_event_from_content(
-                content_event, event_url, url_human, default_values, published
-            )
+                msg = "Cannot extract event from url {}".format(event_url)
+                if ignore_404:
+                    # best effort: log the failed download instead of raising
+                    logger.error(msg)
+                else:
+                    raise Exception(msg)
+            else:
+                # then extract event information from this html document
+                self.add_event_from_content(
+                    content_event, event_url, url_human, default_values, published
+                )
 
         return self.get_structure()
diff --git a/src/agenda_culturel/migrations/0064_alter_recurrentimport_processor.py b/src/agenda_culturel/migrations/0064_alter_recurrentimport_processor.py
new file mode 100644
index 0000000..33ac0a4
--- /dev/null
+++ b/src/agenda_culturel/migrations/0064_alter_recurrentimport_processor.py
@@ -0,0 +1,18 @@
+# Generated by Django 4.2.7 on 2024-06-02 12:01
+
+from django.db import migrations, models
+
+
+class Migration(migrations.Migration):
+    # Adds the "Plugin wordpress MEC" choice to RecurrentImport.processor.
+    dependencies = [
+        ('agenda_culturel', '0063_alter_event_exact_location'),
+    ]
+
+    operations = [
+        migrations.AlterField(
+            model_name='recurrentimport',
+            name='processor',
+            field=models.CharField(choices=[('ical', 'ical'), ('icalnobusy', 'ical no busy'), ('icalnovc', 'ical no VC'), ('lacoope', 'lacoope.org'), ('lacomedie', 'la comédie'), ('lefotomat', 'le fotomat'), ('lapucealoreille', 'la puce à loreille'), ('Plugin wordpress MEC', 'Plugin wordpress MEC')], default='ical', max_length=20, verbose_name='Processor'),
+        ),
+    ]
diff --git a/src/agenda_culturel/models.py b/src/agenda_culturel/models.py
index b92f9b6..771f42f 100644
--- a/src/agenda_culturel/models.py
+++ b/src/agenda_culturel/models.py
@@ -1194,6 +1194,7 @@ class RecurrentImport(models.Model):
         LACOMEDIE = "lacomedie", _("la comédie")
         LEFOTOMAT = "lefotomat", _("le fotomat")
         LAPUCEALOREILLE = "lapucealoreille", _("la puce à l" "oreille")
+        MECWORDPRESS = "Plugin wordpress MEC", _("Plugin wordpress MEC")
 
     class DOWNLOADER(models.TextChoices):
         SIMPLE = "simple", _("simple")