Support des imports plugin MEC de Wordpress
This commit is contained in:
		
							
								
								
									
										43
									
								
								experimentations/get_le_poulailler.py
									
									
									
									
									
										Executable file
									
								
							
							
						
						
									
										43
									
								
								experimentations/get_le_poulailler.py
									
									
									
									
									
										Executable file
									
								
							@@ -0,0 +1,43 @@
 | 
			
		||||
#!/usr/bin/python3
 | 
			
		||||
# coding: utf-8
 | 
			
		||||
 | 
			
		||||
import os
 | 
			
		||||
import json
 | 
			
		||||
import sys
 | 
			
		||||
 | 
			
		||||
# getting the name of the directory
 | 
			
		||||
# where this file is located.
 | 
			
		||||
current = os.path.dirname(os.path.realpath(__file__))
 | 
			
		||||
 
 | 
			
		||||
# Getting the parent directory name
 | 
			
		||||
# where the current directory is present.
 | 
			
		||||
parent = os.path.dirname(current)
 | 
			
		||||
 
 | 
			
		||||
# adding the parent directory to 
 | 
			
		||||
# the sys.path.
 | 
			
		||||
sys.path.append(parent)
 | 
			
		||||
 | 
			
		||||
from src.agenda_culturel.import_tasks.downloader import *
 | 
			
		||||
from src.agenda_culturel.import_tasks.extractor import *
 | 
			
		||||
from src.agenda_culturel.import_tasks.importer import *
 | 
			
		||||
from src.agenda_culturel.import_tasks.custom_extractors import *
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
if __name__ == "__main__":
    # The same address is handed over twice: once as the page to process and
    # once as the human-facing URL.
    agenda_page = "https://www.cabaretlepoulailler.fr/agenda/tout-lagenda/"
    pipeline = URL2Events(SimpleDownloader(), wordpress_mec.CExtractor())

    try:
        extracted = pipeline.process(
            agenda_page,
            agenda_page,
            cache="cache-le-poulailler.html",
            default_values={"location": "Le Poulailler"},
            published=True,
        )

        target = "events-le-poulailler.json"
        print("Saving events to file {}".format(target))
        with open(target, "w") as out:
            # default=str: values json cannot encode natively are written
            # through their str() form.
            json.dump(extracted, out, indent=4, default=str)
    except Exception as err:
        # Experiment script: report any failure on stdout and finish.
        print("Exception: " + str(err))
 | 
			
		||||
							
								
								
									
										43
									
								
								experimentations/get_les_vinzelles.py
									
									
									
									
									
										Executable file
									
								
							
							
						
						
									
										43
									
								
								experimentations/get_les_vinzelles.py
									
									
									
									
									
										Executable file
									
								
							@@ -0,0 +1,43 @@
 | 
			
		||||
#!/usr/bin/python3
 | 
			
		||||
# coding: utf-8
 | 
			
		||||
 | 
			
		||||
import os
 | 
			
		||||
import json
 | 
			
		||||
import sys
 | 
			
		||||
 | 
			
		||||
# getting the name of the directory
 | 
			
		||||
# where this file is located.
 | 
			
		||||
current = os.path.dirname(os.path.realpath(__file__))
 | 
			
		||||
 
 | 
			
		||||
# Getting the parent directory name
 | 
			
		||||
# where the current directory is present.
 | 
			
		||||
parent = os.path.dirname(current)
 | 
			
		||||
 
 | 
			
		||||
# adding the parent directory to 
 | 
			
		||||
# the sys.path.
 | 
			
		||||
sys.path.append(parent)
 | 
			
		||||
 | 
			
		||||
from src.agenda_culturel.import_tasks.downloader import *
 | 
			
		||||
from src.agenda_culturel.import_tasks.extractor import *
 | 
			
		||||
from src.agenda_culturel.import_tasks.importer import *
 | 
			
		||||
from src.agenda_culturel.import_tasks.custom_extractors import *
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
if __name__ == "__main__":
    # The same address is handed over twice: once as the page to process and
    # once as the human-facing URL.
    programme_page = "https://www.lesvinzelles.com/index.php/programme/"
    pipeline = URL2Events(SimpleDownloader(), wordpress_mec.CExtractor())

    try:
        extracted = pipeline.process(
            programme_page,
            programme_page,
            cache="cache-les-vinzelles.html",
            default_values={"location": "Les Vinzelles"},
            published=True,
        )

        target = "events-les-vinzelles.json"
        print("Saving events to file {}".format(target))
        with open(target, "w") as out:
            # default=str: values json cannot encode natively are written
            # through their str() form.
            json.dump(extracted, out, indent=4, default=str)
    except Exception as err:
        # Experiment script: report any failure on stdout and finish.
        print("Exception: " + str(err))
 | 
			
		||||
@@ -112,6 +112,8 @@ def run_recurrent_import(self, pk):
 | 
			
		||||
        extractor = lefotomat.CExtractor()
 | 
			
		||||
    elif rimport.processor == RecurrentImport.PROCESSOR.LAPUCEALOREILLE:
 | 
			
		||||
        extractor = lapucealoreille.CExtractor()
 | 
			
		||||
    elif rimport.processor == RecurrentImport.PROCESSOR.MECWORDPRESS:
 | 
			
		||||
        extractor = wordpress_mec.CExtractor()
 | 
			
		||||
    else:
 | 
			
		||||
        extractor = None
 | 
			
		||||
 | 
			
		||||
 
 | 
			
		||||
@@ -0,0 +1,99 @@
 | 
			
		||||
from ..generic_extractors import *
 | 
			
		||||
from bs4 import BeautifulSoup
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
# A class dedicated to getting events from the MEC WordPress plugin
 | 
			
		||||
# URL: https://webnus.net/modern-events-calendar/
 | 
			
		||||
class CExtractor(TwoStepsExtractor):
 | 
			
		||||
    
 | 
			
		||||
    def local2agendaCategory(self, category):
        """Translate a category label found on a site into an agenda category.

        The lookup is an exact, case-sensitive match against the table below.

        Args:
            category: label text as read from the page.

        Returns:
            The matching agenda category name, or None when the label has no
            entry in the table.
        """
        known_labels = {
            "Musique": "Concert",
            "CONCERT": "Concert",
            "VISITE": "Autre",
            "Spectacle": "Théâtre",
            "Rencontre": "Autre",
            "Atelier": "Autre",
            "Projection": "Autre",
        }
        return known_labels.get(category)
 | 
			
		||||
 | 
			
		||||
    def build_event_url_list(self, content):
        """Register every event found on a MEC listing page.

        Each ``div.mec-tile-event-content`` tile holding exactly one
        ``h4.mec-event-title a`` link is considered; other tiles are ignored.
        The link target is passed to ``add_event_url``; when that call returns
        a truthy value the URL and title are printed and the title is
        registered. The tile's first ``.mec-label-normal`` element (or, when
        there is none, its first ``.mec-category`` element) is translated with
        ``local2agendaCategory`` and registered as the category when a
        translation exists.

        Args:
            content: markup of the listing page.
        """
        document = BeautifulSoup(content, "xml")

        for tile in document.select("div.mec-tile-event-content"):
            anchors = tile.select("h4.mec-event-title a")
            if len(anchors) != 1:
                continue

            anchor = anchors[0]
            url = anchor["href"]
            title = anchor.get_text()

            if self.add_event_url(url):
                print(url, title)
                self.add_event_title(url, title)

            # Fall back to the second selector only when the first finds
            # nothing.
            labels = tile.select(".mec-label-normal") or tile.select(".mec-category")
            if not labels:
                continue

            category = self.local2agendaCategory(labels[0].get_text())
            if category is not None:
                self.add_event_category(url, category)
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
    def add_event_from_content(
        self,
        event_content,
        event_url,
        url_human=None,
        default_values=None,
        published=False,
    ):
        """Extract one event from a MEC event page and register it.

        Reads the start date (``.mec-start-date-label``), the time range
        (``.mec-single-event-time .mec-events-abbr``, split on ``-``), the
        image (``.mec-events-event-image img``) and the description
        (``.mec-event-content``), then forwards them to
        ``add_event_with_props``.

        Args:
            event_content: markup of the event page.
            event_url: address of the event page; also used as the event's
                single uuid and as its human-facing URL.
            url_human: accepted for interface compatibility; the value is
                ignored and replaced by ``event_url``.
            default_values: optional mapping; only its ``"location"`` entry is
                read here. ``None`` is treated like an empty mapping.
            published: forwarded unchanged to ``add_event_with_props``.

        Raises:
            IndexError: when the page has no ``.mec-event-content`` element.
            KeyError: when the selected image element has no ``src`` attribute.
        """
        soup = BeautifulSoup(event_content, "xml")

        date_nodes = soup.select(".mec-start-date-label")
        if date_nodes:
            start_day = self.parse_french_date(date_nodes[0].get_text())
        else:
            start_day = None

        start_time = None
        end_time = None
        time_nodes = soup.select(".mec-single-event-time .mec-events-abbr")
        if time_nodes:
            # The text is split on "-": first part is the start, an optional
            # second part is the end.
            bounds = time_nodes[0].get_text().split("-")
            start_time = self.parse_french_time(bounds[0])
            if len(bounds) > 1:
                end_time = self.parse_french_time(bounds[1])

        image_nodes = soup.select(".mec-events-event-image img")
        image = image_nodes[0]["src"] if image_nodes else None

        description = soup.select(".mec-event-content")[0].get_text()

        # The event page itself is always used as the human-facing URL.
        url_human = event_url

        # default_values defaults to None: guard the lookup so that calling
        # without defaults does not raise TypeError.
        location = (default_values or {}).get("location")

        # NOTE(review): the positional None arguments are presumably filled in
        # from values registered during listing — confirm against
        # add_event_with_props.
        self.add_event_with_props(
            event_url,
            None,
            None,
            start_day,
            location,
            description,
            None,
            recurrences=None,
            uuids=[event_url],
            url_human=url_human,
            start_time=start_time,
            end_day=None,
            end_time=end_time,
            published=published,
            image=image,
        )
 | 
			
		||||
@@ -2,6 +2,10 @@ from abc import abstractmethod
 | 
			
		||||
from urllib.parse import urlparse
 | 
			
		||||
from urllib.parse import parse_qs
 | 
			
		||||
 | 
			
		||||
import logging
 | 
			
		||||
 | 
			
		||||
logger = logging.getLogger(__name__)
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
from .extractor import *
 | 
			
		||||
from django.utils.translation import gettext_lazy as _
 | 
			
		||||
@@ -171,6 +175,7 @@ class TwoStepsExtractor(Extractor):
 | 
			
		||||
        default_values=None,
 | 
			
		||||
        published=False,
 | 
			
		||||
        only_future=True,
 | 
			
		||||
        ignore_404=True
 | 
			
		||||
    ):
 | 
			
		||||
        self.only_future = only_future
 | 
			
		||||
        self.now = datetime.datetime.now().date()
 | 
			
		||||
@@ -195,10 +200,16 @@ class TwoStepsExtractor(Extractor):
 | 
			
		||||
            # first download the content associated with this link
 | 
			
		||||
            content_event = self.downloader.get_content(event_url)
 | 
			
		||||
            if content_event is None:
 | 
			
		||||
                raise Exception(_("Cannot extract event from url {}").format(event_url))
 | 
			
		||||
            # then extract event information from this html document
 | 
			
		||||
            self.add_event_from_content(
 | 
			
		||||
                content_event, event_url, url_human, default_values, published
 | 
			
		||||
            )
 | 
			
		||||
                msg = "Cannot extract event from url {}".format(event_url)
 | 
			
		||||
                if ignore_404:
 | 
			
		||||
                    logger.error(msg)
 | 
			
		||||
                else:
 | 
			
		||||
                    print("go")
 | 
			
		||||
                    raise Exception(msg)
 | 
			
		||||
            else:
 | 
			
		||||
                # then extract event information from this html document
 | 
			
		||||
                self.add_event_from_content(
 | 
			
		||||
                    content_event, event_url, url_human, default_values, published
 | 
			
		||||
                )
 | 
			
		||||
 | 
			
		||||
        return self.get_structure()
 | 
			
		||||
 
 | 
			
		||||
@@ -0,0 +1,18 @@
 | 
			
		||||
# Generated by Django 4.2.7 on 2024-06-02 12:01
 | 
			
		||||
 | 
			
		||||
from django.db import migrations, models
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
class Migration(migrations.Migration):
    """Alter ``RecurrentImport.processor``; its choices include 'Plugin wordpress MEC'."""

    dependencies = [
        ('agenda_culturel', '0063_alter_event_exact_location'),
    ]

    operations = [
        migrations.AlterField(
            model_name='recurrentimport',
            name='processor',
            field=models.CharField(
                choices=[
                    ('ical', 'ical'),
                    ('icalnobusy', 'ical no busy'),
                    ('icalnovc', 'ical no VC'),
                    ('lacoope', 'lacoope.org'),
                    ('lacomedie', 'la comédie'),
                    ('lefotomat', 'le fotomat'),
                    ('lapucealoreille', 'la puce à loreille'),
                    ('Plugin wordpress MEC', 'Plugin wordpress MEC'),
                ],
                default='ical',
                max_length=20,
                verbose_name='Processor',
            ),
        ),
    ]
 | 
			
		||||
@@ -1194,6 +1194,7 @@ class RecurrentImport(models.Model):
 | 
			
		||||
        LACOMEDIE = "lacomedie", _("la comédie")
 | 
			
		||||
        LEFOTOMAT = "lefotomat", _("le fotomat")
 | 
			
		||||
        LAPUCEALOREILLE = "lapucealoreille", _("la puce à l" "oreille")
 | 
			
		||||
        MECWORDPRESS = "Plugin wordpress MEC", _("Plugin wordpress MEC")
 | 
			
		||||
 | 
			
		||||
    class DOWNLOADER(models.TextChoices):
 | 
			
		||||
        SIMPLE = "simple", _("simple")
 | 
			
		||||
 
 | 
			
		||||
		Reference in New Issue
	
	Block a user