experimentations/get_la_raymonde.py (new executable file, 44 lines)
@@ -0,0 +1,44 @@
+#!/usr/bin/python3
+# coding: utf-8
+
+import os
+import json
+import sys
+
+# getting the name of the directory
+# where this file is present.
+current = os.path.dirname(os.path.realpath(__file__))
+
+# getting the parent directory name
+# where the current directory is present.
+parent = os.path.dirname(current)
+
+# adding the parent directory to
+# the sys.path.
+sys.path.append(parent)
+sys.path.append(parent + "/src")
+
+from src.agenda_culturel.import_tasks.downloader import *
+from src.agenda_culturel.import_tasks.extractor import *
+from src.agenda_culturel.import_tasks.importer import *
+from src.agenda_culturel.import_tasks.custom_extractors import *
+
+
+
+
+
+if __name__ == "__main__":
+
+    u2e = URL2Events(SimpleDownloader(), laraymonde.CExtractor())
+    url = "https://www.raymondbar.net/"
+    url_human = "https://www.raymondbar.net/"
+
+    try:
+        events = u2e.process(url, url_human, cache="cache-la-raymonde.html", default_values={"location": "La Raymonde", "category": "Fêtes & Concerts"}, published=True)
+
+        exportfile = "events-la-raymonde.json"
+        print("Saving events to file {}".format(exportfile))
+        with open(exportfile, "w") as f:
+            json.dump(events, f, indent=4, default=str)
+    except Exception as e:
+        print("Exception: " + str(e))
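As a quick way to check what this experimentation script produced, here is a minimal sketch that loads the exported file and prints a short summary. It assumes the script has already been run and written events-la-raymonde.json to the working directory; the per-event structure depends on the importer and is not assumed here.

import json

# Load the file written by get_la_raymonde.py and print a short summary.
# Only assumes the top-level value is a JSON list or dict.
with open("events-la-raymonde.json") as f:
    events = json.load(f)

if isinstance(events, list):
    print("{} events exported".format(len(events)))
elif isinstance(events, dict):
    print("keys:", list(events.keys()))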
@@ -150,6 +150,8 @@ def run_recurrent_import_internal(rimport, downloader, req_id):
         extractor = arachnee.CExtractor()
     elif rimport.processor == RecurrentImport.PROCESSOR.LERIO:
         extractor = lerio.CExtractor()
+    elif rimport.processor == RecurrentImport.PROCESSOR.LARAYMONDE:
+        extractor = laraymonde.CExtractor()
     else:
         extractor = None
 
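The new branch extends the existing elif chain that maps RecurrentImport.PROCESSOR values to extractor classes. Purely as an illustration of the same dispatch pattern, not part of this change and assuming the same arachnee / lerio / laraymonde imports as the surrounding module, the mapping could equivalently be expressed as a dictionary lookup:

# Hypothetical rewrite of the elif chain above (illustration only).
EXTRACTORS = {
    RecurrentImport.PROCESSOR.ARACHNEE: arachnee.CExtractor,
    RecurrentImport.PROCESSOR.LERIO: lerio.CExtractor,
    RecurrentImport.PROCESSOR.LARAYMONDE: laraymonde.CExtractor,
}

extractor_class = EXTRACTORS.get(rimport.processor)
extractor = extractor_class() if extractor_class is not None else None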
@@ -0,0 +1,67 @@
+from ..generic_extractors import *
+from bs4 import BeautifulSoup
+from datetime import datetime
+
+# A class dedicated to getting events from Raymond Bar
+# URL: https://www.raymondbar.net/
+class CExtractor(TwoStepsExtractorNoPause):
+
+    def __init__(self):
+        super().__init__()
+
+    def build_event_url_list(self, content, infuture_days=180):
+
+        soup = BeautifulSoup(content, "html.parser")
+
+        links = soup.select(".showsList .showMore")
+        if links:
+            for l in links:
+                print(l["href"])
+                self.add_event_url(l["href"])
+
+
+    def add_event_from_content(
+        self,
+        event_content,
+        event_url,
+        url_human=None,
+        default_values=None,
+        published=False,
+    ):
+        soup = BeautifulSoup(event_content, "html.parser")
+
+        title = soup.select_one(".showDesc h4 a.summary").text
+        start_day = soup.select_one(".showDate .value-title")
+
+        if start_day is not None:
+            start_day = start_day["title"]
+            if start_day is not None:
+                start_day = start_day.split("T")[0]
+        if start_day is None:
+            print("impossible de récupérer la date")
+            return
+        description = soup.select_one(".showDetails.description").text
+        # select() returns a list (never None): only read the last image, if any
+        images = soup.select(".showDetails.description img")
+        image = images[-1]["src"] if images else None
+        image_alt = images[-1]["alt"] if images else None
+
+        self.add_event_with_props(
+            default_values,
+            event_url,
+            title,
+            None,
+            start_day,
+            None,
+            description,
+            [],
+            recurrences=None,
+            uuids=[event_url],
+            url_human=event_url,
+            start_time=None,
+            end_day=None,
+            end_time=None,
+            published=published,
+            image=image,
+            image_alt=image_alt,
+        )
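To make the CSS selectors in this extractor easier to follow, here is a standalone sketch that runs the same selectors against a small hand-written HTML fragment. The fragment is hypothetical and only mimics the markup the extractor expects on raymondbar.net.

from bs4 import BeautifulSoup

# Hypothetical HTML fragment mimicking the markup the extractor expects.
html = """
<div class="showsList"><a class="showMore" href="https://www.raymondbar.net/show">more</a></div>
<div class="showDesc"><h4><a class="summary">Concert</a></h4></div>
<div class="showDate"><span class="value-title" title="2024-06-01T20:00:00"></span></div>
<div class="showDetails description">Live set.<img src="poster.jpg" alt="poster"></div>
"""

soup = BeautifulSoup(html, "html.parser")

# Step 1: collect the per-event links (build_event_url_list).
print([a["href"] for a in soup.select(".showsList .showMore")])

# Step 2: the fields read by add_event_from_content.
print(soup.select_one(".showDesc h4 a.summary").text)                    # title
print(soup.select_one(".showDate .value-title")["title"].split("T")[0])  # start day
images = soup.select(".showDetails.description img")
if images:
    print(images[-1]["src"], images[-1]["alt"])                           # image and alt text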
@@ -18,7 +18,6 @@ class CExtractor(TwoStepsExtractorNoPause):
         links = soup.select("td.seance_link a")
         if links:
             for l in links:
-                print(l["href"])
                 self.add_event_url(l["href"])
 
     def to_text_select_one(soup, filter):
@@ -2009,6 +2009,7 @@ class RecurrentImport(models.Model):
         C3C = "cour3coquins", _("la cour des 3 coquins")
         ARACHNEE = "arachnee", _("Arachnée concert")
         LERIO = "rio", _('Le Rio')
+        LARAYMONDE = "raymonde", _('La Raymonde')
 
     class DOWNLOADER(models.TextChoices):
         SIMPLE = "simple", _("simple")