Extraction des informations depuis le ical
This commit is contained in:
		@@ -10,6 +10,7 @@ from selenium import webdriver
 | 
			
		||||
from selenium.webdriver.chrome.service import Service
 | 
			
		||||
from selenium.webdriver.chrome.options import Options
 | 
			
		||||
import icalendar
 | 
			
		||||
from datetime import datetime, date
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
@@ -61,27 +62,127 @@ class ChromiumHeadlessDownloader(Downloader):
 | 
			
		||||
class Extractor(ABC):
    """Base class for extractors that turn downloaded content into events.

    An extractor accumulates a header (source URL and extraction date) and a
    list of event dictionaries; both are returned together by
    ``get_structure()``.
    """

    def __init__(self):
        # Metadata about the current extraction, filled in by set_header().
        self.header = {}
        # Event dictionaries collected through add_event().
        self.events = []

    @abstractmethod
    def extract(self, content, url, url_human=None, default_values=None):
        """Extract events from ``content``.

        Args:
            content: raw content downloaded from ``url``.
            url: address the content was downloaded from.
            url_human: optional human-readable page associated with ``url``.
            default_values: optional mapping of fallback values (looked up
                with ``default_value_if_exists``).
        """
        pass

    def set_header(self, url):
        """Record the source ``url`` and the current date in the header."""
        self.header["url"] = url
        self.header["date"] = datetime.now()

    def clear_events(self):
        """Forget every event collected so far."""
        self.events = []

    def add_event(self, title, category, start_day, location, description, tags, url=None, url_human=None, start_time=None, end_day=None, end_time=None, last_modified=None):
        """Append an event to the list of collected events.

        ``title`` and ``start_day`` are mandatory: when either is ``None`` an
        error is printed and the event is dropped. Optional fields are only
        stored in the event dictionary when they are not ``None``.
        """
        if title is None:
            print("ERROR: cannot import an event without name")
            return
        if start_day is None:
            print("ERROR: cannot import an event without start day")
            return

        event = {
            "title": title,
            "category": category,
            "start_day": start_day,
            "location": location,
            # Key was previously misspelled "descritpion".
            "description": description,
            "tags": tags,
        }

        optional_fields = {
            "url": url,
            "url_human": url_human,
            "start_time": start_time,
            "end_day": end_day,
            "end_time": end_time,
            "last_modified": last_modified,
        }
        # Only keep the optional fields that were actually provided.
        event.update(
            {key: value for key, value in optional_fields.items() if value is not None}
        )

        self.events.append(event)

    def default_value_if_exists(self, default_values, key):
        """Return ``default_values[key]``, or ``None`` when it is unavailable.

        ``None`` is returned both when ``default_values`` is ``None`` and when
        ``key`` is not one of its keys.
        """
        if default_values is None or key not in default_values:
            return None
        return default_values[key]

    def get_structure(self):
        """Return the header and the collected events in a single dict."""
        return {"header": self.header, "events": self.events}
 | 
			
		||||
 | 
			
		||||
class ICALExtractor(Extractor):
    """Extractor that reads events from iCalendar content."""

    def __init__(self):
        super().__init__()

    def get_item_from_vevent(self, event, name, raw=False):
        """Return the decoded property ``name`` of ``event``, or ``None``.

        Args:
            event: calendar component exposing a ``decoded(name)`` method.
            name: name of the property to read.
            raw: when true, return the decoded value untouched; otherwise
                return it as text.

        Returns:
            The property value, or ``None`` when it cannot be read or
            converted to text.
        """
        try:
            value = event.decoded(name)
            if raw or isinstance(value, str):
                return value
            return value.decode()
        except Exception:
            # Best effort: a missing or undecodable property is reported as
            # None. Unlike a bare ``except``, this lets KeyboardInterrupt and
            # SystemExit propagate.
            return None

    def get_dt_item_from_vevent(self, event, name):
        """Return the property ``name`` of ``event`` as a ``(day, time)`` pair.

        ``time`` is ``None`` when the property only holds a date, and both
        members are ``None`` when the property is unavailable or is neither a
        date nor a datetime.
        """
        item = self.get_item_from_vevent(event, name, raw=True)

        # datetime is a subclass of date, so it has to be tested first.
        if isinstance(item, datetime):
            return item.date(), item.time()
        if isinstance(item, date):
            return item, None
        return None, None

    def extract(self, content, url, url_human=None, default_values=None):
        """Extract every VEVENT found in ``content``.

        Args:
            content: iCalendar data to parse.
            url: address the calendar was downloaded from; combined with each
                event UID to build the event url.
            url_human: optional human-readable page, stored with each event.
            default_values: optional mapping providing the ``category``,
                ``location`` and ``tags`` of the events.

        Returns:
            The structure built by ``get_structure()``.
        """
        print("Extracting ical events from {}".format(url))
        self.set_header(url)
        self.clear_events()

        calendar = icalendar.Calendar.from_ical(content)

        # These values do not depend on the event: look them up only once.
        category = self.default_value_if_exists(default_values, "category")
        location = self.default_value_if_exists(default_values, "location")
        tags = self.default_value_if_exists(default_values, "tags")

        for event in calendar.walk('VEVENT'):
            print(event)
            title = self.get_item_from_vevent(event, "SUMMARY")

            start_day, start_time = self.get_dt_item_from_vevent(event, "DTSTART")
            end_day, end_time = self.get_dt_item_from_vevent(event, "DTEND")

            description = self.get_item_from_vevent(event, "DESCRIPTION")

            # Without a UID there is no per-event url: leave it to None (the
            # "url" key is then omitted by add_event) rather than reusing the
            # url of the previous event or failing on an unbound variable.
            uuid = self.get_item_from_vevent(event, "UID")
            event_url = url + "#" + uuid if uuid is not None else None

            last_modified = self.get_item_from_vevent(event, "LAST-MODIFIED", raw=True)

            rrule = self.get_item_from_vevent(event, "RRULE", raw=True)
            if rrule is not None:
                print("Recurrent event not yet supported", rrule)

            self.add_event(title, category, start_day, location, description, tags, url=event_url, url_human=url_human, start_time=start_time, end_day=end_day, end_time=end_time, last_modified=last_modified)

        return self.get_structure()
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
@@ -92,8 +193,7 @@ class URL2Events:
 | 
			
		||||
        self.downloader = downloader
 | 
			
		||||
        self.extractor = extractor
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
    def process(self, url, url_human = None, cache = None):
 | 
			
		||||
    def process(self, url, url_human = None, cache = None, default_values = None):
 | 
			
		||||
 | 
			
		||||
        if cache and os.path.exists(cache):
 | 
			
		||||
            print("Loading cache ({})".format(cache))
 | 
			
		||||
@@ -110,7 +210,7 @@ class URL2Events:
 | 
			
		||||
                with open(cache, "w") as text_file:
 | 
			
		||||
                    text_file.write(content)
 | 
			
		||||
 | 
			
		||||
        return self.extractor.extract(content, url, url_human)
 | 
			
		||||
        return self.extractor.extract(content, url, url_human, default_values)
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
if __name__ == "__main__":
 | 
			
		||||
@@ -119,6 +219,6 @@ if __name__ == "__main__":
 | 
			
		||||
    url = "https://calendar.google.com/calendar/ical/programmation.lesaugustes%40gmail.com/public/basic.ics"
 | 
			
		||||
    url_human = "https://www.cafelesaugustes.fr/la-programmation/"
 | 
			
		||||
 | 
			
		||||
    events = u2e.process(url, url_human, cache = "cache-augustes.ical")
 | 
			
		||||
    events = u2e.process(url, url_human, cache = "cache-augustes.ical", default_values = {"category": "Autre", "location": "Café lecture les Augustes"})
 | 
			
		||||
 | 
			
		||||
    print(events)
 | 
			
		||||
    #print(events)
 | 
			
		||||
		Reference in New Issue
	
	Block a user