wip
This commit is contained in:
		@@ -1,7 +1,4 @@
 | 
			
		||||
import icalendar
 | 
			
		||||
import warnings
 | 
			
		||||
 | 
			
		||||
import bbcode
 | 
			
		||||
import re
 | 
			
		||||
 | 
			
		||||
from datetime import datetime, date, timedelta
 | 
			
		||||
from bs4 import BeautifulSoup, MarkupResemblesLocatorWarning
 | 
			
		||||
@@ -12,6 +9,19 @@ from celery.utils.log import get_task_logger
 | 
			
		||||
 | 
			
		||||
logger = get_task_logger(__name__)
 | 
			
		||||
 | 
			
		||||
class Timeslot:
    """A start time paired with an end time that may be ``None``.

    Attributes are plain values assigned as given; no validation is done.
    """

    def __init__(self, start_time, end_time):
        """Store the boundaries of the slot.

        Args:
            start_time: beginning of the slot.
            end_time: end of the slot, or ``None`` when it is not known.
        """
        self.start_time = start_time
        self.end_time = end_time

    def __repr__(self):
        return f"{type(self).__name__}({self.start_time!r}, {self.end_time!r})"

    def merge(self, hours):
        """Widen this slot in place so that it also covers ``hours``.

        The start becomes the earlier of the two starts. The end becomes the
        later of the two ends; an end of ``None`` on either side is ignored
        in favour of the other one.

        Args:
            hours: another ``Timeslot`` whose boundaries are folded into this one.
        """
        self.start_time = min(self.start_time, hours.start_time)

        # Nothing to learn about the end from a slot that has none.
        if hours.end_time is None:
            return

        if self.end_time is None:
            self.end_time = hours.end_time
        else:
            self.end_time = max(self.end_time, hours.end_time)
 | 
			
		||||
 | 
			
		||||
class CExtractor(Extractor):
 | 
			
		||||
 | 
			
		||||
@@ -32,6 +42,21 @@ class CExtractor(Extractor):
 | 
			
		||||
 | 
			
		||||
        return (year, month)
 | 
			
		||||
    
 | 
			
		||||
    def find_hours(text):
        """Extract one or two times from ``text`` and wrap them in a ``Timeslot``.

        The text is cut into tokens, each token is handed to
        ``Extractor.parse_french_time`` and tokens for which it returns
        ``None`` are dropped.

        Args:
            text: the string to scan.

        Returns:
            ``Timeslot(start, None)`` when exactly one time was recognised,
            ``Timeslot(start, end)`` when exactly two were recognised (in
            order of appearance), and ``None`` otherwise.
        """
        # NOTE(review): inside the character class, " -/" is a range (U+0020 to
        # U+002F), so besides space, "-", "/", "=" and ">" the text is also split
        # on characters such as "(", ")", "," and "." -- confirm this is intended.
        tokens = re.split(r"[ -/=>]+", text)
        times = [
            parsed
            for parsed in map(Extractor.parse_french_time, tokens)
            if parsed is not None
        ]

        if len(times) == 1:
            return Timeslot(times[0], None)
        if len(times) == 2:
            return Timeslot(times[0], times[1])
        # No time at all, or too many to interpret as a single slot.
        return None
 | 
			
		||||
 | 
			
		||||
    
 | 
			
		||||
    def is_nickname(text):
        """Tell whether ``text`` contains an ``@`` sign."""
        has_at_sign = "@" in text
        return has_at_sign
 | 
			
		||||
    
 | 
			
		||||
@@ -42,7 +67,27 @@ class CExtractor(Extractor):
 | 
			
		||||
            if word in text:
 | 
			
		||||
                return True
 | 
			
		||||
        return False
 | 
			
		||||
    
 | 
			
		||||
    def find_timeslot(text):
        """Parse a ``"<day name> <day number> <hours>"`` string.

        Only the first three whitespace-separated tokens are looked at: the
        first must be accepted by ``Extractor.guess_day_name``, the first run
        of digits in the second is the day number, and the third is handed to
        ``CExtractor.find_hours``.

        Args:
            text: the string to parse.

        Returns:
            A ``(day_num, hours)`` tuple, where ``day_num`` is an ``int`` and
            ``hours`` is the value returned by ``CExtractor.find_hours``.
            ``None`` when the text has fewer than three tokens, the day name
            is not recognised, the second token holds no digit, or no hours
            could be extracted.
        """
        # split() without argument collapses runs of whitespace and ignores
        # leading/trailing whitespace, so no empty tokens are produced.
        tokens = text.split()
        if len(tokens) < 3:
            # Short strings (a lone nickname, a bare "day number", ...) used
            # to raise IndexError here; they simply are not a time slot.
            return None
        day_name, day_num, hours = tokens[:3]

        if not Extractor.guess_day_name(day_name):
            return None

        # Keep the first run of digits, tolerating decorations such as "12:".
        digits = re.search(r'\d+', day_num)
        if digits is None:
            return None
        day_num = int(digits.group())

        hours = CExtractor.find_hours(hours)
        if hours is None:
            return None

        return (day_num, hours)
 | 
			
		||||
        
 | 
			
		||||
 | 
			
		||||
    #['Samedi 12', '@Manon', '14:30-18:00', 'Dimanches 13', '@gaeldu63', '14h30 (j utilise la scie a format)']
 | 
			
		||||
    #['Mercredi 16 :']
 | 
			
		||||
@@ -52,25 +97,22 @@ class CExtractor(Extractor):
 | 
			
		||||
        result = []
 | 
			
		||||
 | 
			
		||||
        date = None
 | 
			
		||||
        tstart = None
 | 
			
		||||
        tend = None
 | 
			
		||||
        slot = None
 | 
			
		||||
        is_open = False
 | 
			
		||||
 | 
			
		||||
        # for each element in the paragraph
 | 
			
		||||
        for e in p.stripped_strings:
 | 
			
		||||
            day = CExtractor.find_day_name(e)
 | 
			
		||||
            day = CExtractor.find_timeslot(e)
 | 
			
		||||
            if not day is None:
 | 
			
		||||
                if not date is None and is_open:
 | 
			
		||||
                    # we reach a new day
 | 
			
		||||
                    result.append((date, tstart, tend))
 | 
			
		||||
                    result.append((date, slot))
 | 
			
		||||
                if isinstance(day, tuple):
 | 
			
		||||
                    date = day[0]
 | 
			
		||||
                    tstart = day[1]
 | 
			
		||||
                    tend = day[2]
 | 
			
		||||
                    slot = day[1]
 | 
			
		||||
                else:
 | 
			
		||||
                    date = day
 | 
			
		||||
                    tstart = None
 | 
			
		||||
                    tend = None
 | 
			
		||||
                    slot = None
 | 
			
		||||
                is_open = False
 | 
			
		||||
                continue
 | 
			
		||||
            elif not is_open:
 | 
			
		||||
@@ -83,15 +125,10 @@ class CExtractor(Extractor):
 | 
			
		||||
            
 | 
			
		||||
            hours = CExtractor.find_hours(e)
 | 
			
		||||
            if not hours is None:
 | 
			
		||||
                # we found hours
 | 
			
		||||
                if tstart is None:
 | 
			
		||||
                    tstart = hours[0]
 | 
			
		||||
                if slot is None:
 | 
			
		||||
                    slot = hours
 | 
			
		||||
                else:
 | 
			
		||||
                    tstart = min(tstart, hours[0])
 | 
			
		||||
                if tend is None:
 | 
			
		||||
                    tend = hours[1]
 | 
			
		||||
                else:
 | 
			
		||||
                    tend = max(tend, hours[1])
 | 
			
		||||
                    slot.merge(hours)
 | 
			
		||||
                continue
 | 
			
		||||
            
 | 
			
		||||
            if CExtractor.is_canceled(e):
 | 
			
		||||
@@ -100,7 +137,7 @@ class CExtractor(Extractor):
 | 
			
		||||
 | 
			
		||||
        if not date is None and is_open:
 | 
			
		||||
            # we reach a new day
 | 
			
		||||
            result.append((date, tstart, tend))
 | 
			
		||||
            result.append((date, slot))
 | 
			
		||||
 | 
			
		||||
        return result
 | 
			
		||||
        # [(10, time(14, 0, 0), time(17, 0, 0)), ]
 | 
			
		||||
@@ -124,7 +161,7 @@ class CExtractor(Extractor):
 | 
			
		||||
            # annule
 | 
			
		||||
            # menage
 | 
			
		||||
            for p in description.select('p'):
 | 
			
		||||
                CExtractor.find_time_slots(p)
 | 
			
		||||
                CExtractor.find_timeslots(p)
 | 
			
		||||
                
 | 
			
		||||
                if not '@' in p.text:
 | 
			
		||||
                    continue
 | 
			
		||||
 
 | 
			
		||||
		Reference in New Issue
	
	Block a user