un peu de refactoring pour les extracteurs
This commit is contained in:
		@@ -26,10 +26,20 @@ logger = get_task_logger(__name__)
 | 
			
		||||
 | 
			
		||||
class Extractor:
 | 
			
		||||
 | 
			
		||||
    name = None
 | 
			
		||||
 | 
			
		||||
    @abstractmethod
    def is_known_url(url):
        """Abstract hook: tell whether ``url`` is handled by this extractor.

        The base implementation does nothing and returns ``None``; concrete
        extractors are expected to override it.
        """
 | 
			
		||||
 | 
			
		||||
    @abstractmethod
    def extract(url):
        """Abstract hook: run the extraction for ``url``.

        The base implementation does nothing and returns ``None``.
        NOTE(review): the value concrete extractors return is not visible
        here -- confirm against the implementations.
        """
 | 
			
		||||
 | 
			
		||||
    @abstractmethod
    def clean_url(url):
        """Abstract hook: return a cleaned-up form of ``url``.

        The base implementation does nothing and returns ``None``; concrete
        extractors are expected to override it.
        """
 | 
			
		||||
 | 
			
		||||
    def download(url):
 | 
			
		||||
        try:
 | 
			
		||||
            options = Options()
 | 
			
		||||
@@ -69,6 +79,8 @@ class Extractor:
 | 
			
		||||
 | 
			
		||||
class ExtractorFacebook(Extractor):
 | 
			
		||||
 | 
			
		||||
    name = "Facebook"
 | 
			
		||||
 | 
			
		||||
    class SimpleFacebookEvent:
 | 
			
		||||
 | 
			
		||||
        def __init__(self, data):
 | 
			
		||||
@@ -194,8 +206,7 @@ class ExtractorFacebook(Extractor):
 | 
			
		||||
            image = self.get_element("image")
 | 
			
		||||
            local_image = None if image is None else Extractor.download_media(image)
 | 
			
		||||
 | 
			
		||||
            u = urlparse(url)
 | 
			
		||||
            unique_url = u.scheme + "://" + u.netloc + u.path
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
            return Event(title=self.get_element("name"), 
 | 
			
		||||
                status=Event.STATUS.DRAFT,
 | 
			
		||||
@@ -208,7 +219,20 @@ class ExtractorFacebook(Extractor):
 | 
			
		||||
                local_image=local_image,
 | 
			
		||||
                image=self.get_element("image"),
 | 
			
		||||
                image_alt=self.get_element("image_alt"),
 | 
			
		||||
                reference_urls=[unique_url])
 | 
			
		||||
                reference_urls=[url])
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
    def clean_url(url):
        """Reduce a Facebook URL to its scheme, network location and path.

        Anything after the path (for instance the query string and the
        fragment) is dropped. URLs that ``ExtractorFacebook.is_known_url``
        does not recognise are returned untouched.
        """
        # Guard clause: leave foreign URLs exactly as received.
        if not ExtractorFacebook.is_known_url(url):
            return url

        parts = urlparse(url)
        return f"{parts.scheme}://{parts.netloc}{parts.path}"
 | 
			
		||||
 | 
			
		||||
    def is_known_url(url):
        """Return True when ``url`` points at a Facebook host.

        The check uses the parsed hostname rather than the raw network
        location, so it is case-insensitive and ignores any port or
        credentials embedded in the URL.

        Args:
            url: URL string to test.

        Returns:
            bool: True for facebook.com, www.facebook.com or m.facebook.com,
            False otherwise (including when the URL has no host at all).
        """
        # ``hostname`` is lower-cased and stripped of port/userinfo, unlike
        # ``netloc``; it is None when the URL carries no host.
        host = urlparse(url).hostname
        return host in ("facebook.com", "www.facebook.com", "m.facebook.com")
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
    def process_page(txt, url):
 | 
			
		||||
@@ -230,6 +254,13 @@ class ExtractorFacebook(Extractor):
 | 
			
		||||
 | 
			
		||||
class ExtractorAllURLs:
 | 
			
		||||
 | 
			
		||||
    extractors = [ExtractorFacebook]
 | 
			
		||||
 | 
			
		||||
    def clean_url(url):
        """Run ``url`` through the ``clean_url`` of every registered extractor.

        Extractors are applied in the order of ``ExtractorAllURLs.extractors``;
        each one receives the output of the previous one, and the final value
        is returned.
        """
        for extractor in ExtractorAllURLs.extractors:
            url = extractor.clean_url(url)
        return url
 | 
			
		||||
 | 
			
		||||
    def extract(url):
 | 
			
		||||
        logger.info("Run extraction")
 | 
			
		||||
@@ -239,13 +270,12 @@ class ExtractorAllURLs:
 | 
			
		||||
            logger.info("Cannot download url")
 | 
			
		||||
            return None
 | 
			
		||||
 | 
			
		||||
        result = ExtractorFacebook.process_page(txt, url)
 | 
			
		||||
        for e in ExtractorAllURLs.extractors:
 | 
			
		||||
            result = e.process_page(txt, url)
 | 
			
		||||
 | 
			
		||||
        if result is not None:
 | 
			
		||||
            return result
 | 
			
		||||
        else:
 | 
			
		||||
            logger.info("Not a Facebook link")
 | 
			
		||||
 | 
			
		||||
        # TODO: add here other extractors
 | 
			
		||||
            if result is not None:
 | 
			
		||||
                return result
 | 
			
		||||
            else:
 | 
			
		||||
                logger.info("Not a " + e.name + " link")
 | 
			
		||||
 | 
			
		||||
        return None
 | 
			
		||||
 
 | 
			
		||||
		Reference in New Issue
	
	Block a user