2025-02-20 17:32:10 +01:00

61 lines
2.2 KiB
Python

from .downloader import *
from .extractor import *
from .extractor_facebook import FacebookEventExtractor
import logging
logger = logging.getLogger(__name__)
class URL2Events:
    """Download the content behind a URL and run event extractors on it.

    When an extractor is supplied it is the only one used. Otherwise the
    extractors returned by ``Extractor.get_default_extractors`` are tried in
    order until one of them yields a non-empty result.
    """

    def __init__(self, downloader=None, extractor=None, single_event=False):
        """Store the collaborators used by :meth:`process`.

        Args:
            downloader: Object exposing ``get_content``; a new
                ``SimpleDownloader`` is created when omitted.
            extractor: Optional extractor to use exclusively.
            single_event: Forwarded to ``Extractor.get_default_extractors``
                when no explicit extractor is set.
        """
        # Resolve the default here rather than in the signature: a call in a
        # default argument runs once at definition time, so every instance
        # would otherwise share the very same downloader object.
        self.downloader = SimpleDownloader() if downloader is None else downloader
        self.extractor = extractor
        self.single_event = single_event

    def process(
        self,
        url,
        url_human=None,
        cache=None,
        default_values=None,
        published=False,
        first=True,
    ):
        """Fetch ``url`` and extract events from its content.

        Args:
            url: Address to download and to hand to the extractor(s).
            url_human: Forwarded unchanged to ``extract``.
            cache: Forwarded unchanged to ``downloader.get_content``.
            default_values: Forwarded unchanged to ``extract``.
            published: Forwarded unchanged to ``extract``.
            first: ``True`` on the initial call; the retry performed after a
                failed extraction passes ``False`` so it happens only once.

        Returns:
            The value returned by the extractor's ``extract``; ``None`` when
            the download yields no content or when no default extractor
            returns a non-empty result.
        """
        referer = ""
        data = None
        content_type = None
        content = None
        # Use the same ``is not None`` test as the extraction step below so a
        # configured extractor is never half-ignored because it is falsy.
        if self.extractor is not None:
            referer = self.extractor.url_referer
            data = self.extractor.data
            content_type = self.extractor.content_type
            if self.extractor.no_downloader:
                # The extractor is flagged as not needing a download: give it
                # an empty string instead of fetching the URL.
                content = ''

        if content is None:
            content = self.downloader.get_content(
                url, cache, referer=referer, content_type=content_type, data=data
            )

        if content is None:
            return None

        if self.extractor is not None:
            self.extractor.set_downloader(self.downloader)
            return self.extractor.extract(
                content, url, url_human, default_values, published
            )

        # No extractor configured: try the default extractors in order.
        for candidate in Extractor.get_default_extractors(self.single_event):
            candidate_name = type(candidate).__name__
            logger.warning('Extractor::%s', candidate_name)
            candidate.set_downloader(self.downloader)
            try:
                events = candidate.extract(
                    content, url, url_human, default_values, published
                )
                if events is not None and len(events) > 0:
                    return events
            except Exception as ex:
                if (
                    first
                    and FacebookEventExtractor.is_known_url(url)
                    and self.downloader.support_2nd_extract
                    and candidate.has_2nd_method
                ):
                    logger.info('Using cookie trick on a facebook event')
                    candidate.prepare_2nd_extract()
                    # Retry the whole pipeline once; ``first=False`` prevents
                    # a second retry if this attempt fails too.
                    return self.process(
                        url, url_human, cache, default_values, published, False
                    )
                # Best effort: record the failure instead of hiding it, then
                # fall through to the next default extractor.
                logger.warning(
                    'Extractor %s failed on %s: %s',
                    candidate_name,
                    url,
                    ex,
                    exc_info=True,
                )
        return None