61 lines
2.2 KiB
Python
61 lines
2.2 KiB
Python
from .downloader import *
|
|
from .extractor import *
|
|
from .extractor_facebook import FacebookEventExtractor
|
|
|
|
import logging
|
|
|
|
# Module-level logger, named after this module so its output can be filtered
# through the standard logging hierarchy.
logger = logging.getLogger(__name__)
|
|
|
|
|
|
|
|
class URL2Events:
    """Fetch the content behind a URL and hand it to one or more extractors.

    When an explicit extractor is configured it is the only one used.
    Otherwise the extractors returned by
    ``Extractor.get_default_extractors(single_event)`` are tried in order and
    the first non-empty result wins.
    """

    def __init__(self, downloader=None, extractor=None, single_event=False):
        """Store the collaborators used by :meth:`process`.

        Args:
            downloader: object providing ``get_content``; when omitted (or
                None) a new ``SimpleDownloader`` is created for this instance.
            extractor: optional extractor to use exclusively; when None the
                default extractors are tried instead.
            single_event: forwarded to ``Extractor.get_default_extractors``.
        """
        # The downloader is built here rather than in the signature: a default
        # written as ``SimpleDownloader()`` is evaluated once, when the class
        # is defined, and that single instance would then be shared by every
        # URL2Events created without an explicit downloader.
        self.downloader = SimpleDownloader() if downloader is None else downloader
        self.extractor = extractor
        self.single_event = single_event

    def process(
        self,
        url,
        url_human=None,
        cache=None,
        default_values=None,
        published=False,
        first=True,
    ):
        """Download ``url`` (unless the extractor opts out) and extract events.

        Args:
            url: address passed to the downloader and to the extractor(s).
            url_human: forwarded unchanged to ``extract``.
            cache: forwarded unchanged to ``downloader.get_content``.
            default_values: forwarded unchanged to ``extract``.
            published: forwarded unchanged to ``extract``.
            first: True on the initial call. The method passes False when it
                re-invokes itself after ``prepare_2nd_extract`` so the retry
                happens at most once.

        Returns:
            With an explicit extractor: whatever its ``extract`` returns.
            Without one: the first result from a default extractor that is
            not None and has a non-zero length. None when the downloader
            yields no content or no default extractor produces events.
        """
        extractor = self.extractor

        referer = ""
        data = None
        content_type = None
        content = None
        if extractor is not None:
            referer = extractor.url_referer
            data = extractor.data
            content_type = extractor.content_type
            if extractor.no_downloader:
                # The extractor does not want a download: give it an empty
                # document instead of calling the downloader.
                content = ""

        if content is None:
            content = self.downloader.get_content(
                url, cache, referer=referer, content_type=content_type, data=data
            )

        if content is None:
            return None

        if extractor is not None:
            extractor.set_downloader(self.downloader)
            return extractor.extract(
                content, url, url_human, default_values, published
            )

        # No extractor configured: try each default extractor in turn.
        for candidate in Extractor.get_default_extractors(self.single_event):
            logger.warning("Extractor::%s", type(candidate).__name__)
            candidate.set_downloader(self.downloader)
            try:
                events = candidate.extract(
                    content, url, url_human, default_values, published
                )
                if events is not None and len(events) > 0:
                    return events
            except Exception:
                can_retry = (
                    first
                    and FacebookEventExtractor.is_known_url(url)
                    and self.downloader.support_2nd_extract
                    and candidate.has_2nd_method
                )
                if can_retry:
                    logger.info('Using cookie trick on a facebook event')
                    candidate.prepare_2nd_extract()
                    # first=False prevents a second retry if this one fails too.
                    return self.process(
                        url, url_human, cache, default_values, published, False
                    )
                # Best effort: keep going with the next extractor, but leave a
                # trace of the failure instead of dropping it silently.
                logger.debug(
                    "Extractor %s failed on %s",
                    type(candidate).__name__,
                    url,
                    exc_info=True,
                )

        return None
|