On récupère l'erreur d'exécution par catch de l'exception

This commit is contained in:
Jean-Marie Favreau 2025-02-08 11:27:23 +01:00
parent 90b27505c7
commit a4f1af4d63
3 changed files with 20 additions and 21 deletions

View File

@ -236,7 +236,6 @@ class FacebookEventExtractor(Extractor):
self.has_2nd_method = True
def prepare_2nd_extract_dler(downloader):
logger.warning("prepare_2nd_extract_dler")
if downloader.support_2nd_extract:
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
@ -245,11 +244,8 @@ class FacebookEventExtractor(Extractor):
path = './/div[not(@aria-hidden)]/div[@aria-label="Allow all cookies"]'
element = WebDriverWait(downloader.driver, 10).until(EC.visibility_of_element_located((By.XPATH, path)))
button = downloader.driver.find_element(By.XPATH, path)
logger.warning("button")
logger.warning(button)
button.click()
t.sleep(3)
logger.warning(downloader.driver.page_source)
def prepare_2nd_extract(self):
FacebookEventExtractor.prepare_2nd_extract_dler(self.downloader)

View File

@ -253,18 +253,20 @@ class TwoStepsExtractor(Extractor):
raise Exception(msg)
else:
# then extract event information from this html document
self.add_event_from_content(
content_event, event_url, url_human, default_values, published
)
# some website (FB) sometime need a second step
if first and len(self.events) == 0 and self.has_2nd_method_in_list and self.downloader.support_2nd_extract:
first = False
self.prepare_2nd_extract_in_list()
content_event = self.downloader.get_content(event_url)
if not content_event is None:
self.add_event_from_content(
content_event, event_url, url_human, default_values, published
)
try:
self.add_event_from_content(
content_event, event_url, url_human, default_values, published
)
except Exception as e:
# some website (FB) sometime need a second step
if first and len(self.events) == 0 and self.has_2nd_method_in_list and self.downloader.support_2nd_extract:
first = False
self.prepare_2nd_extract_in_list()
content_event = self.downloader.get_content(event_url)
if not content_event is None:
self.add_event_from_content(
content_event, event_url, url_human, default_values, published
)
return self.get_structure()

View File

@ -37,11 +37,12 @@ class URL2Events:
for e in Extractor.get_default_extractors(self.single_event):
logger.warning('Extractor::' + type(e).__name__)
e.set_downloader(self.downloader)
events = e.extract(content, url, url_human, default_values, published)
if events is not None:
if len(events) > 0:
return events
else:
try:
events = e.extract(content, url, url_human, default_values, published)
if events is not None:
if len(events) > 0:
return events
except Exception as e:
logger.warning("cas sans event")
if first and FacebookEventExtractor.is_known_url(url) and self.downloader.support_2nd_extract and e.has_2nd_method:
logger.warning("on avance")