From 623274b1a64295f302388539c3a7434b87027e09 Mon Sep 17 00:00:00 2001 From: Jean-Marie Favreau Date: Wed, 26 Jun 2024 10:45:56 +0200 Subject: [PATCH] =?UTF-8?q?-=20Fix=20crash=20r=C3=A9cup=C3=A9ration=20?= =?UTF-8?q?=C3=A9v=C3=A9nement=20FB=20-=20Tentative=20d'acc=C3=A9l=C3=A9ra?= =?UTF-8?q?tion=20quand=20on=20utilise=20Selenium=20(objet=20cr=C3=A9?= =?UTF-8?q?=C3=A9=20au=20chargement=20de=20Celery)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/agenda_culturel/celery.py | 4 +++- src/agenda_culturel/import_tasks/downloader.py | 6 +++--- src/agenda_culturel/import_tasks/extractor.py | 1 + src/agenda_culturel/import_tasks/importer.py | 5 ++++- 4 files changed, 11 insertions(+), 5 deletions(-) diff --git a/src/agenda_culturel/celery.py b/src/agenda_culturel/celery.py index 09c0b8e..efb5bd6 100644 --- a/src/agenda_culturel/celery.py +++ b/src/agenda_culturel/celery.py @@ -20,6 +20,8 @@ app = Celery("agenda_culturel") logger = get_task_logger(__name__) +chromiumDownloader = ChromiumHeadlessDownloader() + # Using a string here means the worker doesn't have to serialize # the configuration object to child processes. @@ -96,7 +98,7 @@ def run_recurrent_import(self, pk): downloader = ( SimpleDownloader() if rimport.downloader == RecurrentImport.DOWNLOADER.SIMPLE - else ChromiumHeadlessDownloader() + else chromiumDownloader ) if rimport.processor == RecurrentImport.PROCESSOR.ICAL: extractor = ICALExtractor() diff --git a/src/agenda_culturel/import_tasks/downloader.py b/src/agenda_culturel/import_tasks/downloader.py index c6a9cdf..3a4776d 100644 --- a/src/agenda_culturel/import_tasks/downloader.py +++ b/src/agenda_culturel/import_tasks/downloader.py @@ -67,14 +67,14 @@ class ChromiumHeadlessDownloader(Downloader): self.options.add_argument("--disable-dev-shm-usage") self.options.add_argument("--no-sandbox") self.service = Service("/usr/bin/chromedriver") + self.driver = webdriver.Chrome(service=self.service, options=self.options) - def download(self, url, post=None): + def download(self, url, referer=None, post=None): if post: raise Exception("POST method with Chromium headless not yet implemented") print("Download {}".format(url)) - self.driver = webdriver.Chrome(service=self.service, options=self.options) self.driver.get(url) doc = self.driver.page_source - self.driver.quit() + self.driver.close() return doc diff --git a/src/agenda_culturel/import_tasks/extractor.py b/src/agenda_culturel/import_tasks/extractor.py index 7870a7d..ccb4380 100644 --- a/src/agenda_culturel/import_tasks/extractor.py +++ b/src/agenda_culturel/import_tasks/extractor.py @@ -16,6 +16,7 @@ class Extractor(ABC): self.header = {} self.events = [] self.downloader = None + self.referer = "" def guess_end_day(self, start_day, start_time, end_time): if end_time: diff --git a/src/agenda_culturel/import_tasks/importer.py b/src/agenda_culturel/import_tasks/importer.py index 960f93c..753381f 100644 --- a/src/agenda_culturel/import_tasks/importer.py +++ b/src/agenda_culturel/import_tasks/importer.py @@ -13,7 +13,10 @@ class URL2Events: def process( self, url, url_human=None, cache=None, default_values=None, published=False ): - content = self.downloader.get_content(url, cache, referer=self.extractor.url_referer) + referer = "" + if self.extractor: + referer = self.extractor.url_referer + content = self.downloader.get_content(url, cache, referer=referer) if content is None: return None