Ajout d'un détecteur de pages pas complètement chargées
This commit is contained in:
parent
bb2a6b04e5
commit
7648d1d794
@ -17,30 +17,28 @@ logger = logging.getLogger(__name__)
|
|||||||
# such as https://www.facebook.com/laJeteeClermont/events
|
# such as https://www.facebook.com/laJeteeClermont/events
|
||||||
class CExtractor(TwoStepsExtractor):
|
class CExtractor(TwoStepsExtractor):
|
||||||
|
|
||||||
def find_event_id_fragment_in_array(self, array, first=True):
|
def find_event_id_fragment_in_array(self, array):
|
||||||
found = False
|
|
||||||
if isinstance(array, dict):
|
if isinstance(array, dict):
|
||||||
if "__typename" in array and array["__typename"] == "Event" and "id" in array:
|
if "__typename" in array and array["__typename"] == "Event" and "id" in array:
|
||||||
self.add_event_url("https://www.facebook.com/events/" + array["id"] + "/")
|
self.add_event_url("https://www.facebook.com/events/" + array["id"] + "/")
|
||||||
found = True
|
self.found = True
|
||||||
if not found:
|
else:
|
||||||
for k in array:
|
for k in array:
|
||||||
found = self.find_event_id_fragment_in_array(array[k], False) or found
|
if k == "pageItems":
|
||||||
|
self.has_page_items = True
|
||||||
|
self.find_event_id_fragment_in_array(array[k])
|
||||||
elif isinstance(array, list):
|
elif isinstance(array, list):
|
||||||
for e in array:
|
for e in array:
|
||||||
found = self.find_event_id_fragment_in_array(e, False) or found
|
self.find_event_id_fragment_in_array(e)
|
||||||
return found
|
|
||||||
|
|
||||||
|
|
||||||
def find_in_js(self, soup):
|
def find_in_js(self, soup):
|
||||||
found = False
|
|
||||||
|
|
||||||
for json_script in soup.find_all("script", type="application/json"):
|
for json_script in soup.find_all("script", type="application/json"):
|
||||||
json_txt = json_script.get_text()
|
json_txt = json_script.get_text()
|
||||||
json_struct = json.loads(json_txt)
|
json_struct = json.loads(json_txt)
|
||||||
found = self.find_event_id_fragment_in_array(json_struct) or found
|
self.find_event_id_fragment_in_array(json_struct)
|
||||||
|
|
||||||
return found
|
|
||||||
|
|
||||||
|
|
||||||
def build_event_url_list(self, content):
|
def build_event_url_list(self, content):
|
||||||
@ -48,16 +46,20 @@ class CExtractor(TwoStepsExtractor):
|
|||||||
|
|
||||||
debug = False
|
debug = False
|
||||||
|
|
||||||
found = False
|
self.found = False
|
||||||
links = soup.find_all("a")
|
links = soup.find_all("a")
|
||||||
for link in links:
|
for link in links:
|
||||||
if link.get("href").startswith('https://www.facebook.com/events/'):
|
if link.get("href").startswith('https://www.facebook.com/events/'):
|
||||||
self.add_event_url(link.get('href').split('?')[0])
|
self.add_event_url(link.get('href').split('?')[0])
|
||||||
found = True
|
self.found = True
|
||||||
|
|
||||||
found = self.find_in_js(soup) or found
|
self.has_page_items = False
|
||||||
|
self.find_in_js(soup)
|
||||||
|
|
||||||
if not found and debug:
|
if not self.has_page_items:
|
||||||
|
raise Exception(_("the page was not yet populated with events, so the loading time was probably too short"))
|
||||||
|
|
||||||
|
if not self.found and debug:
|
||||||
directory = "errors/"
|
directory = "errors/"
|
||||||
if not os.path.exists(directory):
|
if not os.path.exists(directory):
|
||||||
os.makedirs(directory)
|
os.makedirs(directory)
|
||||||
|
@ -8,7 +8,7 @@ msgid ""
|
|||||||
msgstr ""
|
msgstr ""
|
||||||
"Project-Id-Version: agenda_culturel\n"
|
"Project-Id-Version: agenda_culturel\n"
|
||||||
"Report-Msgid-Bugs-To: \n"
|
"Report-Msgid-Bugs-To: \n"
|
||||||
"POT-Creation-Date: 2025-02-07 10:17+0100\n"
|
"POT-Creation-Date: 2025-02-07 11:55+0100\n"
|
||||||
"PO-Revision-Date: 2023-10-29 14:16+0000\n"
|
"PO-Revision-Date: 2023-10-29 14:16+0000\n"
|
||||||
"Last-Translator: Jean-Marie Favreau <jeanmarie.favreau@free.fr>\n"
|
"Last-Translator: Jean-Marie Favreau <jeanmarie.favreau@free.fr>\n"
|
||||||
"Language-Team: Jean-Marie Favreau <jeanmarie.favreau@free.fr>\n"
|
"Language-Team: Jean-Marie Favreau <jeanmarie.favreau@free.fr>\n"
|
||||||
@ -352,7 +352,15 @@ msgstr "Informations"
|
|||||||
msgid "Add a comment"
|
msgid "Add a comment"
|
||||||
msgstr "Ajouter un commentaire"
|
msgstr "Ajouter un commentaire"
|
||||||
|
|
||||||
#: agenda_culturel/import_tasks/custom_extractors/fbevents.py:99
|
#: agenda_culturel/import_tasks/custom_extractors/fbevents.py:60
|
||||||
|
msgid ""
|
||||||
|
"the page was not yet populated with events, so the loading time was probably "
|
||||||
|
"too short"
|
||||||
|
msgstr ""
|
||||||
|
"la page n'était pas encore peuplée des événements, le temps de chargement a "
|
||||||
|
"sans doute été trop court"
|
||||||
|
|
||||||
|
#: agenda_culturel/import_tasks/custom_extractors/fbevents.py:101
|
||||||
msgid "Cannot get Facebook event from {}"
|
msgid "Cannot get Facebook event from {}"
|
||||||
msgstr "Impossible de récupérer un événement Facebook depuis {}"
|
msgstr "Impossible de récupérer un événement Facebook depuis {}"
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user