#!/usr/bin/python3
# coding: utf-8


class Event:
    """Wrapper around the JSON object that describes one event.

    The raw dictionary is kept untouched in ``self.data``; the class only
    adds a readable ``str()`` form and a helper to locate the event object
    inside an arbitrarily nested JSON structure.
    """

    name = "event"
    # A dictionary is considered to be the event description only when it
    # contains every one of these keys.
    keys = [
        "start_time_formatted",
        "start_timestamp",
        "is_past",
        "name",
        "price_info",
        "cover_media_renderer",
        "event_creator",
        "id",
        "day_time_sentence",
        "event_place",
        "comet_neighboring_siblings",
    ]

    def __init__(self, event):
        """Store the raw event dictionary in ``self.data``."""
        self.data = event

    def __str__(self):
        """Return the event name as text.

        Raises:
            KeyError: if the wrapped dictionary has no ``"name"`` entry.
        """
        # str() guards against a non-string name, which would otherwise make
        # __str__ itself raise TypeError.
        return str(self.data["name"])

    @staticmethod
    def find_event_in_array(array):
        """Return the first event found in a nested JSON structure.

        The structure is walked depth-first, in document order. A dictionary
        holding every key listed in ``Event.keys`` is wrapped in an ``Event``
        and returned immediately; other dictionaries and lists are searched
        through their values. Anything else (strings, numbers, ``None``) is
        ignored.

        Args:
            array: a value as produced by ``json.loads`` (dict, list or
                scalar).

        Returns:
            An ``Event`` wrapping the matching dictionary, or ``None`` when
            no dictionary matches.
        """
        # An explicit stack replaces recursion so that very deeply nested
        # documents cannot exhaust the interpreter's recursion limit.
        pending = [array]
        while pending:
            node = pending.pop()
            if isinstance(node, dict):
                if all(key in node for key in Event.keys):
                    return Event(node)
                # Push in reverse so the first value is popped first, which
                # preserves the original pre-order traversal.
                pending.extend(reversed(list(node.values())))
            elif isinstance(node, list):
                pending.extend(reversed(node))
        return None


# Alternative source tried during experimentation (iCal export endpoint):
# url = "https://www.facebook.com/events/ical/export/?eid=2294200007432315"
import hashlib
import json
import os

# Other URLs tried during experimentation:
# url_cal = "https://www.facebook.com/events/ical/export/?eid=993406668581410"
# url = "https://jmtrivial.info"
url = "https://www.facebook.com/events/2294199997432316/2294200007432315/"

cachedir = "cache"


def _cache_path(page_url, directory):
    """Return the cache file path used for *page_url* inside *directory*."""
    # MD5 only serves to derive a stable file name from the URL.
    digest = hashlib.md5(page_url.encode()).hexdigest()
    return os.path.join(directory, digest + ".html")


def _download_page(page_url):
    """Load *page_url* in headless Chrome and return the page source."""
    # Imported lazily: the browser stack is only needed on a cache miss.
    from selenium import webdriver
    from selenium.webdriver.chrome.options import Options
    from selenium.webdriver.chrome.service import Service

    options = Options()
    options.add_argument("--headless=new")
    service = Service("/usr/bin/chromedriver")

    driver = webdriver.Chrome(service=service, options=options)
    try:
        driver.get(page_url)
        return driver.page_source
    finally:
        # Always shut the browser down, even if loading the page failed.
        driver.quit()


def _load_page(page_url, directory):
    """Return the HTML of *page_url*, using *directory* as an on-disk cache."""
    path = _cache_path(page_url, directory)
    if os.path.isfile(path):
        with open(path, encoding="utf-8") as cached:
            # read() returns the file verbatim; joining readlines() with
            # "\n" would double every line break.
            return cached.read()

    print("Download page")
    doc = _download_page(page_url)

    parent = os.path.dirname(path)
    if parent:
        os.makedirs(parent, exist_ok=True)
    with open(path, "w", encoding="utf-8") as text_file:
        text_file.write(doc)
    return doc


def main():
    """Print every event found in the JSON script blocks of the page at ``url``.

    The page is taken from the cache directory when available and downloaded
    otherwise. Each ``<script type="application/json">`` element is parsed
    and searched with ``Event.find_event_in_array``.

    Raises:
        json.JSONDecodeError: if a JSON script block cannot be parsed.
    """
    from bs4 import BeautifulSoup

    doc = _load_page(url, cachedir)
    # Name the parser explicitly so the result does not depend on which
    # optional parsers happen to be installed.
    soup = BeautifulSoup(doc, "html.parser")

    for json_script in soup.find_all("script", type="application/json"):
        json_struct = json.loads(json_script.get_text())
        event = Event.find_event_in_array(json_struct)
        if event is not None:
            print(event)


if __name__ == "__main__":
    main()