On ajoute un en-tête HTTP Referer pour toutes les requêtes
This commit is contained in:
parent
9c9abd27dd
commit
becce291af
@ -7,6 +7,7 @@ from bs4 import BeautifulSoup
|
|||||||
# URL pour les humains: https://lacomediedeclermont.com/saison23-24/
|
# URL pour les humains: https://lacomediedeclermont.com/saison23-24/
|
||||||
class CExtractor(TwoStepsExtractor):
|
class CExtractor(TwoStepsExtractor):
|
||||||
nom_lieu = "La Comédie de Clermont"
|
nom_lieu = "La Comédie de Clermont"
|
||||||
|
url_referer = "https://lacomediedeclermont.com/saison23-24/"
|
||||||
|
|
||||||
def category_comedie2agenda(self, category):
|
def category_comedie2agenda(self, category):
|
||||||
mapping = {
|
mapping = {
|
||||||
|
@ -39,19 +39,19 @@ class SimpleDownloader(Downloader):
|
|||||||
super().__init__()
|
super().__init__()
|
||||||
|
|
||||||
def download(self, url, referer=None, post=None):
|
def download(self, url, referer=None, post=None):
|
||||||
print("Downloading {}".format(url))
|
print("Downloading {} referer: {} post: {}".format(url, referer, post))
|
||||||
try:
|
try:
|
||||||
|
headers = {
|
||||||
|
"User-Agent": "Mozilla/5.0 (X11; Linux x86_64; rv:126.0) Gecko/20100101 Firefox/126.0",
|
||||||
|
}
|
||||||
|
if referer is not None:
|
||||||
|
headers["Referer"] = referer
|
||||||
|
req = Request(url, headers=headers)
|
||||||
if post:
|
if post:
|
||||||
post_args = urlencode(post).encode("utf-8")
|
post_args = urlencode(post).encode("utf-8")
|
||||||
headers = {
|
|
||||||
"User-Agent": "Mozilla/5.0 (X11; Linux x86_64; rv:126.0) Gecko/20100101 Firefox/126.0",
|
|
||||||
}
|
|
||||||
if referer is not None:
|
|
||||||
headers["Referer"] = referer
|
|
||||||
req = Request(url, headers=headers)
|
|
||||||
resource = urllib.request.urlopen(req, post_args)
|
resource = urllib.request.urlopen(req, post_args)
|
||||||
else:
|
else:
|
||||||
resource = urllib.request.urlopen(url)
|
resource = urllib.request.urlopen(req)
|
||||||
data = resource.read().decode(resource.headers.get_content_charset())
|
data = resource.read().decode(resource.headers.get_content_charset())
|
||||||
return data
|
return data
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
|
@ -47,6 +47,8 @@ class GGCalendar:
|
|||||||
# - then for each document downloaded from these urls, build the events
|
# - then for each document downloaded from these urls, build the events
|
||||||
# This class is an abstract class
|
# This class is an abstract class
|
||||||
class TwoStepsExtractor(Extractor):
|
class TwoStepsExtractor(Extractor):
|
||||||
|
url_referer=None
|
||||||
|
|
||||||
def __init__(self):
|
def __init__(self):
|
||||||
super().__init__()
|
super().__init__()
|
||||||
self.event_urls = None
|
self.event_urls = None
|
||||||
|
@ -13,7 +13,7 @@ class URL2Events:
|
|||||||
def process(
|
def process(
|
||||||
self, url, url_human=None, cache=None, default_values=None, published=False
|
self, url, url_human=None, cache=None, default_values=None, published=False
|
||||||
):
|
):
|
||||||
content = self.downloader.get_content(url, cache)
|
content = self.downloader.get_content(url, cache, referer=self.extractor.url_referer)
|
||||||
|
|
||||||
if content is None:
|
if content is None:
|
||||||
return None
|
return None
|
||||||
|
Loading…
x
Reference in New Issue
Block a user