On ajoute un en-tête HTTP « Referer » à toutes les requêtes
This commit is contained in:
		@@ -7,6 +7,7 @@ from bs4 import BeautifulSoup
 | 
			
		||||
# URL pour les humains: https://lacomediedeclermont.com/saison23-24/
 | 
			
		||||
class CExtractor(TwoStepsExtractor):
 | 
			
		||||
    nom_lieu = "La Comédie de Clermont"
 | 
			
		||||
    url_referer = "https://lacomediedeclermont.com/saison23-24/"
 | 
			
		||||
 | 
			
		||||
    def category_comedie2agenda(self, category):
 | 
			
		||||
        mapping = {
 | 
			
		||||
 
 | 
			
		||||
@@ -39,19 +39,19 @@ class SimpleDownloader(Downloader):
 | 
			
		||||
        super().__init__()
 | 
			
		||||
 | 
			
		||||
    def download(self, url, referer=None, post=None):
 | 
			
		||||
        print("Downloading {}".format(url))
 | 
			
		||||
        print("Downloading {} referer: {} post: {}".format(url, referer, post))
 | 
			
		||||
        try:
 | 
			
		||||
            headers = {
 | 
			
		||||
                "User-Agent": "Mozilla/5.0 (X11; Linux x86_64; rv:126.0) Gecko/20100101 Firefox/126.0",
 | 
			
		||||
            }
 | 
			
		||||
            if referer is not None:
 | 
			
		||||
                headers["Referer"] = referer
 | 
			
		||||
            req = Request(url, headers=headers)
 | 
			
		||||
            if post:
 | 
			
		||||
                post_args = urlencode(post).encode("utf-8")
 | 
			
		||||
                headers = {
 | 
			
		||||
                    "User-Agent": "Mozilla/5.0 (X11; Linux x86_64; rv:126.0) Gecko/20100101 Firefox/126.0",
 | 
			
		||||
                }
 | 
			
		||||
                if referer is not None:
 | 
			
		||||
                    headers["Referer"] = referer
 | 
			
		||||
                req = Request(url, headers=headers)
 | 
			
		||||
                resource = urllib.request.urlopen(req, post_args)
 | 
			
		||||
            else:
 | 
			
		||||
                resource = urllib.request.urlopen(url)
 | 
			
		||||
                resource = urllib.request.urlopen(req)
 | 
			
		||||
            data = resource.read().decode(resource.headers.get_content_charset())
 | 
			
		||||
            return data
 | 
			
		||||
        except Exception as e:
 | 
			
		||||
 
 | 
			
		||||
@@ -47,6 +47,8 @@ class GGCalendar:
 | 
			
		||||
# - then for each document downloaded from these urls, build the events
 | 
			
		||||
# This is an abstract class
 | 
			
		||||
class TwoStepsExtractor(Extractor):
 | 
			
		||||
    url_referer=None
 | 
			
		||||
 | 
			
		||||
    def __init__(self):
 | 
			
		||||
        super().__init__()
 | 
			
		||||
        self.event_urls = None
 | 
			
		||||
 
 | 
			
		||||
@@ -13,7 +13,7 @@ class URL2Events:
 | 
			
		||||
    def process(
 | 
			
		||||
        self, url, url_human=None, cache=None, default_values=None, published=False
 | 
			
		||||
    ):
 | 
			
		||||
        content = self.downloader.get_content(url, cache)
 | 
			
		||||
        content = self.downloader.get_content(url, cache, referer=self.extractor.url_referer)
 | 
			
		||||
 | 
			
		||||
        if content is None:
 | 
			
		||||
            return None
 | 
			
		||||
 
 | 
			
		||||
		Reference in New Issue
	
	Block a user