First integration of a FB event
@@ -5,7 +5,7 @@ WORKDIR /usr/src/app
 RUN --mount=type=cache,target=/var/cache/apt \
     apt-get update && \
-    apt-get install --no-install-recommends -y build-essential libpq-dev gettext chromium-driver \
+    apt-get install --no-install-recommends -y build-essential libpq-dev gettext chromium-driver  \
     && rm -rf /var/lib/apt/lists/*
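For context: the cache mount on the RUN line above requires BuildKit. Recent Docker enables it by default; older CLIs have to opt in explicitly (standard Docker usage, not part of this commit; the image tag is illustrative):

    # BuildKit is required for RUN --mount=type=cache; older Docker
    # versions need it enabled via the environment.
    DOCKER_BUILDKIT=1 docker build -t agenda_culturel .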
@@ -3,4 +3,4 @@
 set -o errexit
 set -o nounset
 
-celery -A "$APP_NAME" worker -l info
+python3 /usr/local/lib/python3.11/site-packages/watchdog/watchmedo.py auto-restart -d agenda_culturel -p '*.py' --recursive -- celery -A "$APP_NAME" worker -l info
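Invoking watchmedo.py through its absolute site-packages path ties the script to Python 3.11's install layout. watchdog also installs a watchmedo console script on PATH, so an equivalent, path-independent invocation (a sketch reusing the same flags as above) would be:

    # Same dev-mode auto-restart: watch agenda_culturel for *.py changes
    # and restart the Celery worker on each change.
    watchmedo auto-restart -d agenda_culturel -p '*.py' --recursive -- \
        celery -A "$APP_NAME" worker -l info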
@@ -45,6 +45,7 @@ services:
     volumes:
         - redis_data:/data
 
+
   celery-worker: &celery-worker
     container_name: "${APP_NAME}-celery-worker"
     build:
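The &celery-worker anchor makes this service block reusable. A minimal sketch of what that enables (a hypothetical celery-beat service, assuming Compose's YAML merge-key support):

    # Hypothetical companion service: the merge key copies the anchored
    # celery-worker definition, then overrides what differs.
    celery-beat:
      <<: *celery-worker
      container_name: "${APP_NAME}-celery-beat"
      command: celery -A "${APP_NAME}" beat -l info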
@@ -6,6 +6,7 @@ from celery.utils.log import get_task_logger
 
+from .extractors import ExtractorAllURLs
 
 
 # Set the default Django settings module for the 'celery' program.
 APP_ENV = os.getenv("APP_ENV", "dev")
 os.environ.setdefault("DJANGO_SETTINGS_MODULE", f"agenda_culturel.settings.{APP_ENV}")
@@ -27,15 +28,25 @@ app.autodiscover_tasks()
 
 @app.task(bind=True)
 def create_event_from_submission(self, url):
+    from agenda_culturel.models import Event
 
-    logger.info(f"{url=}")
-    try:
-        logger.info("About to create event from submission")
-        events = ExtractorAllURLs.extract(url)
-        # TODO
-    except BadHeaderError:
-        logger.info("BadHeaderError")
-    except Exception as e:
-        logger.error(e)
+    if len(Event.objects.filter(reference_urls__contains=[url])) != 0:
+        logger.info("Already known url: %s", url)
+    else:
+        try:
+            logger.info("About to create event from submission")
+            events = ExtractorAllURLs.extract(url)
+
+            if events is not None:
+                for e in events:
+                    e.save()
+
+        except BadHeaderError:
+            logger.info("BadHeaderError")
+        except Exception as e:
+            logger.error(e)
 
 
 app.conf.timezone = "Europe/Paris"
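For context, the submission flow would enqueue this task through Celery rather than call it directly. A minimal sketch of a caller (hypothetical, assuming EventSubmissionFormView exposes the submitted URL as cleaned form data and that the app module is agenda_culturel.celery):

    # Hypothetical caller, not part of this commit.
    from agenda_culturel.celery import create_event_from_submission

    def form_valid(self, form):
        # .delay() enqueues the task on the broker (Redis here); the
        # celery-worker container started above picks it up asynchronously.
        create_event_from_submission.delay(form.cleaned_data["url"])
        return super().form_valid(form)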
@@ -1,5 +1,5 @@
 from abc import ABC, abstractmethod
-from .models import Event
+#from .models import Event
 
 
 from selenium import webdriver
 from selenium.webdriver.chrome.service import Service
@@ -8,6 +8,8 @@ from selenium.webdriver.chrome.options import Options
 from bs4 import BeautifulSoup
 
+import json
+from datetime import datetime
 
 
 
 from celery.utils.log import get_task_logger
@@ -67,21 +69,27 @@ class ExtractorFacebook(Extractor):
                             return v
             return None
 
-    def extract(url):
-        txt = Extractor.download(url)
-        if txt is None:
-            logger.error("Cannot download " + url)
-            return None
-        else:
-            soup = BeautifulSoup(txt, "html.parser")
-            for json_script in soup.find_all('script', type="application/json"):
-                json_txt = json_script.get_text()
-                json_struct = json.loads(json_txt)
-                fevent = ExtractorFacebook.FacebookEvent.find_event_in_array(json_struct)
-                if fevent != None:
-                    logger.info(str(fevent))
-                    result = "TODO"
-                    return result
+        def build_event(self, url):
+            from .models import Event
+            # TODO
+            return Event(title=self.data["name"],
+                        status=Event.STATUS.DRAFT,
+                        start_day=datetime.fromtimestamp(self.data["start_timestamp"]),
+                        reference_urls=[url])
+
+    def process_page(txt, url):
+
+        soup = BeautifulSoup(txt, "html.parser")
+        for json_script in soup.find_all('script', type="application/json"):
+            json_txt = json_script.get_text()
+            json_struct = json.loads(json_txt)
+            fevent = ExtractorFacebook.FacebookEvent.find_event_in_array(json_struct)
+            if fevent is not None:
+                logger.info(str(fevent.data))
+
+                result = fevent.build_event(url)
+                return [result]
 
         return None
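Only the tail of find_event_in_array is visible in this hunk. As a rough sketch of the technique it implements (a hypothetical reconstruction, not the commit's actual code): a depth-first search through the page's embedded JSON for a node carrying the fields build_event reads:

    # Hypothetical sketch: walk the nested dicts/lists parsed from a
    # Facebook page's <script type="application/json"> blocks until a
    # node with event-like fields ("name", "start_timestamp") turns up.
    def find_event_like_node(data):
        if isinstance(data, dict):
            if "name" in data and "start_timestamp" in data:
                return data
            children = data.values()
        elif isinstance(data, list):
            children = data
        else:
            return None
        for child in children:
            found = find_event_like_node(child)
            if found is not None:
                return found
        return None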
@@ -92,11 +100,18 @@ class ExtractorAllURLs:
     def extract(url):
         logger.info("Run extraction")
 
-        result = ExtractorFacebook.extract(url)
+        txt = Extractor.download(url)
+        if txt is None:
+            logger.info("Cannot download url")
+            return None
 
-        if result is None:
-            logger.info("Not a Facebook link")
-            # add here other extrators
-            pass
+        result = ExtractorFacebook.process_page(txt, url)
 
-        return result
+        if result is not None:
+            return result
+        else:
+            logger.info("Not a Facebook link")
+
+        # TODO: add here other extractors
+
+        return None
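With this refactor the page is downloaded once in ExtractorAllURLs.extract and handed to each extractor's process_page, so future extractors will not re-download the URL. A usage sketch (hypothetical URL; extract returns a list of unsaved Event drafts, or None):

    # Hypothetical usage, mirroring what the Celery task does.
    events = ExtractorAllURLs.extract("https://www.facebook.com/events/1234567890")
    if events is not None:
        for e in events:
            e.save()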
@@ -1 +1,2 @@
 <h1>{{ object.title }}</h1>
+<p>Date : {{ object.start_day }}</p>
@@ -10,7 +10,7 @@ from .views import *
 urlpatterns = [
     path("", EventListView.as_view(), name="home"),
     re_path(r'^(?P<mode>' + '|'.join([dm.value for dm in DisplayModes]) + ')/$', view_interval, name='view_interval'),
-    path("event/<pk>-<extra>", EventDetailView.as_view(), name="view_event"),
+    path("event/<int:pk>-<extra>", EventDetailView.as_view(), name="view_event"),
     path("proposer", EventSubmissionFormView.as_view(), name="event_submission_form"),
     path("admin/", admin.site.urls),
     path("test_app/", include("test_app.urls")),
@@ -22,3 +22,4 @@ redis==4.5.5
 whitenoise==6.4.0
 selenium==4.14.0
 BeautifulSoup4==4.12.2
+watchdog==3.0.0