Restructure import files
parent 571a6775c4 · commit 3a78972391
@@ -43,7 +43,7 @@ A socket-type proxy can (for now) be enabled manually for importing …

 ### Adding a new *custom* source

 To add a new custom source:

-- add a file in ```src/agenda_culturel/import_tasks/custom_extractors```, taking inspiration from the other files already there. The corresponding parent classes provide many conveniences
+- add a file in ```src/agenda_culturel/import_tasks/custom_extractors``` (or ```src/agenda_culturel/import_tasks/generic_extractors``` if the source format is reusable), taking inspiration from the other files already there. The corresponding parent classes provide many conveniences (a minimal sketch follows this hunk)
 - take inspiration from the scripts in ```experimentations/``` to create your own test script
 - once the import works on its own in these experiments, it is time to add it to the website:
   - add an entry for this source to the ```RecurrentImport.PROCESSOR``` class in ```src/agenda_culturel/models.py``` so that it is offered to users
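As a rough companion to the first bullet, here is a minimal sketch of what such an extractor file could look like. It only reuses API visible elsewhere in this diff (`Extractor`, `extract()`, `set_header()`, `add_event()`, `get_structure()`); the module name `myvenue.py` and the CSS selectors are hypothetical, and real extractors may instead derive from the two-steps base class in `twosteps_extractor.py`.

```python
# hypothetical file: src/agenda_culturel/import_tasks/custom_extractors/myvenue.py
from ..extractor import *
from bs4 import BeautifulSoup


# A class dedicated to get events from the (hypothetical) venue "My Venue"
class CExtractor(Extractor):
    def extract(
        self, content, url, url_human=None, default_values=None, published=False
    ):
        soup = BeautifulSoup(content, "html.parser")
        self.set_header(url)

        # placeholder selectors: adapt them to the real page structure
        title = soup.select_one("h1.event-title").text
        description = soup.select_one("div.event-description").text

        self.add_event(
            default_values,
            title=title,
            category=None,
            start_day=None,  # parse the real date from the page here
            location=None,
            description=description,
            tags=[],
            uuids=[url],
            recurrences=None,
            url_human=url_human,
            start_time=None,
            end_day=None,
            end_time=None,
            published=published,
            image=None,
        )

        return self.get_structure()
```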
@@ -21,7 +21,7 @@ sys.path.append(parent + "/src")
 from src.agenda_culturel.import_tasks.downloader import *
 from src.agenda_culturel.import_tasks.extractor import *
 from src.agenda_culturel.import_tasks.importer import *
-from src.agenda_culturel.import_tasks.extractor_facebook import *
+from src.agenda_culturel.import_tasks.generic_extractors.fbevent import *
@@ -29,7 +29,7 @@ from src.agenda_culturel.import_tasks.extractor_facebook import *

 if __name__ == "__main__":

-    u2e = URL2Events(ChromiumHeadlessDownloader(), FacebookEventExtractor())
+    u2e = URL2Events(ChromiumHeadlessDownloader(), CExtractor())
     url="https://www.facebook.com/events/3575802569389184/3575802576055850/?active_tab=about"

     events = u2e.process(url, cache = "fb.html", published = True)
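Assembled from the two hunks above, a complete minimal test script in `experimentations/` looks roughly like this. The imports and the `URL2Events` call are taken from this diff; the way `parent` is computed and the final `print` are assumptions about the elided parts of these scripts:

```python
#!/usr/bin/python3
# minimal experimentation script, modelled on the ones touched by this commit
import os
import sys

# make src/ importable (assumed to match the elided head of these scripts)
parent = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
sys.path.append(parent + "/src")

from src.agenda_culturel.import_tasks.downloader import *
from src.agenda_culturel.import_tasks.extractor import *
from src.agenda_culturel.import_tasks.importer import *
from src.agenda_culturel.import_tasks.generic_extractors.fbevent import *

if __name__ == "__main__":
    # pair a headless-browser downloader with the Facebook event extractor
    u2e = URL2Events(ChromiumHeadlessDownloader(), CExtractor())
    url = "https://www.facebook.com/events/3575802569389184/3575802576055850/?active_tab=about"

    # download once, cache the HTML locally, and extract events from it
    events = u2e.process(url, cache="fb.html", published=True)
    print(events)  # assumed: inspect the extracted structure
```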
@@ -21,7 +21,7 @@ sys.path.append(parent + "/src")
 from src.agenda_culturel.import_tasks.downloader import *
 from src.agenda_culturel.import_tasks.extractor import *
 from src.agenda_culturel.import_tasks.importer import *
-from src.agenda_culturel.import_tasks.custom_extractors import *
+from src.agenda_culturel.import_tasks.generic_extractors import *
@@ -21,7 +21,7 @@ sys.path.append(parent + "/src")
 from src.agenda_culturel.import_tasks.downloader import *
 from src.agenda_culturel.import_tasks.extractor import *
 from src.agenda_culturel.import_tasks.importer import *
-from src.agenda_culturel.import_tasks.extractor_ical import *
+from src.agenda_culturel.import_tasks.generic_extractors.ical import *
@@ -21,7 +21,7 @@ sys.path.append(parent + "/src")
 from src.agenda_culturel.import_tasks.downloader import *
 from src.agenda_culturel.import_tasks.extractor import *
 from src.agenda_culturel.import_tasks.importer import *
-from src.agenda_culturel.import_tasks.custom_extractors import *
+from src.agenda_culturel.import_tasks.generic_extractors import *
@@ -21,7 +21,7 @@ sys.path.append(parent + "/src")
 from src.agenda_culturel.import_tasks.downloader import *
 from src.agenda_culturel.import_tasks.extractor import *
 from src.agenda_culturel.import_tasks.importer import *
-from src.agenda_culturel.import_tasks.custom_extractors import *
+from src.agenda_culturel.import_tasks.generic_extractors import *
experimentations/get_mobilizon.py (Normal file → Executable file)
@@ -21,7 +21,7 @@ sys.path.append(parent + "/src")
 from src.agenda_culturel.import_tasks.downloader import *
 from src.agenda_culturel.import_tasks.extractor import *
 from src.agenda_culturel.import_tasks.importer import *
-from src.agenda_culturel.import_tasks.custom_extractors import *
+from src.agenda_culturel.import_tasks.generic_extractors import *
@@ -21,7 +21,7 @@ sys.path.append(parent + "/src")
 from src.agenda_culturel.import_tasks.downloader import *
 from src.agenda_culturel.import_tasks.extractor import *
 from src.agenda_culturel.import_tasks.importer import *
-from src.agenda_culturel.import_tasks.custom_extractors import *
+from src.agenda_culturel.import_tasks.generic_extractors import *
@@ -14,8 +14,8 @@ from contextlib import contextmanager
 from .import_tasks.downloader import *
 from .import_tasks.extractor import *
 from .import_tasks.importer import *
-from .import_tasks.extractor_ical import *
 from .import_tasks.custom_extractors import *
+from .import_tasks.generic_extractors import *


 # Set the default Django settings module for the 'celery' program.
@@ -140,13 +140,13 @@ def run_recurrent_import_internal(rimport, downloader, req_id):

     if rimport.processor == RecurrentImport.PROCESSOR.ICAL:
-        extractor = ICALExtractor()
+        extractor = ical.ICALExtractor()
     elif rimport.processor == RecurrentImport.PROCESSOR.ICALNOBUSY:
-        extractor = ICALNoBusyExtractor()
+        extractor = ical.ICALNoBusyExtractor()
     elif rimport.processor == RecurrentImport.PROCESSOR.ICALNOVC:
-        extractor = ICALNoVCExtractor()
+        extractor = ical.ICALNoVCExtractor()
     elif rimport.processor == RecurrentImport.PROCESSOR.ICALNAIVETZ:
-        extractor = ICALNaiveTimezone()
+        extractor = ical.ICALNaiveTimezone()
     elif rimport.processor == RecurrentImport.PROCESSOR.LACOOPE:
         extractor = lacoope.CExtractor()
     elif rimport.processor == RecurrentImport.PROCESSOR.LACOMEDIE:
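The dispatch above pairs each `RecurrentImport.PROCESSOR` value with an extractor, which is what the README step "add an entry to `RecurrentImport.PROCESSOR`" refers to. A hedged sketch of that enumeration, assuming it is a Django `models.TextChoices` (its actual definition in `src/agenda_culturel/models.py` is not part of this diff, so the values and labels below are illustrative):

```python
from django.db import models
from django.utils.translation import gettext_lazy as _


class RecurrentImport(models.Model):
    class PROCESSOR(models.TextChoices):
        # member names taken from the dispatch above; values/labels assumed
        ICAL = "ical", _("ical")
        ICALNOBUSY = "icalnobusy", _("ical without busy events")
        ICALNOVC = "icalnovc", _("ical without VC")
        ICALNAIVETZ = "icalnaivetz", _("ical with naive timezone")
        LACOOPE = "lacoope", _("lacoope.org")
        LACOMEDIE = "lacomedie", _("la comédie")
        # a new custom source adds one entry here, e.g.:
        MYVENUE = "myvenue", _("My Venue (hypothetical)")
```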
@@ -1,4 +1,4 @@
-from ..generic_extractors import *
+from ..twosteps_extractor import *
 from bs4 import BeautifulSoup

 # A class dedicated to get events from Arachnée Concert
@@ -1,4 +1,4 @@
-from ..generic_extractors import *
+from ..twosteps_extractor import *
 from bs4 import BeautifulSoup
 from datetime import timedelta
@@ -1,4 +1,4 @@
-from ..generic_extractors import *
+from ..twosteps_extractor import *
 import json5
 from bs4 import BeautifulSoup
@@ -1,4 +1,5 @@
-from ..generic_extractors import *
+from ..twosteps_extractor import *
+from ..generic_extractors.ggcal_link import GGCalendar
 import re
 import json5
 from bs4 import BeautifulSoup
@@ -1,4 +1,4 @@
-from ..generic_extractors import *
+from ..twosteps_extractor import *
 import re
 from bs4 import BeautifulSoup
@@ -1,4 +1,4 @@
-from ..generic_extractors import *
+from ..twosteps_extractor import *
 from bs4 import BeautifulSoup
 from datetime import datetime
@@ -1,4 +1,4 @@
-from ..generic_extractors import *
+from ..twosteps_extractor import *
 from bs4 import BeautifulSoup

 # A class dedicated to get events from Le Fotomat'
@@ -1,4 +1,4 @@
-from ..generic_extractors import *
+from ..twosteps_extractor import *
 from bs4 import BeautifulSoup
 from datetime import datetime
@@ -1,4 +1,4 @@
-from ..generic_extractors import *
+from ..twosteps_extractor import *
 from bs4 import BeautifulSoup
 from datetime import datetime, date
@@ -284,8 +284,8 @@ class Extractor(ABC):
         return {"header": self.header, "events": self.events}

     def clean_url(url):
-        from .extractor_ical import ICALExtractor
-        from .extractor_facebook import FacebookEventExtractor
+        from .generic_extractors.ical import ICALExtractor
+        from .generic_extractors.fbevent import CExtractor as FacebookEventExtractor

         result = url
         for e in [ICALExtractor, FacebookEventExtractor]:
@@ -293,9 +293,9 @@
         return result

     def get_default_extractors(single_event=False):
-        from .extractor_ical import ICALExtractor
-        from .extractor_facebook import FacebookEventExtractor
-        from .extractor_ggcal_link import GoogleCalendarLinkEventExtractor
+        from .generic_extractors.ical import ICALExtractor
+        from .generic_extractors.fbevent import CExtractor as FacebookEventExtractor
+        from .generic_extractors.ggcal_link import CExtractor as GoogleCalendarLinkEventExtractor

         if single_event:
             return [FacebookEventExtractor(), GoogleCalendarLinkEventExtractor(), EventNotFoundExtractor()]
@@ -1,88 +0,0 @@
-from datetime import datetime
-from bs4 import BeautifulSoup
-from urllib.parse import urlparse
-
-from .extractor import *
-from .generic_extractors import *
-
-import json
-
-import logging
-
-logger = logging.getLogger(__name__)
-
-
-class GoogleCalendarLinkEventExtractor(Extractor):
-    def __init__(self):
-        super().__init__()
-        self.possible_urls = ["https://calendar.google.com/calendar/", "https://addtocalendar.com/", "https://www.google.com/calendar/event"]
-
-    def guess_image(self, soup, url):
-        image = soup.find("meta", property="og:image")
-
-        if image is None:
-            for img in soup.select('img'):
-                if img.find_parent(name='nav'):
-                    continue
-                image = img["src"]
-                break
-        else:
-            image = image["content"]
-
-        if image.startswith("/"):
-            root_url = "https://" + urlparse(url).netloc + "/"
-            image = root_url + image
-
-        return image
-
-    def extract(
-        self, content, url, url_human=None, default_values=None, published=False
-    ):
-        soup = BeautifulSoup(content, "html.parser")
-
-        for ggu in self.possible_urls:
-            link_calendar = soup.select('a[href^="' + ggu + '"]')
-            if len(link_calendar) != 0:
-                gg_cal = GGCalendar(link_calendar[0]["href"])
-
-                if gg_cal.is_valid_event():
-                    start_day = gg_cal.start_day
-                    start_time = gg_cal.start_time
-                    description = gg_cal.description.replace(' ', '')
-                    end_day = gg_cal.end_day
-                    end_time = gg_cal.end_time
-                    location = gg_cal.location
-                    title = gg_cal.title
-                    url_human = url
-
-                    self.set_header(url)
-                    image = self.guess_image(soup, url)
-                    category = None
-
-                    self.add_event(
-                        default_values,
-                        title=title,
-                        category=category,
-                        start_day=start_day,
-                        location=location,
-                        description=description,
-                        tags=[],
-                        uuids=[url],
-                        recurrences=None,
-                        url_human=url_human,
-                        start_time=start_time,
-                        end_day=end_day,
-                        end_time=end_time,
-                        published=published,
-                        image=image,
-                    )
-
-                break
-
-        return self.get_structure()
@@ -0,0 +1,7 @@
+from os.path import dirname, basename, isfile, join
+import glob
+
+modules = glob.glob(join(dirname(__file__), "*.py"))
+__all__ = [
+    basename(f)[:-3] for f in modules if isfile(f) and not f.endswith("__init__.py")
+]
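This new `__init__.py` computes `__all__` dynamically, so any module dropped into the package is picked up by a star import without further bookkeeping. A small sketch of what that enables, matching the module-qualified usage in the `celery.py` hunk above (illustrative, to be run from inside the `agenda_culturel` package):

```python
# inside agenda_culturel code, after the dynamic __init__.py above:
from .import_tasks.generic_extractors import *  # binds each module, e.g. `ical`, `fbevent`

extractor = ical.ICALExtractor()  # module-qualified, as in run_recurrent_import_internal
facebook = fbevent.CExtractor()   # by convention each source module exposes CExtractor
```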
@@ -1,4 +1,4 @@
-from ..generic_extractors import *
+from ..twosteps_extractor import *
 from bs4 import BeautifulSoup
 from datetime import datetime
@@ -5,7 +5,7 @@ import time as t
 from django.utils.translation import gettext_lazy as _


-from .extractor import *
+from ..extractor import *
 import json

 import logging
@@ -231,7 +231,7 @@ class FacebookEvent:
            result.append(clone.build_event(url_base + nb_e.elements["id"] + "/"))
         return result

-class FacebookEventExtractor(Extractor):
+class CExtractor(Extractor):

     def __init__(self):
         super().__init__()
@@ -259,11 +259,11 @@ class FacebookEventExtractor(Extractor):
             t.sleep(5)

     def prepare_2nd_extract(self):
-        FacebookEventExtractor.prepare_2nd_extract_dler(self.downloader)
+        CExtractor.prepare_2nd_extract_dler(self.downloader)


     def clean_url(url):
-        if FacebookEventExtractor.is_known_url(url, False):
+        if CExtractor.is_known_url(url, False):
             u = urlparse(url)
             result = "https://www.facebook.com" + u.path
@@ -1,5 +1,5 @@
-from ..generic_extractors import *
-from ..extractor_facebook import FacebookEvent, FacebookEventExtractor
+from ..twosteps_extractor import *
+from .fbevent import FacebookEvent
 import json5
 from bs4 import BeautifulSoup
 import json
@@ -0,0 +1,158 @@
+from datetime import datetime
+from bs4 import BeautifulSoup
+from urllib.parse import urlparse
+
+from ..extractor import *
+from ..twosteps_extractor import *
+
+import json
+
+import logging
+
+logger = logging.getLogger(__name__)
+
+
+class GGCalendar:
+    def __init__(self, url):
+        self.url = url
+        self.extract_info()
+
+    def filter_keys(params):
+        result = {}
+
+        for k, v in params.items():
+            if k.startswith('e[0]'):
+                result[k.replace('e[0][', '')[:-1]] = v
+            else:
+                result[k] = v
+
+        return result
+
+    def is_valid_event(self):
+        return self.start_day is not None and self.title is not None
+
+    def extract_info(self):
+        parsed_url = urlparse(self.url.replace("#", "%23"))
+        params = parse_qs(parsed_url.query)
+
+        params = GGCalendar.filter_keys(params)
+
+        self.location = params["location"][0] if "location" in params else ""
+        self.title = params["text"][0] if "text" in params else params["title"][0] if "title" in params else ""
+        self.description = params["description"][0] if "description" in params else params["details"][0] if "details" in params else ""
+        if self.description != "":
+            self.description = BeautifulSoup(self.description, "html.parser").text
+        if "dates" in params:
+            dates = [x.replace(" ", "+") for x in params["dates"][0].split("/")]
+            if len(dates) > 0:
+                date = parser.parse(dates[0])
+                self.start_day = date.date()
+                self.start_time = date.time()
+                if len(dates) == 2:
+                    date = parser.parse(dates[1])
+                    self.end_day = date.date()
+                    self.end_time = date.time()
+                else:
+                    self.end_day = None
+                    self.end_time = None
+        elif "date_start" in params:
+            date = parser.parse(params["date_start"][0])
+            self.start_day = date.date()
+            self.start_time = date.time()
+            if "date_end" in params:
+                dateend = parser.parse(params["date_end"][0])
+                if dateend != date:
+                    self.end_day = dateend.date()
+                    self.end_time = dateend.time()
+                else:
+                    self.end_day = None
+                    self.end_time = None
+                if self.start_time == datetime.time(0):
+                    self.start_time = None
+
+            else:
+                self.end_day = None
+                self.end_time = None
+        else:
+            raise Exception("Unable to find a date in google calendar URL")
+            self.start_day = None
+            self.start_time = None
+            self.end_day = None
+            self.end_time = None
+
+
+class CExtractor(Extractor):
+    def __init__(self):
+        super().__init__()
+        self.possible_urls = ["https://calendar.google.com/calendar/", "https://addtocalendar.com/", "https://www.google.com/calendar/event"]
+
+    def guess_image(self, soup, url):
+        image = soup.find("meta", property="og:image")
+
+        if image is None:
+            for img in soup.select('img'):
+                if img.find_parent(name='nav'):
+                    continue
+                image = img["src"]
+                break
+        else:
+            image = image["content"]
+
+        if image.startswith("/"):
+            root_url = "https://" + urlparse(url).netloc + "/"
+            image = root_url + image
+
+        return image
+
+    def extract(
+        self, content, url, url_human=None, default_values=None, published=False
+    ):
+        soup = BeautifulSoup(content, "html.parser")
+
+        for ggu in self.possible_urls:
+            link_calendar = soup.select('a[href^="' + ggu + '"]')
+            if len(link_calendar) != 0:
+                gg_cal = GGCalendar(link_calendar[0]["href"])
+
+                if gg_cal.is_valid_event():
+                    start_day = gg_cal.start_day
+                    start_time = gg_cal.start_time
+                    description = gg_cal.description.replace(' ', '')
+                    end_day = gg_cal.end_day
+                    end_time = gg_cal.end_time
+                    location = gg_cal.location
+                    title = gg_cal.title
+                    url_human = url
+
+                    self.set_header(url)
+                    image = self.guess_image(soup, url)
+                    category = None
+
+                    self.add_event(
+                        default_values,
+                        title=title,
+                        category=category,
+                        start_day=start_day,
+                        location=location,
+                        description=description,
+                        tags=[],
+                        uuids=[url],
+                        recurrences=None,
+                        url_human=url_human,
+                        start_time=start_time,
+                        end_day=end_day,
+                        end_time=end_time,
+                        published=published,
+                        image=image,
+                    )
+
+                break
+
+        return self.get_structure()
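Since `GGCalendar` recovers event fields from the query string of an "add to calendar" link (`text`/`title`, `dates` or `date_start`/`date_end`, `location`, `details`/`description`), a quick illustrative use with a made-up URL:

```python
# illustrative only: a fabricated "add to Google Calendar" link
url = (
    "https://www.google.com/calendar/event?action=TEMPLATE"
    "&text=Concert&dates=20240601T190000/20240601T210000"
    "&location=Clermont-Ferrand&details=Open%20air%20concert"
)

cal = GGCalendar(url)
if cal.is_valid_event():
    # -> Concert 2024-06-01 19:00:00 Clermont-Ferrand
    print(cal.title, cal.start_day, cal.start_time, cal.location)
```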
@@ -8,7 +8,7 @@ from bs4 import BeautifulSoup, MarkupResemblesLocatorWarning
 import pytz


-from .extractor import *
+from ..extractor import *

 from celery.utils.log import get_task_logger
@@ -1,4 +1,4 @@
-from ..generic_extractors import *
+from ..twosteps_extractor import *
 from bs4 import BeautifulSoup
 from datetime import datetime
 from urllib.parse import urlparse
@@ -1,4 +1,4 @@
-from ..generic_extractors import *
+from ..twosteps_extractor import *
 from bs4 import BeautifulSoup
@@ -1,6 +1,6 @@
 from .downloader import *
 from .extractor import *
-from .extractor_facebook import FacebookEventExtractor
+from .generic_extractors.fbevent import CExtractor as FacebookEventExtractor

 import logging
@@ -14,76 +14,6 @@ from django.utils.translation import gettext_lazy as _
 from dateutil import parser
 import datetime


-class GGCalendar:
-    def __init__(self, url):
-        self.url = url
-        self.extract_info()
-
-    def filter_keys(params):
-        result = {}
-
-        for k, v in params.items():
-            if k.startswith('e[0]'):
-                result[k.replace('e[0][', '')[:-1]] = v
-            else:
-                result[k] = v
-
-        return result
-
-    def is_valid_event(self):
-        return self.start_day is not None and self.title is not None
-
-    def extract_info(self):
-        parsed_url = urlparse(self.url.replace("#", "%23"))
-        params = parse_qs(parsed_url.query)
-
-        params = GGCalendar.filter_keys(params)
-
-        self.location = params["location"][0] if "location" in params else ""
-        self.title = params["text"][0] if "text" in params else params["title"][0] if "title" in params else ""
-        self.description = params["description"][0] if "description" in params else params["details"][0] if "details" in params else ""
-        if self.description != "":
-            self.description = BeautifulSoup(self.description, "html.parser").text
-        if "dates" in params:
-            dates = [x.replace(" ", "+") for x in params["dates"][0].split("/")]
-            if len(dates) > 0:
-                date = parser.parse(dates[0])
-                self.start_day = date.date()
-                self.start_time = date.time()
-                if len(dates) == 2:
-                    date = parser.parse(dates[1])
-                    self.end_day = date.date()
-                    self.end_time = date.time()
-                else:
-                    self.end_day = None
-                    self.end_time = None
-        elif "date_start" in params:
-            date = parser.parse(params["date_start"][0])
-            self.start_day = date.date()
-            self.start_time = date.time()
-            if "date_end" in params:
-                dateend = parser.parse(params["date_end"][0])
-                if dateend != date:
-                    self.end_day = dateend.date()
-                    self.end_time = dateend.time()
-                else:
-                    self.end_day = None
-                    self.end_time = None
-                if self.start_time == datetime.time(0):
-                    self.start_time = None
-
-            else:
-                self.end_day = None
-                self.end_time = None
-        else:
-            raise Exception("Unable to find a date in google calendar URL")
-            self.start_day = None
-            self.start_time = None
-            self.end_day = None
-            self.end_time = None
-
-
 # A class to extract events from URL with two steps:
 # - first build a list of urls where the events will be found
 # - then for each document downloaded from these urls, build the events
@@ -36,7 +36,7 @@ import recurrence
 import copy
 import unicodedata
 from collections import defaultdict
-from .import_tasks.extractor_facebook import FacebookEventExtractor
+from .import_tasks.generic_extractors.fbevent import CExtractor as FacebookEventExtractor
 from .import_tasks.extractor import Extractor

 from django.template.defaultfilters import date as _date