feat(job): source from current website api ; chore: deploy to prod
This commit is contained in:
@@ -9,6 +9,30 @@ from model import Model
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
UA = 'Mozilla/5.0 (X11; Linux x86_64; rv:139.0) Gecko/20100101 Firefox/139.0'
|
||||
|
||||
def yield_from_joinmbz():
    """Sync the joinmobilizon.org instance index into the local DB, then yield rows.

    Fetches the public instance list, inserts every host that is not yet in
    the ``instances`` table (``failure`` set to -1, ``createdAt`` set to the
    current Unix time), commits, and finally yields whatever
    ``yield_from_db()`` produces.

    Raises:
        requests.RequestException: if the index cannot be fetched, times out,
            or answers with an HTTP error status.
    """
    # Kept as a function-scope import, as in the original.
    import requests

    url = 'https://instances.joinmobilizon.org/api/v1/instances?start=0&count=1000'
    # A timeout keeps a stalled index server from hanging the crawl forever.
    response = requests.get(url, headers={'user-agent': UA}, timeout=30)
    response.raise_for_status()
    # `data` may be missing or null; treat that as "no remote instances".
    joinres = response.json().get('data') or []

    db = sqlite3.connect(
        f'{os.environ.get("DATADIR")}/sqlpage.db',
        timeout=15.0,
        isolation_level='IMMEDIATE',
    )
    try:
        # `with db` commits on success and rolls back on error; it does not
        # close the connection, hence the surrounding try/finally.
        with db:
            domains = {row[0] for row in db.execute('select domain from instances')}
            for r in joinres:
                host = r['host']
                if host in domains:
                    continue
                logger.debug('new: %s, %s', r['id'], host)
                # NOTE(review): the remote id is reused as the local rowid; an
                # id already taken by another domain would raise
                # sqlite3.IntegrityError - confirm this cannot happen.
                db.execute(
                    '''insert into instances(rowid, domain, failure, "createdAt") values (?,?,?,?)''',
                    (r['id'], host, -1, int(time.time())),
                )
                # Guard against the same host appearing twice in the index.
                domains.add(host)
    finally:
        db.close()

    # Read back only after the inserts are committed, so the separate
    # read-only connection used by yield_from_db() can see the new rows.
    yield from yield_from_db()
|
||||
|
||||
|
||||
def yield_from_db():
|
||||
with sqlite3.connect(f'file:{os.environ.get("DATADIR")}/sqlpage.db?mode=ro', uri=True) as db:
|
||||
@@ -16,13 +40,19 @@ def yield_from_db():
|
||||
yield row
|
||||
|
||||
def yield_tests():
    """Yield a fixed pair of ``(id, domain)`` records used by the "test" source."""
    yield (105, 'keskonfai.fr')
    yield (7, 'mobilizon.fr')
|
||||
|
||||
# Row generators selectable by name; the spider picks one using the value of
# the SOURCE environment variable.
SOURCES = dict(
    test=yield_tests,
    local=yield_from_db,
    remote=yield_from_joinmbz,
)
|
||||
|
||||
class Fedator(Spider):
|
||||
name = "fedator"
|
||||
custom_settings = {
|
||||
"USER_AGENT": "Mozilla/5.0 (X11; Linux x86_64; rv:100.0) Gecko/20100101 Firefox/100.0",
|
||||
"USER_AGENT": UA,
|
||||
"ROBOTSTXT_OBEY": False,
|
||||
"REFERER_ENABLED": False,
|
||||
"COOKIES_ENABLED": False,
|
||||
@@ -40,9 +70,8 @@ class Fedator(Spider):
|
||||
gql_query = open('src/query.gql', 'r').read()
|
||||
from json import dumps
|
||||
body = dumps({"query": gql_query})
|
||||
# bbody = bytes(body, encoding='utf-8')
|
||||
# sys.stdout.buffer.write(bbody)
|
||||
for row in yield_tests():
|
||||
source = SOURCES.get(os.environ.get("SOURCE","test"))
|
||||
for row in source():
|
||||
domain = row[1]
|
||||
yield Request(
|
||||
url = f"https://{domain}/api",
|
||||
@@ -55,7 +84,7 @@ class Fedator(Spider):
|
||||
res:Model = Model.model_validate_json(response.body)
|
||||
meta = response.request.meta
|
||||
instance_id = meta.get('record')[0]
|
||||
with sqlite3.connect(f'file:{os.environ.get("DATADIR")}/sqlpage.db', timeout=15.0, isolation_level='IMMEDIATE') as db:
|
||||
with sqlite3.connect(f'{os.environ.get("DATADIR")}/sqlpage.db', timeout=15.0, isolation_level='IMMEDIATE') as db:
|
||||
# insert new stats
|
||||
stats = res.data.statistics
|
||||
db.execute('''insert into stats(insertedAt, instance_id, users, local_groups, total_groups, local_events, total_events, local_comments, total_comments, following, followers) values (?,?,?,?,?,?,?,?,?,?,?)
|
||||
|
10
src/model.py
10
src/model.py
@@ -7,12 +7,12 @@ from pydantic import BaseModel, Field
|
||||
|
||||
class Config(BaseModel):
    """Instance configuration section of the API response.

    Fields declared with an ``alias`` are populated from the camelCase key
    given in the alias.
    """

    country_code: Any = Field(..., alias='countryCode')
    # May be absent or null; defaults to None.
    description: Optional[str] = None
    languages: List[str]
    # Required key (no default) whose value may be null.
    long_description: Optional[str] = Field(..., alias='longDescription')
    name: str
    registrations_open: bool = Field(..., alias='registrationsOpen')
    # May be absent or null; defaults to None.
    slogan: Optional[str] = None
    version: str
|
||||
|
||||
|
||||
@@ -20,8 +20,8 @@ class Statistics(BaseModel):
|
||||
number_of_comments: int = Field(..., alias='numberOfComments')
|
||||
number_of_events: int = Field(..., alias='numberOfEvents')
|
||||
number_of_groups: int = Field(..., alias='numberOfGroups')
|
||||
number_of_instance_followers: int = Field(..., alias='numberOfInstanceFollowers')
|
||||
number_of_instance_followings: int = Field(..., alias='numberOfInstanceFollowings')
|
||||
number_of_instance_followers: Optional[int] = Field(..., alias='numberOfInstanceFollowers')
|
||||
number_of_instance_followings: Optional[int] = Field(..., alias='numberOfInstanceFollowings')
|
||||
number_of_local_comments: int = Field(..., alias='numberOfLocalComments')
|
||||
number_of_local_events: int = Field(..., alias='numberOfLocalEvents')
|
||||
number_of_local_groups: int = Field(..., alias='numberOfLocalGroups')
|
||||
|
Reference in New Issue
Block a user