diff --git a/DESIGN.md b/DESIGN.md
index 85efadb..3e42f6e 100644
--- a/DESIGN.md
+++ b/DESIGN.md
@@ -52,14 +52,16 @@
   - [ ] list failed domain
   - [ ] rescan an instance
   - [ ] allow to blacklist a domain
-- [ ] package
-  - [ ] docker image with : s6 or tini + crond + python(pyinstaller) + sqlpage
+  - [ ] remove duplicate stats ; keep one per day per instance
+- [x] package
+  - [x] docker image with : s6 + crond + python + sqlpage
 - [ ] deploy on `beta-instances.mobilizon.org`
-  - [ ] DNS
-  - [ ] HC ping
-
-
-
+  - [x] DNS -> LB3
+  - [x] HC ping
+  - [x] upload image
+  - [x] docker compose config
+  - [x] upload database
+  - [x] start service
 
 # impl
 
diff --git a/Dockerfile b/Dockerfile
index 05dbee5..3930556 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -1,6 +1,6 @@
 FROM python:3.13-alpine
 
-RUN apk add --no-cache s6 bash wget unzip
+RUN apk add --no-cache s6 bash wget unzip curl
 
 COPY requirements.txt .
 
diff --git a/docker-compose.template.yml b/docker-compose.template.yml
new file mode 100644
index 0000000..53f39b4
--- /dev/null
+++ b/docker-compose.template.yml
@@ -0,0 +1,19 @@
+# put reverse proxy in the project network
+
+services:
+  web:
+    restart: unless-stopped
+    image: mobilizon-instances:0.1.0
+    environment:
+      SQLPAGE_ENVIRONMENT: production
+      SQLPAGE_WEB_ROOT: /app/webroot
+      SQLPAGE_DATABASE_URL: sqlite:///data/sqlpage.db
+      SCHEDULE: "2 5 * * *" # UTC time
+      HC_PING: https:///
+      DATADIR: /data
+      SOURCE: remote # source on instances.joinmobilizon.org
+    volumes:
+      - data:/data
+
+volumes:
+  data:
diff --git a/src/instances.py b/src/instances.py
index 21e8e58..8b9ae26 100644
--- a/src/instances.py
+++ b/src/instances.py
@@ -9,6 +9,39 @@ from model import Model
 
 logger = logging.getLogger(__name__)
 
+UA = 'Mozilla/5.0 (X11; Linux x86_64; rv:139.0) Gecko/20100101 Firefox/139.0'
+
+def yield_from_joinmbz():
+    """Sync new hosts from the joinmobilizon index into the db, then yield db rows.
+
+    Hosts listed by the API but absent from the `instances` table are inserted
+    with failure=-1; the rows themselves are then produced by yield_from_db().
+    """
+    # load from jmbz api
+    import requests
+    url = 'https://instances.joinmobilizon.org/api/v1/instances?start=0&count=1000'
+    resp = requests.get(url, headers={'user-agent': UA}, timeout=30)
+    # surface HTTP errors here rather than as a confusing JSON/KeyError later
+    resp.raise_for_status()
+    # a missing or null "data" field means there is nothing to sync
+    joinres = resp.json().get('data') or []
+    # load from db
+    with sqlite3.connect(f'{os.environ.get("DATADIR")}/sqlpage.db', timeout=15.0, isolation_level='IMMEDIATE') as db:
+        rows = db.execute('select rowid, domain from instances').fetchall()
+        domains = {row[1] for row in rows}
+        for r in joinres:
+            if r['host'] not in domains:
+                logger.debug('new: %s, %s', r['id'], r['host'])
+                # insert new records
+                db.execute('''insert into instances(rowid, domain, failure, "createdAt") values (?,?,?,?)''', (
+                    r['id']
+                    , r['host']
+                    , -1
+                    , int(time.time())
+                ))
+    # read back only after the `with` block has committed the inserts
+    yield from yield_from_db()
+
 
 def yield_from_db():
     with sqlite3.connect(f'file:{os.environ.get("DATADIR")}/sqlpage.db?mode=ro', uri=True) as db:
@@ -16,13 +49,19 @@ def yield_from_db():
             yield row
 
 def yield_tests():
-    #yield (105, 'keskonfai.fr',)
+    yield (105, 'keskonfai.fr',)
     yield (7, 'mobilizon.fr',)
 
+SOURCES = {
+    "test": yield_tests,
+    "local": yield_from_db,
+    "remote": yield_from_joinmbz,
+}
+
 class Fedator(Spider):
     name = "fedator"
     custom_settings = {
-        "USER_AGENT": "Mozilla/5.0 (X11; Linux x86_64; rv:100.0) Gecko/20100101 Firefox/100.0",
+        "USER_AGENT": UA,
         "ROBOTSTXT_OBEY": False,
         "REFERER_ENABLED": False,
         "COOKIES_ENABLED": False,
@@ -40,9 +79,10 @@ class Fedator(Spider):
         gql_query = open('src/query.gql', 'r').read()
         from json import dumps
         body = dumps({"query": gql_query})
-        # bbody = bytes(body, encoding='utf-8')
-        # sys.stdout.buffer.write(bbody)
-        for row in yield_tests():
+        source_name = os.environ.get("SOURCE", "test")
+        if source_name not in SOURCES:
+            raise ValueError(f"unknown SOURCE {source_name!r}, expected one of {sorted(SOURCES)}")
+        for row in SOURCES[source_name]():
             domain = row[1]
             yield Request(
                 url = f"https://{domain}/api",
@@ -55,7 +95,7 @@ class Fedator(Spider):
         res:Model = Model.model_validate_json(response.body)
         meta = response.request.meta
         instance_id = meta.get('record')[0]
-        with sqlite3.connect(f'file:{os.environ.get("DATADIR")}/sqlpage.db', timeout=15.0, isolation_level='IMMEDIATE') as db:
+        with sqlite3.connect(f'{os.environ.get("DATADIR")}/sqlpage.db', timeout=15.0, isolation_level='IMMEDIATE') as db:
             # insert new stats
             stats = res.data.statistics
             db.execute('''insert into stats(insertedAt, instance_id, users, local_groups, total_groups, local_events, total_events, local_comments, total_comments, following, followers) values (?,?,?,?,?,?,?,?,?,?,?)
diff --git a/src/model.py b/src/model.py
index 44e1745..fd6293f 100644
--- a/src/model.py
+++ b/src/model.py
@@ -7,12 +7,12 @@ from pydantic import BaseModel, Field
 
 class Config(BaseModel):
     country_code: Any = Field(..., alias='countryCode')
-    description: str
+    description: Optional[str] = None
     languages: List[str]
-    long_description: str = Field(..., alias='longDescription')
+    long_description: Optional[str] = Field(..., alias='longDescription')
     name: str
     registrations_open: bool = Field(..., alias='registrationsOpen')
-    slogan: str
+    slogan: Optional[str] = None
     version: str
 
 
@@ -20,8 +20,8 @@ class Statistics(BaseModel):
     number_of_comments: int = Field(..., alias='numberOfComments')
     number_of_events: int = Field(..., alias='numberOfEvents')
     number_of_groups: int = Field(..., alias='numberOfGroups')
-    number_of_instance_followers: int = Field(..., alias='numberOfInstanceFollowers')
-    number_of_instance_followings: int = Field(..., alias='numberOfInstanceFollowings')
+    number_of_instance_followers: Optional[int] = Field(..., alias='numberOfInstanceFollowers')
+    number_of_instance_followings: Optional[int] = Field(..., alias='numberOfInstanceFollowings')
     number_of_local_comments: int = Field(..., alias='numberOfLocalComments')
     number_of_local_events: int = Field(..., alias='numberOfLocalEvents')
     number_of_local_groups: int = Field(..., alias='numberOfLocalGroups')