1
0

chore: package

This commit is contained in:
setop 2025-07-05 01:34:59 +02:00
parent 8e324b2a1f
commit 9e411cee54
16 changed files with 424 additions and 4 deletions

2
.dockerignore Normal file
View File

@ -0,0 +1,2 @@
.env
**/__pycache__

176
.gitignore vendored
View File

@ -1 +1,177 @@
*.env
# Created by https://www.toptal.com/developers/gitignore/api/python
# Edit at https://www.toptal.com/developers/gitignore?templates=python
### Python ###
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class
# C extensions
*.so
# Distribution / packaging
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
share/python-wheels/
*.egg-info/
.installed.cfg
*.egg
MANIFEST
# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec
# Installer logs
pip-log.txt
pip-delete-this-directory.txt
# Unit test / coverage reports
htmlcov/
.tox/
.nox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
*.py,cover
.hypothesis/
.pytest_cache/
cover/
# Translations
*.mo
*.pot
# Django stuff:
*.log
local_settings.py
db.sqlite3
db.sqlite3-journal
# Flask stuff:
instance/
.webassets-cache
# Scrapy stuff:
.scrapy
# Sphinx documentation
docs/_build/
# PyBuilder
.pybuilder/
target/
# Jupyter Notebook
.ipynb_checkpoints
# IPython
profile_default/
ipython_config.py
# pyenv
# For a library or package, you might want to ignore these files since the code is
# intended to run in multiple environments; otherwise, check them in:
# .python-version
# pipenv
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
# However, in case of collaboration, if having platform-specific dependencies or dependencies
# having no cross-platform support, pipenv may install dependencies that don't work, or not
# install all needed dependencies.
#Pipfile.lock
# poetry
# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
# This is especially recommended for binary packages to ensure reproducibility, and is more
# commonly ignored for libraries.
# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
#poetry.lock
# pdm
# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
#pdm.lock
# pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
# in version control.
# https://pdm.fming.dev/#use-with-ide
.pdm.toml
# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
__pypackages__/
# Celery stuff
celerybeat-schedule
celerybeat.pid
# SageMath parsed files
*.sage.py
# Environments
.env
.venv
env/
venv/
ENV/
env.bak/
venv.bak/
# Spyder project settings
.spyderproject
.spyproject
# Rope project settings
.ropeproject
# mkdocs documentation
/site
# mypy
.mypy_cache/
.dmypy.json
dmypy.json
# Pyre type checker
.pyre/
# pytype static type analyzer
.pytype/
# Cython debug symbols
cython_debug/
# PyCharm
# JetBrains specific template is maintained in a separate JetBrains.gitignore that can
# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
# and can be added to the global gitignore or merged into this file. For a more nuclear
# option (not recommended) you can uncomment the following to ignore the entire idea folder.
#.idea/
### Python Patch ###
# Poetry local configuration file - https://python-poetry.org/docs/configuration/#local-configuration
poetry.toml
# ruff
.ruff_cache/
# LSP config files
pyrightconfig.json
# End of https://www.toptal.com/developers/gitignore/api/python

View File

@ -12,20 +12,26 @@
# MVP
- [x] chore: data model
- [ ] model : add "s" to failure
- [ ] model : add "s" to following
- [ ] add "blacklisted"
- [x] data migration scripts
- [ ] homepage
- [x] form to register an instance
- [ ] check for duplicates
- [ ] grab first stats : version, users, groups, events
- [ ] if fail, set failure to 1
- [ ] confirmation page
- [x] confirmation page
- [ ] find a way to populate location
- [x] instances list, no pagination
- [ ] abuse link
- [ ] cron
- [ ] select instances where failure < max_failure
- [ ] scraper
- [x] select instances where failure < max_failure
- [ ] for each entry, fetch stats
- [ ] if fail, set failure to failure+1
- [ ] if fail, set failure to min(1, failure+1)
- [ ] if success, set failure = 0
- [x] insert new stats
- [x] update instances info
- [ ] stats page
- [x] big numbers
- [x] total Instances
@ -41,6 +47,19 @@
- [ ] versions pie chart
- [ ] languages pie chart (user weighted ?)
- [ ] location pie chart
- [ ] admin panel
- [ ] authentication (shared secret, oauth ?)
- [ ] list failed domain
- [ ] rescan an instance
- [ ] allow to blacklist a domain
- [ ] package
- [ ] docker image with : s6 or tini + crond + python(pyinstaller) + sqlpage
- [ ] deploy on `beta-instances.mobilizon.org`
- [ ] DNS
- [ ] HC ping
# impl

24
Dockerfile Normal file
View File

@ -0,0 +1,24 @@
# Runtime image: Python (Scrapy scraper) + the SQLPage binary (web UI),
# both supervised by s6 running as PID 1.
FROM python:3.13-alpine
# s6: process supervision; bash: required by s6/crond/run; wget/unzip: fetch SQLPage below.
RUN apk add --no-cache s6 bash wget unzip
# Install Python dependencies before copying sources so code edits
# do not invalidate the pip layer cache.
COPY requirements.txt .
RUN python -m pip install --no-cache-dir -r requirements.txt
# s6 service directories (crond + sqlpage run scripts).
COPY ./s6 /etc/s6
WORKDIR /app
COPY scripts scripts
COPY src src
COPY sqlpage sqlpage
COPY webroot webroot
# SQLPage publishes its server as the AWS-Lambda "bootstrap" binary;
# extract only that file from the release zip, then drop the archive.
RUN wget -q https://github.com/sqlpage/SQLPage/releases/download/v0.35.2/sqlpage-aws-lambda.zip \
&& unzip sqlpage-aws-lambda.zip bootstrap \
&& rm sqlpage-aws-lambda.zip
# s6-svscan supervises every service under /etc/s6.
ENTRYPOINT [ "/usr/bin/s6-svscan", "/etc/s6" ]
# NOTE(review): assumes SQLPage listens on 8080 -- confirm against sqlpage/sqlpage.json.
EXPOSE 8080

View File

@ -1 +1,8 @@
# Build the image and start the container for local testing.
. .venv/bin/activate
export VERSION=0.1.0
docker build -t "mobilizon-instances:${VERSION}" .
# Quote the command substitution: an unquoted $(realpath sqlpage) word-splits
# if the checkout path contains spaces, producing a broken -v argument.
docker run -d --name mbzinstances --env-file .envfile -v "$(realpath sqlpage):/data" "mobilizon-instances:${VERSION}"

2
requirements.txt Normal file
View File

@ -0,0 +1,2 @@
Scrapy
pydantic

9
s6/crond/run Executable file
View File

@ -0,0 +1,9 @@
#!/bin/bash -eu
# s6 service: install a one-line crontab and run BusyBox crond in the foreground.
# Requires HC_PING (healthcheck ping URL) and SCHEDULE (cron expression) in the
# environment; -u makes a missing variable a hard failure.
# specific to alpine, won't work with debian
# place crontab /!\ UTC time
# The generated crontab defines ping="curl ... ${HC_PING}" and one ${SCHEDULE}
# entry that runs the scrape job, then hits ${HC_PING} on success or
# ${HC_PING}/fail on failure (the /fail suffix concatenates onto the URL,
# which is the last token of ${ping}).
printf "ping=\"curl -fsS -m 10 --retry 5 -o /dev/null ${HC_PING}\"\n${SCHEDULE} /app/scripts/runjob.sh && \${ping} || \${ping}/fail \n" | crontab -
# start cron (foreground so s6 supervises it)
crond -f

4
s6/sqlpage/run Executable file
View File

@ -0,0 +1,4 @@
#!/bin/sh -eu
# s6 service: run the SQLPage server (the "bootstrap" binary fetched in the
# Dockerfile) with its JSON configuration.
# exec replaces this shell with bootstrap so s6 supervises and signals the
# server directly; without exec, s6 only sees the wrapper shell and
# s6-svc/TERM would not reach the daemon.
exec /app/bootstrap -c /app/sqlpage/sqlpage.json

View File

@ -1,4 +1,10 @@
-- Connection/database pragmas tuned for one writer (scraper) + readers (sqlpage).
PRAGMA journal_mode = WAL; -- persistent: lets readers proceed while the scraper writes
PRAGMA busy_timeout = 15000; -- for interactive, 15s for background tasks
PRAGMA synchronous = NORMAL; -- safe with WAL; fewer fsyncs than FULL
PRAGMA cache_size = 1000000000; -- page-count target, i.e. "as large as available"; not literally infinite
PRAGMA foreign_keys = true;
PRAGMA temp_store = memory;
CREATE TABLE instances (
-- PK will be rowid

5
scripts/runjob.sh Executable file
View File

@ -0,0 +1,5 @@
#!/bin/sh -eu
# Run the scraper from the repository root (parent of this script's directory)
# so relative paths such as src/query.gql resolve correctly.
# Quote every expansion: unquoted $0 / $(...) word-split on paths with spaces.
CMDD=$(dirname "$(realpath "$0")")
cd "$(dirname "${CMDD}")"
python -m scrapy runspider src/instances.py

93
src/instances.py Normal file
View File

@ -0,0 +1,93 @@
import sys
import os
import logging
import time
import sqlite3
from scrapy import Spider, Request
from scrapy.http import Response
from model import Model
logger = logging.getLogger(__name__)
def yield_from_db():
    """Yield (rowid, domain) for each instance still considered alive.

    Opens the sqlpage database read-only (URI mode) and skips instances with
    5 or more recorded failures.  The connection is closed even if the caller
    abandons the generator early.
    """
    # The original `with sqlite3.connect(...)` only manages the transaction:
    # it commits/rolls back but never closes the connection, so each call
    # leaked a handle.  try/finally guarantees the close.
    db = sqlite3.connect(f'file:{os.environ.get("DATADIR")}/sqlpage.db?mode=ro', uri=True)
    try:
        # NOTE(review): 5 appears to be the max-failure threshold; keep in
        # sync with the scraper's failure accounting.
        yield from db.execute('select rowid, domain from instances where failure < 5')
    finally:
        db.close()
def yield_tests():
    """Hard-coded sample of (rowid, domain) records used during development."""
    samples = [
        # (105, 'keskonfai.fr'),
        (7, 'mobilizon.fr'),
    ]
    for record in samples:
        yield record
class Fedator(Spider):
    """Spider that POSTs the GraphQL "About" query (src/query.gql) to each
    Mobilizon instance and stores the returned statistics and configuration
    in the sqlpage SQLite database.
    """

    name = "fedator"
    custom_settings = {
        "USER_AGENT": "Mozilla/5.0 (X11; Linux x86_64; rv:100.0) Gecko/20100101 Firefox/100.0",
        "ROBOTSTXT_OBEY": False,
        "REFERER_ENABLED": False,
        "COOKIES_ENABLED": False,
        "TELNETCONSOLE_ENABLED": False,
        "HTTPCACHE_ENABLED": False,
        "DEFAULT_REQUEST_HEADERS": {
            "Accept": "application/json",
            "Content-Type": "application/json",
        },
        "DOWNLOAD_TIMEOUT": 10,
    }

    async def start(self):
        """Yield one POST request per instance to scan.

        NOTE(review): this iterates yield_tests() (a hard-coded sample)
        instead of yield_from_db() -- looks like leftover debugging;
        confirm before deploying.
        """
        from json import dumps
        # 'with' closes the handle; the original open(...).read() leaked it.
        with open('src/query.gql', 'r') as fh:
            gql_query = fh.read()
        body = dumps({"query": gql_query})
        for row in yield_tests():
            domain = row[1]
            yield Request(
                url=f"https://{domain}/api",
                method='POST',
                body=body,
                # carry the db record through to parse() for the rowid
                meta={"record": row},
            )

    def parse(self, response: Response):
        """Validate the GraphQL response and persist it.

        Inserts a snapshot row into `stats` and refreshes the instance's
        metadata, resetting its failure counter to 0.
        """
        res: Model = Model.model_validate_json(response.body)
        instance_id = response.request.meta.get('record')[0]
        now = int(time.time())
        # uri=True is required: without it sqlite3 treats the whole
        # 'file:...' string as a literal filename and writes the wrong file
        # (yield_from_db already passed uri=True; this call did not).
        db = sqlite3.connect(
            f'file:{os.environ.get("DATADIR")}/sqlpage.db',
            timeout=15.0, isolation_level='IMMEDIATE', uri=True)
        try:
            # `with db` scopes one IMMEDIATE transaction (commit on success,
            # rollback on error) but does not close the connection, so close
            # explicitly in finally -- the original leaked one per response.
            with db:
                # insert new stats snapshot
                stats = res.data.statistics
                db.execute(
                    'insert into stats(insertedAt, instance_id, users, local_groups, total_groups, local_events, total_events, local_comments, total_comments, following, followers)'
                    ' values (?,?,?,?,?,?,?,?,?,?,?)',
                    (now
                     , instance_id
                     , stats.number_of_users
                     , stats.number_of_local_groups
                     , stats.number_of_groups
                     , stats.number_of_local_events
                     , stats.number_of_events
                     , stats.number_of_local_comments
                     , stats.number_of_comments
                     , stats.number_of_instance_followings
                     , stats.number_of_instance_followers
                     ))
                # update instance info; location is not populated yet (see TODO)
                config = res.data.config
                db.execute(
                    'update instances set name=?,slogan=?,description=?,languages=?,open=?,version=? '
                    ',failure=?,updatedAt=? where rowid=?',
                    (config.name
                     , config.slogan
                     , config.description
                     , ",".join(config.languages)
                     , config.registrations_open
                     , config.version
                     , 0  # a successful scrape resets the failure counter
                     , now
                     , instance_id
                     ))
        finally:
            db.close()

38
src/model.py Normal file
View File

@ -0,0 +1,38 @@
from __future__ import annotations
from typing import Any, List, Optional
from pydantic import BaseModel, Field
class Config(BaseModel):
    """Public configuration of a Mobilizon instance (GraphQL `config` field).

    Field aliases match the camelCase names returned by the API
    (see src/query.gql).
    """
    # NOTE(review): typed Any -- presumably nullable / shape unconfirmed
    country_code: Any = Field(..., alias='countryCode')
    description: str
    languages: List[str]
    long_description: str = Field(..., alias='longDescription')
    name: str
    registrations_open: bool = Field(..., alias='registrationsOpen')
    slogan: str
    version: str


class Statistics(BaseModel):
    """Activity counters of an instance (GraphQL `statistics` field)."""
    number_of_comments: int = Field(..., alias='numberOfComments')
    number_of_events: int = Field(..., alias='numberOfEvents')
    number_of_groups: int = Field(..., alias='numberOfGroups')
    number_of_instance_followers: int = Field(..., alias='numberOfInstanceFollowers')
    number_of_instance_followings: int = Field(..., alias='numberOfInstanceFollowings')
    number_of_local_comments: int = Field(..., alias='numberOfLocalComments')
    number_of_local_events: int = Field(..., alias='numberOfLocalEvents')
    number_of_local_groups: int = Field(..., alias='numberOfLocalGroups')
    number_of_users: int = Field(..., alias='numberOfUsers')


class Data(BaseModel):
    """`data` envelope of the GraphQL response."""
    config: Config
    statistics: Statistics


class Model(BaseModel):
    """Top-level GraphQL response: {"data": {...}}."""
    data: Data

23
src/query.gql Normal file
View File

@ -0,0 +1,23 @@
# Single "About" query fetching everything the scraper stores per instance:
# activity statistics plus public configuration (mapped by src/model.py).
query About {
  statistics {
    numberOfUsers
    numberOfLocalGroups
    numberOfGroups
    numberOfLocalEvents
    numberOfEvents
    numberOfLocalComments
    numberOfComments
    numberOfInstanceFollowings
    numberOfInstanceFollowers
  }
  config {
    name
    version
    registrationsOpen
    slogan
    description
    longDescription
    countryCode
    languages
  }
}

View File

@ -2,6 +2,8 @@ select
'shell' as component
, 'Mobilizon Instances' as title
, 'social' as icon
, JSON('{"title":"Statistics","link":"/stats", "icon":"chart-dots"}') as menu_item
, JSON('{"link":"mailto:contact@kaihuri.org?subject=report%20an%20instance","title":"Report an instance","icon":"forms"}') as menu_item
, '' as footer
;
@ -34,6 +36,7 @@ select
select
domain as Url
, name
, version
, slogan
, description
from instances

View File

@ -1,3 +1,12 @@
select
'shell' as component
, 'Mobilizon statistics' as title
, 'chart-dots' as icon
, JSON('{"title":"Instances list","link":"/", "icon":"social"}') as menu_item
, JSON('{"link":"mailto:contact@kaihuri.org?subject=report%20an%20instance","title":"Report an instance","icon":"forms"}') as menu_item
, '' as footer
;
select
'big_number' as component
, 4 as columns