build graph from mobilizon instances and followers
This commit is contained in:
commit
234773b8f0
|
@ -0,0 +1 @@
|
|||
python 3.9.9
|
|
@ -0,0 +1,17 @@
|
|||
# Pipeline: crawl Mobilizon relay followers and render them as an SVG graph.

# Work inside the project's virtualenv and install the crawler framework.
. .venv/bin/activate

pip install scrapy

# Fetch the awk preprocessor that expands template.dot further down.
wget https://framagit.org/-/snippets/6539/raw/main/pp.awk

# Seed list: one instance host per line.
# jq must read the JSON from the pipe; naming an input file here would make it
# ignore curl's output.
curl -fsS 'https://instances.joinmobilizon.org/api/v1/instances?start=0&count=1000' | jq -r '.data[].host' > instances.txt

# Crawl followers; the spider emits src,dst rows as CSV.
scrapy runspider -o out.csv:csv fedcrawler_followers.py

# Normalise line endings before the shell-based post-processing.
dos2unix out.csv

# NOTE(review): uniq0 is not defined in this commit - presumably a local
# helper/alias that removes duplicate rows; confirm before relying on it.
# '>|' overwrites the target even when the shell's noclobber option is set.
uniq0 out.csv >| out.u.csv

# pp.awk turns template.dot into a bash script; that script reads the CSV rows
# on stdin and prints the final DOT file.
bash <(awk -f pp.awk template.dot) < out.u.csv >| out.dot

neato -Tsvg -o out.svg out.dot
|
|
@ -0,0 +1,53 @@
|
|||
# -*- coding: utf-8 -*-
|
||||
import logging
|
||||
|
||||
from urllib.parse import urlparse
|
||||
|
||||
from scrapy.spiders import Spider
|
||||
from scrapy import Request
|
||||
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
|
||||
class Fedator(Spider):
    """Spider that walks the relay-follower graph of Mobilizon instances.

    Seed hosts are read from ``instances.txt`` (one host per line). For each
    host the spider pages through
    ``https://<host>/@relay/followers?page=N`` and yields one
    ``{"src": follower_host, "dst": host}`` item per follower. Follower hosts
    that have not been seen yet are crawled in turn, at most ``MAX_DEPTH``
    hops away from a seed.
    """

    name = 'fedator'

    custom_settings = {
        "USER_AGENT": 'Mozilla/5.0 (X11; Linux x86_64; rv:100.0) Gecko/20100101 Firefox/100.0',
        "ROBOTSTXT_OBEY": False,
        "REFERER_ENABLED": False,
        "COOKIES_ENABLED": False,
        "TELNETCONSOLE_ENABLED": False,
        "HTTPCACHE_ENABLED": True,
        "DEFAULT_REQUEST_HEADERS": {
            # Ask for the ActivityPub JSON representation of the collection.
            'Accept': 'application/activity+json',
        },
    }

    # Maximum number of hops followed away from a seed instance.
    MAX_DEPTH = 10

    # Hosts already scheduled for crawling. Defined on the class, so it is
    # shared by every instance of the spider.
    visited = set()

    def _followers_request(self, host, depth, page=1):
        """Build the request for one page of ``host``'s relay followers.

        ``depth`` and ``page`` travel in ``meta`` so that ``parse`` can
        continue pagination and enforce ``MAX_DEPTH``.
        """
        return Request(
            f'https://{host}/@relay/followers?page={page}',
            meta={"dst": host, "depth": depth, "page": page},
        )

    @staticmethod
    def _follower_host(follower):
        """Return the host of a follower actor URL, or ``''`` if unusable."""
        if not isinstance(follower, str):
            return ''
        return urlparse(follower).netloc

    def start_requests(self):
        """Yield the first followers-page request for every seed host.

        Blank lines and repeated hosts in ``instances.txt`` are skipped.
        """
        with open("instances.txt", encoding="utf-8") as instances:
            for line in instances:
                # strip() rather than line[:-1]: the last line may lack a
                # trailing newline, and files may carry CRLF endings.
                host = line.strip()
                if not host or host in self.visited:
                    continue
                self.visited.add(host)
                yield self._followers_request(host, depth=0, page=1)

    def parse(self, response):
        """Handle one followers page.

        Yields, in order: the request for the next page of the same host
        (only when this page was non-empty), then for each follower an edge
        item ``{"src": ..., "dst": ...}`` and, for hosts not visited yet and
        within ``MAX_DEPTH``, a request for that host's first followers page.
        """
        meta = response.request.meta
        dst = meta["dst"]
        depth = meta["depth"]
        page = meta["page"]

        try:
            payload = response.json()
        except ValueError:
            logger.warning("non-JSON followers response from %s", response.url)
            return

        # e.g. "orderedItems": ["https://mobilizon.sans-nuage.fr/relay"]
        followers = payload.get("orderedItems") if isinstance(payload, dict) else None
        if not followers:
            # An empty (or absent) page marks the end of the collection.
            return

        # Keep the current depth while paginating; resetting it to 0 would let
        # followers found on later pages escape the MAX_DEPTH limit.
        yield self._followers_request(dst, depth, page + 1)

        for follower in followers:
            host = self._follower_host(follower)
            if not host:
                logger.debug("skipping follower without a host: %r", follower)
                continue
            yield {"src": host, "dst": dst}
            if host not in self.visited and depth < self.MAX_DEPTH:
                self.visited.add(host)
                yield self._followers_request(host, depth + 1)
            else:
                logger.debug("already visited or maxdepth (%s)", depth + 1)
|
|
@ -0,0 +1,13 @@
|
|||
digraph fediverse {
graph [overlap=false]
#!
IFS=","
while read -r src dst
do
    # Skip the CSV header row and empty lines; every other row is one edge.
    # Quoting "$src" keeps the test safe from word splitting and globbing.
    if [ -n "$src" ] && [ "$src" != "src" ];
    then
        echo "\"${src}\" -> \"${dst}\""
    fi
done
#!
}
|
Loading…
Reference in New Issue