This commit is contained in:
ge 2023-04-26 16:22:48 +03:00
commit 21e3ae3b1c
5 changed files with 169 additions and 0 deletions

13
Pipfile Normal file
View File

@ -0,0 +1,13 @@
[[source]]
url = "https://pypi.org/simple"
verify_ssl = true
name = "pypi"
[packages]
bottle = "*"
gunicorn = "*"
[dev-packages]
[requires]
python_version = "3.10"

45
Pipfile.lock generated Normal file
View File

@ -0,0 +1,45 @@
{
"_meta": {
"hash": {
"sha256": "6cf82d4043d4c4bf722b99766ec56d49b04447a5e89726986ac957c46efc2196"
},
"pipfile-spec": 6,
"requires": {
"python_version": "3.10"
},
"sources": [
{
"name": "pypi",
"url": "https://pypi.org/simple",
"verify_ssl": true
}
]
},
"default": {
"bottle": {
"hashes": [
"sha256:d6f15f9d422670b7c073d63bd8d287b135388da187a0f3e3c19293626ce034ea",
"sha256:e1a9c94970ae6d710b3fb4526294dfeb86f2cb4a81eff3a4b98dc40fb0e5e021"
],
"index": "pypi",
"version": "==0.12.25"
},
"gunicorn": {
"hashes": [
"sha256:9dcc4547dbb1cb284accfb15ab5667a0e5d1881cc443e0677b4882a4067a807e",
"sha256:e0a968b5ba15f8a328fdfd7ab1fcb5af4470c28aaf7e55df02a99bc13138e6e8"
],
"index": "pypi",
"version": "==20.1.0"
},
"setuptools": {
"hashes": [
"sha256:23aaf86b85ca52ceb801d32703f12d77517b2556af839621c641fca11287952b",
"sha256:f104fa03692a2602fa0fec6c6a9e63b6c8a968de13e17c026957dd1f53d80990"
],
"markers": "python_version >= '3.7'",
"version": "==67.7.2"
}
},
"develop": {}
}

40
README.md Normal file
View File

@ -0,0 +1,40 @@
# Cursed API for ArchiveBox
ArchiveBox [doesn't have a web API](https://github.com/ArchiveBox/ArchiveBox/issues/496) yet. This is a shitty single-endpoint API to automate page archiving. It uses `subprocess` to run the archivebox CLI. The CLI runs in a separate process so that the HTTP request is not blocked while the page is being archived.
# Install and run
Install dependencies:
```
pip install bottle gunicorn
```
Start the API on the server where the ArchiveBox container is running. Set the actual path to your docker-compose.yml.
```
ARCHIVEBOX_BIN="docker compose -f /opt/archivebox/docker-compose.yml run archivebox" python cursed_archivebox_api.py
```
# Environment
| Variable | Default |
| ----------------- | --------------------- |
| `ARCHIVEBOX_BIN` | `/usr/bin/archivebox` (default for non-Docker installations) |
| `CURSED_PORT` | `9998` |
| `CURSED_HOST` | `0.0.0.0` |
| `CURSED_SERVER` | `gunicorn` See [server backends](https://bottlepy.org/docs/dev/deployment.html#switching-the-server-backend) |
# GET /add
Query parameters:
* `url`. Resource URL (required; the request fails with `400` if it is missing)
* `depth`. Archive depth. Default: 0 (current page)
* `tag`. Comma-separated list of tags, e.g. `my_tag` or `my_tag,another_one`.
Example:
```
curl -i 'http://localhost:9998/add?url=https://example.com&depth=0&tag=api,example'
```

67
cursed_archivebox_api.py Normal file
View File

@ -0,0 +1,67 @@
import os
import json
import logging
import subprocess
from multiprocessing import Process
from bottle import run, get, request, response
# Runtime configuration, read once from the environment at import time.
# ``or`` is used instead of a getenv default so that a variable that is set
# but empty also falls back to the default value.
ARCHIVEBOX_BIN = os.getenv('ARCHIVEBOX_BIN') or '/usr/bin/archivebox'
# Environment values are always strings; normalise so the port is an int
# regardless of whether it came from the environment or the default.
CURSED_PORT = int(os.getenv('CURSED_PORT') or 9998)
CURSED_HOST = os.getenv('CURSED_HOST') or '0.0.0.0'
CURSED_SERVER = os.getenv('CURSED_SERVER') or 'gunicorn'

# Root logger at DEBUG level: every request logs the command it launches.
logging.basicConfig(
    level=logging.DEBUG,
    format="%(asctime)s:%(levelname)s:%(name)s: %(message)s"
)
def shell_exec(command: list, to_stdin: str = None) -> None:
"""Execute shell command and return output."""
pipe = subprocess.Popen(command,
stdin=subprocess.PIPE,
stdout=subprocess.PIPE,
stderr=subprocess.PIPE)
if to_stdin:
to_stdin = '%s\n' % to_stdin
pipe.stdin.write(to_stdin.encode('utf-8'))
pipe.stdin.flush()
output, error = pipe.communicate()
output = output.strip().decode("utf-8")
error = error.decode("utf-8")
if pipe.returncode != 0:
raise RuntimeError(error)
return output
def run_bg_task(cmd):
    """Run ``cmd`` through ``shell_exec`` and log how it went.

    This is the target of the ``multiprocessing.Process`` started by the
    ``/add`` handler, so it has no caller to report to: the command's output
    is discarded and a failure is written to the log instead of being raised.

    Args:
        cmd: Program and its arguments as an argv-style list.
    """
    logging.debug('PID=%s Run "background" thread...', os.getpid())
    try:
        shell_exec(cmd)
    except (RuntimeError, OSError):
        # RuntimeError: the command exited non-zero (message is its stderr).
        # OSError: the executable could not be started at all.
        logging.exception('PID=%s Background command failed: %s',
                          os.getpid(), cmd)
    else:
        logging.debug('PID=%s Background thread finished', os.getpid())
@get('/add')
def add_to_archive() -> str:
    """Handle ``GET /add``: start ``archivebox add`` for the given URL.

    Query parameters:
        url: Resource URL to archive (required).
        depth: Archive depth; must be a non-negative integer if given.
        tag: Comma-separated tags, passed through as ``--tag=<value>``.

    Returns:
        A JSON document ``{"msg": ...}``. The status is 400 when ``url`` is
        missing or a parameter is rejected. Otherwise the command is started
        in a separate process and ``OK`` is returned immediately, without
        waiting for (or reporting on) the archiving result.
    """
    response.content_type = 'application/json'

    url = request.query.url or None
    depth = request.query.depth or None
    tag = request.query.tag or None

    # Validate everything before building the command.
    if url is None:
        response.status = 400
        return json.dumps({'msg': 'Error: No URL query parameter provided'})
    if url.startswith('-'):
        # The URL is a bare positional argument; a leading dash would be
        # parsed by the CLI as an option rather than as a URL.
        response.status = 400
        return json.dumps({'msg': 'Error: Invalid URL query parameter'})
    if depth is not None and not (depth.isascii() and depth.isdigit()):
        response.status = 400
        return json.dumps(
            {'msg': 'Error: depth must be a non-negative integer'})

    # ARCHIVEBOX_BIN may hold a whole command line (e.g. "docker compose ...").
    cmd = ARCHIVEBOX_BIN.split()
    cmd.append("add")
    if depth:
        cmd.append('--depth=' + depth)
    if tag:
        cmd.append('--tag=' + tag)
    # The command runs without a shell, so the URL must not be quoted:
    # quote characters would be handed to the CLI as part of the URL.
    cmd.append(url)

    logging.debug('PID=%s Command to run: %s', os.getpid(), cmd)
    taskrun = Process(target=run_bg_task, args=(cmd,))
    taskrun.start()
    return json.dumps({'msg': 'OK'})
# Start the server only when executed as a script. Without the guard, any
# re-import of this module (for instance by multiprocessing when it uses the
# "spawn" start method for the background process) would try to start a
# second server instead of just loading the definitions.
if __name__ == '__main__':
    run(server=CURSED_SERVER, host=CURSED_HOST, port=CURSED_PORT)

4
requirements.txt Normal file
View File

@ -0,0 +1,4 @@
-i https://pypi.org/simple
bottle==0.12.25
gunicorn==20.1.0
setuptools==67.7.2 ; python_version >= '3.7'