This commit is contained in:
cash
2026-03-29 23:50:49 -05:00
commit eb5e194331
56 changed files with 4010 additions and 0 deletions

0
backend/web/__init__.py Normal file
View File

303
backend/web/app.py Normal file
View File

@@ -0,0 +1,303 @@
"""app.py Jul 15 2025"""
from __future__ import annotations
import asyncio
import mimetypes
import os
import secrets
import shutil
import signal
import sysconfig
import threading
from pathlib import Path
from typing import Dict
import aiofiles
import structlog
from quart import (
Quart,
Response,
jsonify,
redirect,
render_template,
request,
session,
url_for, render_template_string,
)
from backend.core.logging import init_logging
from core.settings import TMP_DIR, DOWNLOAD_DIR
from core.formats import choose_format, _lookup_cache_sync, _cached_metadata_fetch
from core.download import download, EST_MB
from core.formats import choose_format as choose_format_logic
from core.db_xp import is_ip_banned, ensure_user, get_status
from core.web.db_extra import invalid_over_limit, init_proxy_seed, start_background_tasks
from core.db import metadata, engine
from core import progress_bus
init_logging()
log = structlog.get_logger()
def _frontend_root() -> Path:
here = Path(__file__).resolve().parent
dev = here.parent.parent / "frontend"
return dev if dev.exists() else Path(sysconfig.get_path("data")) / "share" / "s1ne" / "frontend"
# Resolved once at import time; templates/static are served from here.
FRONTEND_ROOT = _frontend_root()
app = Quart(
    __name__,
    template_folder=str(FRONTEND_ROOT / "templates"),
    static_folder=str(FRONTEND_ROOT / "static"),
)
# NOTE(review): SECRET_KEY_WORD comes from the environment; if unset this is
# None and session signing will fail at runtime — confirm deployment sets it.
app.secret_key = os.getenv("SECRET_KEY_WORD")
# In-flight download tasks keyed by client sid
# (see /download_file and /cancel_download below).
_tasks: Dict[str, asyncio.Task] = {}
async def _cleanup_temp(interval: int = 900) -> None:
    """Periodically delete stale lock files and temp download directories.

    Runs forever; every *interval* seconds it removes anything under
    TMP_DIR older than 12 hours.

    Bug fix: the cutoff was computed from ``asyncio.get_event_loop().time()``
    — a monotonic clock measured in seconds since the loop started — but
    compared against ``st_mtime``, which is wall-clock epoch time.  The
    cutoff was therefore tiny (or negative) and nothing was ever cleaned.
    Use ``time.time()`` so both sides share the epoch timebase.
    """
    import time  # local import: only this background task needs it

    while True:
        cutoff = time.time() - 60 * 60 * 12  # 12 hours ago, epoch seconds
        for p in (TMP_DIR / "ytlocks").glob("*.lock"):
            if p.stat().st_mtime < cutoff:
                p.unlink(missing_ok=True)
        for pattern in ("yt_*", "tmp*"):
            for p in TMP_DIR.glob(pattern):
                if p.is_dir() and p.stat().st_mtime < cutoff:
                    shutil.rmtree(p, ignore_errors=True)
        await asyncio.sleep(interval)
async def _file_iter(path: Path, chunk: int = 1 << 15):
    """Yield *path*'s contents in binary chunks (default 32 KiB)."""
    async with aiofiles.open(path, "rb") as fh:
        while True:
            block = await fh.read(chunk)
            if not block:
                break
            yield block
async def _shutdown_waiter():
    """Give cancelled tasks a brief moment to unwind, then log completion."""
    await asyncio.sleep(0.1)
    log.info("shutdown.tasks_cancelled")
def _graceful_exit() -> None:
    """Signal handler: cancel in-flight downloads and schedule shutdown.

    A daemon thread hard-exits the process after 5 seconds in case the
    event loop never finishes unwinding.
    """
    log.info("shutdown.initiated")
    for pending in list(_tasks.values()):
        if not pending.done():
            pending.cancel()
    asyncio.create_task(_shutdown_waiter())

    def _force_exit() -> None:
        import time
        time.sleep(5)
        os._exit(1)

    threading.Thread(target=_force_exit, daemon=True).start()
@app.before_serving
async def _launch_tasks() -> None:
    # Startup hook: order matters — schema first, then seed data, then
    # background workers, then OS signal handlers.
    metadata.create_all(engine)  # ensure DB tables exist
    await init_proxy_seed()  # load proxy list into the DB
    start_background_tasks(asyncio.get_running_loop())
    asyncio.create_task(_cleanup_temp())  # periodic temp-dir sweeper
    loop = asyncio.get_running_loop()
    for sig in (signal.SIGTERM, signal.SIGINT):
        loop.add_signal_handler(sig, _graceful_exit)
@app.route("/")
async def home():
if not request.cookies.get("auth"):
return await render_template("login.html")
ip = request.remote_addr or "0.0.0.0"
ensure_user(ip)
soft_banned = get_status(ip)["soft_banned"]
return await render_template("index.html", soft_banned=soft_banned)
@app.route("/login", methods=["GET", "POST"])
async def login():
if request.method == "GET":
return await render_template("login.html")
form = await request.form
if form.get("password") == os.getenv("MASTER_PASSWORD"):
resp = redirect(url_for("home"))
resp.set_cookie("auth", "1", httponly=True, secure=False)
return resp
return await render_template("login.html", error_badge="Incorrect password")
@app.route("/logout")
async def logout():
session.clear()
resp = redirect(url_for("login"))
resp.delete_cookie("auth")
return resp
@app.route("/choose_format", methods=["POST"])
async def handle_choose_format() -> Response:
url: str
try:
if request.content_type and "application/json" in request.content_type:
data = await request.get_json(silent=True) or {}
url = (data.get("url") or "").strip()
else:
form = await request.form
url = (form.get("url") or "").strip()
if not url:
#log.warning("choose_format.missing_url")
return jsonify({"error": "url field required"}), 422
run_id: str = session.get("run_id") or secrets.token_urlsafe(10)
session["run_id"] = run_id
#log.info("choose_format.run_id_set", run_id=run_id, url=url)
res: dict = await choose_format_logic(url)
res["sid"] = run_id
if "error" in res:
#log.warning("choose_format.logic_error", error=res["error"], url=url)
return jsonify(res), 400
log.info("choose_format.success", url=url, title=res.get("title"), platform=res.get("platform"))
return jsonify(res)
except Exception as e:
log.exception("choose_format.exception", err=str(e))
return jsonify({"error": "Internal error during format selection"}), 500
@app.route("/download_file")
async def dl():
ip = request.remote_addr or "0.0.0.0"
if ip == "127.0.0.1":
log.info("dev_mode.skip_ban_check", ip=ip)
else:
if is_ip_banned(ip):
log.warning("download.reject.banned", ip=ip)
return jsonify({"error": "Banned"}), 403
url = request.args.get("url", "").strip()
fmt = request.args.get("format_id", "").strip()
sid = request.args.get("sid", "").strip()
run_id = session.get("run_id")
if run_id is None:
pass
if is_ip_banned(ip):
log.warning("download.reject.banned", ip=ip)
return jsonify({"error": "Banned"}), 403
if not url or not fmt:
log.warning("download.reject.missing_params", url=url, fmt=fmt)
return jsonify({"error": "Missing URL or format"}), 400
if sid in _tasks and not _tasks[sid].done():
log.warning("download.reject.already_running", sid=sid)
return jsonify({"error": "download already running"}), 409
if sid != run_id:
log.warning("download.reject.sid_mismatch", sid=sid, session_run_id=run_id)
return jsonify({
"error": "Session mismatch please refresh the page and select a format again."
}), 403
progress_bus.register(sid)
async def _run_download() -> Path:
try:
meta = await asyncio.to_thread(_lookup_cache_sync, url)
if meta:
chosen = next((f for f in meta["formats"] if f["format_id"] == fmt), None)
est = (
chosen.get("filesize")
or chosen.get("filesize_approx")
or 0
) if chosen else 0
EST_MB.set(int(est / 1_048_576))
log.info("download.starting", sid=sid, url=url, fmt=fmt)
path_str = await download(url, fmt, ip, sid)
return Path(path_str)
finally:
_tasks.pop(sid, None)
task = asyncio.create_task(_run_download())
_tasks[sid] = task
try:
tmp_path = await task
mime = mimetypes.guess_type(tmp_path.name)[0] or "application/octet-stream"
log.info("download.success", file=str(tmp_path), sid=sid)
resp = Response(
_file_iter(tmp_path),
headers={
"Content-Type": mime,
"Content-Disposition": f'attachment; filename="{tmp_path.name}"',
},
)
if hasattr(resp, "call_after_response"):
def _after():
progress_bus.update(sid, status="finished", pct=100, progress="Done")
progress_bus.clear(sid)
if str(tmp_path.parent).startswith(str(TMP_DIR)):
shutil.rmtree(tmp_path.parent, ignore_errors=True)
resp.call_after_response(_after)
return resp
except asyncio.CancelledError:
log.warning("download.cancelled", sid=sid)
progress_bus.update(sid, status="cancelled", progress="Cancelled")
return jsonify({"error": "Download cancelled"}), 499
except Exception as e:
log.exception("download.failed", sid=sid, err=str(e))
progress_bus.update(sid, status="error", progress="Error")
return jsonify({"error": "Download failed"}), 500
@app.route("/cancel_download", methods=["POST"])
async def cancel_dl():
sid = request.args.get("sid", "").strip()
if sid:
task = _tasks.get(sid)
if task and not task.done():
task.cancel()
progress_bus.update(sid, status="cancelled", progress="Cancelled")
return jsonify({"status": "cancelled"})
@app.route("/api/progress/<sid>")
async def progress_stream(sid: str):
q = progress_bus.subscribe(sid)
async def gen():
while True:
msg = await q.get()
yield f"data: {msg}\n\n"
return Response(
gen(),
content_type="text/event-stream",
headers={"Cache-Control": "no-store"},
)
@app.before_serving
async def _on_startup():
    # NOTE(review): second before_serving hook; the real startup work lives
    # in _launch_tasks above.  Kept as an intentional placeholder.
    pass

320
backend/web/db_extra.py Normal file
View File

@@ -0,0 +1,320 @@
"""
backend/web/db_extra.py - 16 May 2025
"""
from __future__ import annotations
import asyncio
import datetime as dt
from datetime import timezone
import structlog
from typing import List
from sqlalchemy import (
Table, Column, Text, Float, Integer, Boolean, DateTime, func,
select, insert, update, delete, or_, inspect, text
)
from sqlalchemy.dialects.postgresql import insert as pg_insert
from backend.core.db import SessionLocal, metadata, engine
from backend.core.settings import (
MAX_IN_USE, FAIL_COOLDOWN_SEC, MIN_SCORE, PROXY_LIST_FILE,
_WINDOW_MINUTES, PROXY_USERNAME, PROXY_PASSWORD,
_MAX_LOGIN_FAILS, _MAX_INVALID_URLS
)
log = structlog.get_logger()
# True when the configured engine is PostgreSQL, False for SQLite — selects
# upsert syntax (_insert_ignore) and greatest()/max() (_clamp_zero) below.
_IS_PG = engine.url.get_backend_name().startswith("postgres")
def _insert_ignore(tbl: Table, **vals):
    """INSERT that silently skips rows violating a unique constraint.

    ON CONFLICT DO NOTHING on PostgreSQL; INSERT OR IGNORE on SQLite.
    """
    if not _IS_PG:
        return insert(tbl).prefix_with("OR IGNORE").values(**vals)
    return pg_insert(tbl).values(**vals).on_conflict_do_nothing()
def _clamp_zero(expr):
    """Portable SQL for ``max(expr, 0)``: GREATEST on PG, scalar MAX on SQLite."""
    if _IS_PG:
        return func.greatest(expr, 0)
    return func.max(expr, 0)
# Proxy pool: score is a health metric (raised on success, lowered on
# failure by the flusher), in_use counts concurrent leases, last_fail
# drives the FAIL_COOLDOWN_SEC backoff in _candidate_stmt.
proxy_tbl = Table(
    "proxies", metadata,
    Column("url", Text, primary_key=True),
    Column("score", Float, nullable=False, server_default="1.0"),
    Column("fails", Integer, nullable=False, server_default="0"),
    Column("banned", Boolean, nullable=False, server_default="false"),
    Column("in_use", Integer, nullable=False, server_default="0"),
    Column("last_fail", DateTime),
    Column("updated_at", DateTime, server_default=func.now(), index=True),
)
# Failed-login counter per client IP (windowed limit via _over_limit).
login_tbl = Table(
    "login_attempts", metadata,
    Column("ip", Text, primary_key=True),
    Column("count", Integer, nullable=False, server_default="0"),
    Column("updated_at", DateTime, nullable=False, server_default=func.now()),
)
# Invalid-URL counter per client IP (same windowed limit mechanism).
invalid_tbl = Table(
    "invalid_urls", metadata,
    Column("ip", Text, primary_key=True),
    Column("count", Integer, nullable=False, server_default="0"),
    Column("updated_at", DateTime, nullable=False, server_default=func.now()),
)
# Rolling download-outcome log; trimmed to the newest 500 rows in add_dl_stat.
dl_stats = Table(
    "dl_stats", metadata,
    Column("id", Integer, primary_key=True, autoincrement=True),
    Column("ok", Boolean, nullable=False),
    Column("ts", DateTime, nullable=False, server_default=func.now(), index=True),
)
def _ensure_proxy_columns() -> None:
    """Lightweight in-place migration: add columns newer code expects.

    Older deployments created ``proxies`` without in_use/last_fail; add
    whichever are missing.  No-op when the table doesn't exist yet.
    """
    insp = inspect(engine)
    if "proxies" not in insp.get_table_names():
        return
    existing = {c["name"] for c in insp.get_columns("proxies")}
    missing: list[tuple[str, str]] = [
        (col, ddl)
        for col, ddl in (("in_use", "INTEGER DEFAULT 0"), ("last_fail", "TIMESTAMP"))
        if col not in existing
    ]
    if not missing:
        return
    with engine.begin() as conn:
        for col, ddl in missing:
            # SQLite has no ADD COLUMN IF NOT EXISTS, hence the branch.
            if _IS_PG:
                conn.execute(text(f"ALTER TABLE proxies ADD COLUMN IF NOT EXISTS {col} {ddl};"))
            else:
                conn.execute(text(f"ALTER TABLE proxies ADD COLUMN {col} {ddl};"))
    log.info("proxy.schema.auto_migrated", added=[c for c, _ in missing])
#metadata.create_all(engine)
_ensure_proxy_columns()
def _seed() -> None:
    """Load proxies from PROXY_LIST_FILE (one ``ip:port`` per line) into the DB.

    Robustness fix: a line without a ``:`` used to raise ValueError and
    abort the whole seed transaction; such lines are now logged and
    skipped.
    """
    if not PROXY_LIST_FILE.exists():
        return
    with SessionLocal.begin() as s:
        for ln in PROXY_LIST_FILE.read_text().splitlines():
            ln = ln.strip()
            if not ln:
                continue
            if ":" not in ln:
                log.warning("proxy.seed.skip_malformed", line=ln)
                continue
            ip, port = ln.split(":", 1)
            # Embed credentials only when a username is configured.
            px = (
                f"http://{PROXY_USERNAME}:{PROXY_PASSWORD}@{ip}:{port}"
                if PROXY_USERNAME else f"http://{ip}:{port}"
            )
            s.execute(_insert_ignore(proxy_tbl, url=px))
def _candidate_stmt(now: dt.datetime):
    """Build the SELECT ... FOR UPDATE that picks the best available proxy.

    Eligible proxies are unbanned, above MIN_SCORE, below the concurrency
    cap, and past the failure cooldown; a small random jitter on the score
    spreads load across equally-good proxies.
    """
    cooled_before = now - dt.timedelta(seconds=FAIL_COOLDOWN_SEC)
    jittered_score = proxy_tbl.c.score + func.random() * 0.01
    eligible = (
        proxy_tbl.c.banned.is_(False),
        proxy_tbl.c.score > MIN_SCORE,
        proxy_tbl.c.in_use < MAX_IN_USE,
        or_(proxy_tbl.c.last_fail.is_(None), proxy_tbl.c.last_fail < cooled_before),
    )
    return (
        select(proxy_tbl.c.url)
        .where(*eligible)
        .order_by(jittered_score.desc())
        .limit(1)
        .with_for_update(nowait=False)
    )
def acquire_proxy() -> str | None:
    """Atomically lease the best available proxy, or None when none qualify.

    The in_use counter is incremented in the same transaction as the
    FOR UPDATE select, so concurrent callers can't over-lease a proxy.
    """
    now = dt.datetime.now(timezone.utc)
    with SessionLocal.begin() as s:
        candidate = s.execute(_candidate_stmt(now)).first()
        if candidate is None:
            return None
        leased_url = candidate[0]
        s.execute(
            update(proxy_tbl)
            .where(proxy_tbl.c.url == leased_url)
            .values(in_use=proxy_tbl.c.in_use + 1, updated_at=now)
        )
    return leased_url
def release_proxy(px: str, ok: bool) -> None:
    """Return a leased proxy to the pool, stamping last_fail on failure.

    "DIRECT" (no proxy) and empty values are ignored.  in_use is clamped
    at zero so a double release can't drive it negative.
    """
    if not px or px == "DIRECT":
        return
    now = dt.datetime.now(timezone.utc)
    with SessionLocal.begin() as s:
        s.execute(
            update(proxy_tbl)
            .where(proxy_tbl.c.url == px)
            .values(
                in_use=_clamp_zero(proxy_tbl.c.in_use - 1),
                updated_at=now,
                last_fail=None if ok else now,
            )
        )
_buffer: asyncio.Queue[tuple[str, bool]] = asyncio.Queue(maxsize=2048)
def queue_proxy_result(px: str, ok: bool) -> None:
try:
_buffer.put_nowait((px, ok))
except asyncio.QueueFull:
try:
_buffer.get_nowait()
_buffer.put_nowait((px, ok))
except Exception:
pass
def _flush_batch(batch: dict[str, tuple[int, int]]) -> None:
    """Apply one aggregated batch of proxy outcomes to the DB (blocking)."""
    now = dt.datetime.now(timezone.utc)
    with SessionLocal.begin() as s:
        for px, (succ, fail) in batch.items():
            # Score moves +0.1 per success, -0.2 per failure (clamped >= 0);
            # a proxy is banned once accumulated fails exceed 5.
            delta = 0.1 * succ - 0.2 * fail
            stmt = (
                update(proxy_tbl)
                .where(proxy_tbl.c.url == px)
                .values(
                    score=_clamp_zero(proxy_tbl.c.score + delta),
                    fails=_clamp_zero(proxy_tbl.c.fails + fail - succ),
                    banned=(proxy_tbl.c.fails + fail) > 5,
                    updated_at=now,
                )
            )
            s.execute(stmt)
async def _flusher() -> None:
    """Background task: drain queued proxy results every 0.4 s and batch-update.

    Fix: the synchronous SQLAlchemy transaction used to run directly on
    the event loop, blocking every other coroutine for the duration of
    the commit; the DB work now runs in a worker thread via
    asyncio.to_thread.
    """
    while True:
        await asyncio.sleep(0.4)
        if _buffer.empty():
            continue
        # Aggregate (successes, failures) per proxy so each gets one UPDATE.
        batch: dict[str, tuple[int, int]] = {}
        while not _buffer.empty():
            px, ok = _buffer.get_nowait()
            succ, fail = batch.get(px, (0, 0))
            if ok:
                succ += 1
            else:
                fail += 1
            batch[px] = (succ, fail)
        await asyncio.to_thread(_flush_batch, batch)
def start_background_tasks(loop: asyncio.AbstractEventLoop) -> None:
    """Schedule the proxy-result flusher and the (threaded) proxy seed."""
    for awaitable in (_flusher(), asyncio.to_thread(_seed)):
        loop.create_task(awaitable)
# Number of most-recent downloads considered by recent_success_rate().
_WINDOW_N = 50
def add_dl_stat(ok: bool) -> None:
    """Record one download outcome and trim dl_stats to the newest 500 rows."""
    now = dt.datetime.now(timezone.utc)
    with SessionLocal.begin() as s:
        s.execute(insert(dl_stats).values(ok=ok, ts=now))
        # Keep the 500 highest (i.e. newest) ids; delete everything else.
        newest_ids = (
            select(dl_stats.c.id)
            .order_by(dl_stats.c.id.desc())
            .limit(500)
        )
        s.execute(delete(dl_stats).where(~dl_stats.c.id.in_(newest_ids)))
def recent_success_rate(n: int = _WINDOW_N) -> float:
    """Fraction of the last *n* downloads that succeeded (0.5 when no data)."""
    with SessionLocal() as s:
        stmt = select(dl_stats.c.ok).order_by(dl_stats.c.id.desc()).limit(n)
        outcomes = s.execute(stmt).scalars().all()
    if not outcomes:
        return 0.5
    return sum(outcomes) / len(outcomes)
def _inc(table: Table, ip: str) -> None:
    """Increment the per-IP counter row in *table*, inserting it at 1 if absent.

    Fix: the old code selected the whole row and used ``row.count``; on a
    SQLAlchemy Row (a tuple subtype) that attribute can resolve to the
    tuple method ``count`` rather than the "count" column, breaking the
    increment.  Select only the column, and increment server-side
    (``count = count + 1``) to avoid the read-modify-write race as well.
    """
    now = dt.datetime.now(timezone.utc)
    with SessionLocal.begin() as s:
        existing = s.execute(
            select(table.c.count).where(table.c.ip == ip)
        ).scalar_one_or_none()
        if existing is None:
            s.execute(insert(table).values(ip=ip, count=1, updated_at=now))
        else:
            s.execute(
                update(table)
                .where(table.c.ip == ip)
                .values(count=table.c.count + 1, updated_at=now)
            )
def record_login(ip: str, success: bool) -> None:
    """On success, zero the IP's failed-login counter; on failure, bump it."""
    if not success:
        _inc(login_tbl, ip)
        return
    with SessionLocal.begin() as s:
        s.execute(update(login_tbl).where(login_tbl.c.ip == ip).values(count=0))
def inc_invalid(ip: str) -> None:
    """Count one invalid-URL submission against *ip* (see invalid_over_limit)."""
    _inc(invalid_tbl, ip)
def _over_limit(table: Table, ip: str, cap: int) -> bool:
    """True when *ip*'s counter in *table* reached *cap* inside the window.

    Counters older than _WINDOW_MINUTES are reset to zero and the call
    reports not-over-limit.  Naive timestamps from the DB are treated
    as UTC.
    """
    with SessionLocal() as s:
        row = s.execute(
            select(table.c.count, table.c.updated_at).where(table.c.ip == ip)
        ).first()
    if row is None:
        return False
    count, ts = row
    if ts.tzinfo is None:
        ts = ts.replace(tzinfo=timezone.utc)
    age = (dt.datetime.now(timezone.utc) - ts).total_seconds()
    if age > _WINDOW_MINUTES * 60:
        # Window expired: reset the counter and allow.
        with SessionLocal.begin() as sx:
            sx.execute(update(table).where(table.c.ip == ip).values(count=0))
        return False
    return count >= cap
def too_many_attempts(ip: str) -> bool:
    """True when *ip* exceeded the failed-login cap within the rate window."""
    return _over_limit(login_tbl, ip, _MAX_LOGIN_FAILS)
def invalid_over_limit(ip: str) -> bool:
    """True when *ip* exceeded the invalid-URL cap within the rate window."""
    return _over_limit(invalid_tbl, ip, _MAX_INVALID_URLS)
def pick_proxy() -> str | None:
    """Backwards-compatible alias for acquire_proxy()."""
    return acquire_proxy()
def ensure_proxy(px: str) -> None:
    """Insert *px* into the proxy pool if it isn't already present."""
    with SessionLocal.begin() as sess:
        sess.execute(_insert_ignore(proxy_tbl, url=px))
def update_proxy(px: str, ok: bool) -> None:
    """Record a proxy outcome via the buffered queue (flushed by _flusher)."""
    queue_proxy_result(px, ok)
async def init_proxy_seed() -> None:
    """Run the blocking proxy seed in a worker thread (app startup hook)."""
    await asyncio.to_thread(_seed)