cash
2026-03-29 23:50:49 -05:00
commit eb5e194331
56 changed files with 4010 additions and 0 deletions

0 backend/core/__init__.py Normal file

73 backend/core/db.py Normal file

@@ -0,0 +1,73 @@
"""
Shared SQLAlchemy engine / session + schema bootstrap
"""
from __future__ import annotations
import importlib, datetime
import os
from sqlalchemy import (
create_engine, event, Table, Column, Text, DateTime, Index, make_url
)
from sqlalchemy.engine import url
from sqlalchemy.orm import sessionmaker, registry
from backend.core.settings import (
SQLALCHEMY_DATABASE_URI, DB_POOL_SIZE, DB_ECHO, SKIP_SCHEMA_BOOTSTRAP
)
IS_PG = SQLALCHEMY_DATABASE_URI.startswith("postgresql")
parsed_url = make_url(SQLALCHEMY_DATABASE_URI) # string into URL object
# engine & session
connect_args = {"sslmode": "require"} if parsed_url.drivername.startswith("postgresql") else {}
engine = create_engine(
SQLALCHEMY_DATABASE_URI,
pool_size = DB_POOL_SIZE,
max_overflow = 20,
pool_timeout = 30,
echo = DB_ECHO,
future = True,
pool_pre_ping = True,
pool_recycle=3600,
connect_args = connect_args
)
# SQLite -> WAL for concurrency
if SQLALCHEMY_DATABASE_URI.startswith("sqlite:///"):
@event.listens_for(engine, "connect")
def _set_wal(dbapi_conn, _):
dbapi_conn.execute("PRAGMA journal_mode=WAL;")
SessionLocal = sessionmaker(bind=engine, autoflush=False,
expire_on_commit=False, future=True)
# metadata (tables from every module)
mapper_registry = registry()
metadata = mapper_registry.metadata
# download-cache table
download_cache = Table(
"download_cache", metadata,
Column("key", Text, primary_key=True),
Column("path", Text, nullable=False),
Column("ext", Text, nullable=False),
Column("created_at", DateTime, default=datetime.datetime.utcnow,
nullable=False, index=True),
)
Index("ix_download_cache_created", download_cache.c.created_at)
# auto-bootstrap all
def _bootstrap_schema() -> None:
"""Import modules then create."""
table_modules = (
"backend.core.db_xp",
"backend.web.db_extra",
"backend.core.formats",
)
for mod in table_modules:
importlib.import_module(mod)
metadata.create_all(engine)
if SKIP_SCHEMA_BOOTSTRAP != "1":
_bootstrap_schema()

14 backend/core/db_cache.py Normal file

@@ -0,0 +1,14 @@
"""
compat layer - exposes getconn used by older code
"""
from contextlib import contextmanager
from backend.core.db import engine
@contextmanager
def getconn():
conn = engine.raw_connection()
try:
yield conn
finally:
conn.commit()
conn.close()
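
A minimal caller sketch (the table queried below is just an example):

# hypothetical caller: raw DBAPI access through the compat shim
from backend.core.db_cache import getconn

with getconn() as conn:
    cur = conn.cursor()
    cur.execute("SELECT COUNT(*) FROM download_cache")
    print(cur.fetchone()[0])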

31 backend/core/db_utils.py Normal file

@@ -0,0 +1,31 @@
# backend/core/db_utils.py
from sqlalchemy import insert as sa_insert
from sqlalchemy.dialects.postgresql import insert as pg_insert

# import through the package path so this module shares the engine created by
# backend.core.db (importing it as plain "core.db" would load a second copy)
from backend.core.db import engine

_IS_PG = engine.url.get_backend_name().startswith("postgres")

def upsert(
    tbl,
    insert_values: dict,
    conflict_cols: list[str],
    update_values: dict | None = None,
):
    """Build a dialect-appropriate upsert statement; the caller executes it."""
    if _IS_PG:
        stmt = (
            pg_insert(tbl)
            .values(**insert_values)
            .on_conflict_do_update(
                index_elements=conflict_cols,
                set_=update_values or insert_values,
            )
        )
    else:
        # SQLite: INSERT OR REPLACE swaps out the whole conflicting row,
        # so conflict_cols / update_values are not consulted on this path
        stmt = (
            sa_insert(tbl)
            .values(**insert_values)
            .prefix_with("OR REPLACE")
        )
    return stmt
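
A minimal usage sketch, mirroring how download.py builds its cache upsert (the values are illustrative):

from backend.core.db import SessionLocal, download_cache
from backend.core.db_utils import upsert

stmt = upsert(
    download_cache,
    insert_values={"key": "abc123", "path": "/data/x.mp3", "ext": "mp3"},
    conflict_cols=["key"],
)
with SessionLocal.begin() as session:
    session.execute(stmt)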

70 backend/core/db_xp.py Normal file

@@ -0,0 +1,70 @@
"""db_xp.py minimal user helper"""
from __future__ import annotations
from datetime import datetime, timezone
from typing import Dict, Any
from sqlalchemy import (
Table, MetaData, select, func, insert as sa_insert, text, inspect
)
from sqlalchemy.dialects.postgresql import insert as pg_insert
from backend.core.db import SessionLocal, engine
_NOW = lambda: datetime.now(timezone.utc)
metadata = MetaData()
_IS_PG = engine.url.get_backend_name().startswith("postgres")
def _get_users_table() -> Table:
return Table("users", metadata, autoload_with=engine)
def _get_column_info():
try:
insp = inspect(engine)
cols = {c["name"]: c for c in insp.get_columns("users")}
return cols
except Exception:
return {}
def _insert_ignore(**vals):
users = _get_users_table()
if _IS_PG:
return (
pg_insert(users)
.values(**vals)
.on_conflict_do_nothing(index_elements=["ip"])
)
return sa_insert(users).values(**vals).prefix_with("OR IGNORE")
def ensure_user(ip: str) -> None:
cols = _get_column_info()
has_data = (
"data" in cols and
not cols["data"].get("nullable", True) # NOT NULL
)
vals = dict(
ip=ip,
first_visit=_NOW(),
ban_status=False,
soft_banned=False,
)
if has_data:
vals["data"] = {}
stmt = _insert_ignore(**vals)
with SessionLocal.begin() as s:
s.execute(stmt)
def is_ip_banned(ip: str) -> bool:
users = _get_users_table()
with SessionLocal() as s:
return bool(s.scalar(select(users.c.ban_status).where(users.c.ip == ip)))
def get_status(ip: str) -> Dict[str, Any]:
ensure_user(ip)
users = _get_users_table()
with SessionLocal() as s:
soft = s.scalar(select(users.c.soft_banned).where(users.c.ip == ip))
return {"soft_banned": bool(soft)}

287 backend/core/download.py Normal file

@@ -0,0 +1,287 @@
"""backend/core/download.py — patched 2025-06-03"""
from __future__ import annotations
import asyncio
import contextvars
import datetime
import hashlib
import os
import random
import re
from pathlib import Path
from typing import Dict
import yt_dlp
from sqlalchemy import select, delete
from sqlalchemy.exc import NoResultFound
from core.db_utils import upsert
from core.settings import (
DOWNLOAD_DIR,
TMP_DIR,
PER_IP_CONCURRENCY,
DOWNLOAD_CACHE_TTL_SEC,
)
from core.network import get_proxy, record_proxy
from core.db_xp import ensure_user
from core.db import SessionLocal, download_cache
from core.progress_bus import update as set_progress
from core.formats import _cached_metadata_fetch, _clean_proxy
os.makedirs(DOWNLOAD_DIR, exist_ok=True)
os.makedirs(TMP_DIR, exist_ok=True)
EST_MB = contextvars.ContextVar("est_mb", default=0)
MAX_GLOBAL_DOWNLOADS = PER_IP_CONCURRENCY * 4
_global_semaphore = asyncio.Semaphore(MAX_GLOBAL_DOWNLOADS)
_ip_semaphores: Dict[str, asyncio.BoundedSemaphore] = {}
_inflight: Dict[str, asyncio.Task[str]] = {}
_ip_cache: Dict[str, set[str]] = {}
def _get_ip_cache(ip: str) -> set[str]:
return _ip_cache.setdefault(ip, set())
def _url_fmt_hash(url: str, fmt: str) -> str:
return hashlib.blake2s(f"{url}::{fmt}".encode(), digest_size=16).hexdigest()
_ansi_escape = re.compile(r"\x1B\[[0-?]*[ -/]*[@-~]")
async def download(url: str, fmt_id: str, ip: str, sid: str) -> str:
fmt = fmt_id or "bestaudio"
key = _url_fmt_hash(url, fmt)
dup_key = f"{ip}::{url}::{fmt}"
cached = await asyncio.to_thread(_lookup_cache_sync, key)
if cached:
set_progress(sid, status="cached", pct=100, progress="Instant served from cache")
return cached
if key in _inflight:
return await _inflight[key]
ip_cache_set = _get_ip_cache(ip)
if dup_key in ip_cache_set:
cached2 = await asyncio.to_thread(_lookup_cache_sync, key)
if cached2:
set_progress(sid, status="cached", pct=100, progress="Instant served from cache")
return cached2
ip_cache_set.add(dup_key)
sem = _ip_semaphores.setdefault(ip, asyncio.BoundedSemaphore(PER_IP_CONCURRENCY))
async def _run() -> str:
async with _global_semaphore, sem:
ensure_user(ip)
set_progress(sid, status="starting", pct=0, progress="Starting…")
try:
info = await asyncio.to_thread(_cached_metadata_fetch, url)
except Exception as e:
set_progress(sid, status="error", progress=f"Metadata fetch failed: {e}")
raise
attempt = 0
last_exc: Exception | None = None
while attempt < 3:
attempt += 1
proxies = [get_proxy() for _ in range(5)]
random.shuffle(proxies)
proxies.append("DIRECT")
for proxy_url in proxies:
try:
final_path = await _single_download(
url,
fmt,
key,
sid,
proxy_url,
info,
)
except asyncio.CancelledError:
raise
except Exception as exc:
last_exc = exc
record_proxy(proxy_url, False)
clean_proxy = _clean_proxy(proxy_url)
set_progress(
sid,
status="retrying",
progress=f"Retry {attempt} failed (proxy {clean_proxy})",
)
await asyncio.sleep(1 + random.random())
continue
else:
record_proxy(proxy_url, True)
await asyncio.to_thread(_store_cache_sync, key, final_path)
set_progress(sid, status="finished", pct=100, progress="Done")
return final_path
set_progress(sid, status="error", progress="Download failed")
raise RuntimeError(f"All download attempts failed: {last_exc!r}")
task = asyncio.create_task(_run())
_inflight[key] = task
try:
return await task
finally:
_inflight.pop(key, None)
asyncio.create_task(_expire_ip_cache_entry(ip, dup_key))
async def _single_download(
url: str,
fmt: str,
_unused_cache_key: str,
sid: str,
proxy_url: str,
info: dict,
) -> str:
title = info.get("title") or "unknown"
artist = info.get("artist") or info.get("uploader") or "unknown"
def _clean(s: str) -> str:
return re.sub(r'[\\/*?:"<>|]', "", s)
safe_title = _clean(title)
safe_artist = _clean(artist)
short_id = hashlib.blake2s(f"{url}::{fmt}".encode(), digest_size=8).hexdigest()
base = f"{safe_title} - {safe_artist} - {short_id}"
fmt_entry = next((f for f in info.get("formats", []) if f.get("format_id") == fmt), None)
is_audio_only = bool(fmt_entry and fmt_entry.get("vcodec") == "none")
if "soundcloud.com" in url.lower():
is_audio_only = True
# force .mp3 for audio-only, .mp4 otherwise
ext_guess = "mp3" if is_audio_only else "mp4"
outtmpl_path = DOWNLOAD_DIR / f"{base}.%(ext)s"
final_path_expected = DOWNLOAD_DIR / f"{base}.{ext_guess}"
if final_path_expected.exists() and final_path_expected.stat().st_size > 0:
return str(final_path_expected)
cmd = ["yt-dlp", "-f", fmt, "-o", str(outtmpl_path), url]
if is_audio_only:
cmd = ["yt-dlp", "-x", "--audio-format", "mp3", "-o", str(outtmpl_path), url]
else:
cmd = ["yt-dlp", "-f", f"{fmt}+bestaudio", "-o", str(outtmpl_path), url]
if proxy_url and proxy_url.upper() != "DIRECT":
cmd.insert(1, f"--proxy={proxy_url}")
proc = await asyncio.create_subprocess_exec(
*cmd,
stdout=asyncio.subprocess.PIPE,
stderr=asyncio.subprocess.PIPE,
)
stderr_buffer: list[str] = []
try:
while True:
try:
line = await asyncio.wait_for(proc.stderr.readline(), timeout=10)
except asyncio.TimeoutError:
break
if not line:
break
decoded = line.decode(errors="ignore").strip()
if decoded:
stderr_buffer.append(decoded)
if len(stderr_buffer) > 10:
stderr_buffer.pop(0)
set_progress(sid, status="running", pct=None, progress=decoded)
try:
rc = await asyncio.wait_for(proc.wait(), timeout=15)
except asyncio.TimeoutError:
proc.kill()
await proc.wait()
raise RuntimeError("yt-dlp stalled and was killed (timeout)")
if rc != 0:
raise RuntimeError(f"yt-dlp exited with code {rc}. Last lines: {' | '.join(stderr_buffer)}")
candidates = [
p for p in DOWNLOAD_DIR.glob(f"{base}.*")
if p.is_file() and p.stat().st_size > 0
]
if not candidates:
raise RuntimeError("No output file produced")
candidates.sort(key=lambda p: p.stat().st_mtime, reverse=True)
return str(candidates[0])
except asyncio.CancelledError:
proc.kill()
await proc.wait()
raise
except Exception:
for f in DOWNLOAD_DIR.glob(f"{base}.*"):
f.unlink(missing_ok=True)
raise
def _lookup_cache_sync(key: str) -> str | None:
now = datetime.datetime.now(datetime.timezone.utc)
with SessionLocal() as session:
try:
row = session.execute(
select(download_cache.c.path, download_cache.c.created_at)
.where(download_cache.c.key == key)
).one()
except NoResultFound:
return None
path_on_disk, created_at = row
if created_at.tzinfo is None:
created_at = created_at.replace(tzinfo=datetime.timezone.utc)
age = (now - created_at).total_seconds()
if age > DOWNLOAD_CACHE_TTL_SEC:
session.execute(delete(download_cache).where(download_cache.c.key == key))
session.commit()
try:
os.remove(path_on_disk)
except OSError:
pass
return None
if not os.path.exists(path_on_disk):
session.execute(delete(download_cache).where(download_cache.c.key == key))
session.commit()
return None
return path_on_disk
def _store_cache_sync(key: str, path: str) -> None:
now = datetime.datetime.now(datetime.timezone.utc)
insert_values = {
"key": key,
"path": path,
"ext": Path(path).suffix.lstrip("."),
"created_at": now,
}
stmt = upsert(
download_cache,
insert_values=insert_values,
conflict_cols=["key"],
update_values={"path": path, "ext": Path(path).suffix.lstrip("."), "created_at": now},
)
with SessionLocal.begin() as session:
session.execute(stmt)
async def _expire_ip_cache_entry(ip: str, dup_key: str, delay: int = 300) -> None:
await asyncio.sleep(delay)
_get_ip_cache(ip).discard(dup_key)
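
A minimal caller sketch (the wrapper function is hypothetical): register a progress session, then await the deduplicated download.

import uuid
from backend.core import progress_bus
from backend.core.download import download

async def start_download(url: str, fmt_id: str, ip: str) -> dict:
    sid = uuid.uuid4().hex        # session id consumed by the progress bus
    progress_bus.register(sid)
    path = await download(url, fmt_id, ip, sid)
    return {"sid": sid, "path": path}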

265 backend/core/formats.py Normal file

@@ -0,0 +1,265 @@
"""backend/core/formats.py — patched 2025-06-03"""
from __future__ import annotations
import asyncio
import os
import re
import urllib.parse as _url
from datetime import datetime, timezone
from functools import lru_cache
from pathlib import Path
from urllib.parse import urlparse
import yt_dlp
import structlog
from sqlalchemy import select, delete, Table, Column, Text, DateTime, JSON
from sqlalchemy.dialects.postgresql import insert as pg_insert
from sqlalchemy.exc import NoResultFound
from core.db import SessionLocal, metadata
from core.network import get_proxy, record_proxy, stealth_headers
from core.settings import FORMAT_CACHE_TTL_SEC
log = structlog.get_logger()
format_cache = Table(
"format_cache",
metadata,
Column("url", Text, primary_key=True),
Column("cached_at", DateTime, nullable=False),
Column("info", JSON, nullable=False),
)
_YT_PAT = re.compile(r"(youtu\.be/|youtube\.com/(?:watch|shorts))", re.I)
_BC_PAT = re.compile(r"\.bandcamp\.com", re.I)
_SC_PAT = re.compile(r"(?:soundcloud\.com|on\.soundcloud\.com|m\.soundcloud\.com)", re.I)
_TW_PAT = re.compile(r"(?:twitter\.com|x\.com|mobile\.twitter\.com)", re.I)
_ansi_escape = re.compile(r"\x1B\[[0-?]*[ -/]*[@-~]")
# resolve cookie file path from env or fallback to root-relative path
COOKIE_FILE = Path(os.getenv("YT_COOKIE_FILE", Path(__file__).resolve().parents[2] / "playwright_cookies.txt"))
log.info("cookie_file_resolved", path=str(COOKIE_FILE), exists=COOKIE_FILE.exists())
def _canonical_url(u: str) -> str:
u = u.strip()
if not u.lower().startswith(("http://", "https://")):
return u
if _YT_PAT.search(u):
parsed = _url.urlparse(u)
if "youtu.be" in parsed.netloc:
vid = parsed.path.lstrip("/")
else:
q = _url.parse_qs(parsed.query)
vid = (q.get("v") or [None])[0]
if not vid and parsed.path.startswith("/shorts/"):
vid = parsed.path.split("/")[2]
return f"https://www.youtube.com/watch?v={vid}" if vid else u
if _BC_PAT.search(u):
parsed = _url.urlparse(u)
clean = parsed._replace(query="", fragment="")
return _url.urlunparse(clean)
if _SC_PAT.search(u):
u2 = (
u.replace("m.soundcloud.com", "soundcloud.com")
.replace("on.soundcloud.com", "soundcloud.com")
)
return u2.split("?")[0].split("#")[0]
if _TW_PAT.search(u):
parsed = _url.urlparse(
u.replace("mobile.twitter.com", "x.com").replace("twitter.com", "x.com")
)
clean = parsed._replace(query="", fragment="")
return _url.urlunparse(clean)
parsed = _url.urlparse(u)
clean = parsed._replace(query="", fragment="")
return _url.urlunparse(clean)
def _clean_proxy(proxy: str) -> str:
if not proxy or proxy.upper() == "DIRECT":
return "DIRECT"
parsed = urlparse(proxy)
return (
f"{parsed.scheme}://{parsed.hostname}{f':{parsed.port}' if parsed.port else ''}"
if parsed.hostname
else proxy
)
def platform_badge(u: str) -> str:
l = u.lower()
if "youtu" in l:
return "youtube"
if "soundcloud" in l:
return "soundcloud"
if "twitter" in l or "x.com" in l:
return "twitterx"
if "bandcamp" in l:
return "bandcamp"
return "other"
def user_facing_formats(fmts: list[dict]) -> list[dict]:
desired_heights = [1440, 1080, 720, 480, 360]
out: list[dict] = []
audio_only = [
f for f in fmts if f.get("vcodec") == "none" and f.get("acodec") != "none"
]
if audio_only:
best = max(audio_only, key=lambda x: x.get("tbr") or 0)
out.append(
{
"format_id": best["format_id"],
"ext": best.get("ext", "mp3"),
"label": "Audio (.mp3)",
}
)
for h in desired_heights:
candidates = [f for f in fmts if f.get("height") == h and f.get("vcodec") != "none"]
if candidates:
best = max(candidates, key=lambda x: x.get("tbr") or 0)
out.append(
{
"format_id": best["format_id"],
"ext": best.get("ext", "mp4"),
"label": f"{h}p",
}
)
return out
@lru_cache(maxsize=1024)
def _cached_metadata_fetch(url: str) -> dict:
opts = {"quiet": True, "skip_download": True}
try:
with yt_dlp.YoutubeDL(opts) as ydl:
return ydl.extract_info(url, download=False)
except Exception as e:
msg = _ansi_escape.sub("", str(e)).strip()
log.warning("metadata_fail_direct", url=url, err=msg)
raise
def _fetch_metadata_sync(url: str, proxy_url: str = "DIRECT") -> dict:
opts = {
"quiet": True,
"skip_download": True,
"proxy": None if proxy_url == "DIRECT" else proxy_url,
"http_headers": stealth_headers(),
"cookiefile": str(COOKIE_FILE),
}
if not COOKIE_FILE.exists():
log.warning("cookie_file_missing", path=str(COOKIE_FILE))
try:
with yt_dlp.YoutubeDL(opts) as ydl:
return ydl.extract_info(url, download=False)
except Exception as e:
clean_proxy = _clean_proxy(proxy_url)
msg = _ansi_escape.sub("", str(e)).strip()
log.warning("metadata_fail_proxy", url=url, proxy=clean_proxy, err=msg)
raise
async def _fetch_metadata(url: str) -> dict:
if any(x in url.lower() for x in ("youtube.com", "youtu.be", "bandcamp.com")):
return await asyncio.to_thread(_cached_metadata_fetch, url)
for attempt in range(1, 4):
proxy = get_proxy()
try:
info = await asyncio.to_thread(_fetch_metadata_sync, url, proxy)
if not info.get("formats"):
raise ValueError("No formats found")
record_proxy(proxy, True)
return info
except Exception as e:
record_proxy(proxy, False)
err_msg = _ansi_escape.sub("", str(e)).strip()
log.warning(
"metadata_retry_fail",
attempt=attempt,
proxy=_clean_proxy(proxy),
err=err_msg,
)
raise RuntimeError("Format fetch failed after 3 attempts")
async def choose_format(url: str) -> dict:
url = _canonical_url(url)
if not re.match(r"^https?://", url, re.I):
return {"error": "Invalid URL"}
if any(x in url.lower() for x in ("soundcloud.com", "x.com")):
return {"auto_download": True, "fmt_id": "bestaudio", "url": url}
info = await asyncio.to_thread(_lookup_cache_sync, url)
if info:
return {
"formats": user_facing_formats(info["formats"]),
"title": info.get("title", "Unknown"),
"platform": info.get("platform", ""),
"url": url,
}
info_raw = await _fetch_metadata(url)
cache_doc = {
"title": info_raw.get("title", "Unknown"),
"formats": info_raw.get("formats", []),
"platform": platform_badge(url),
}
await asyncio.to_thread(_store_cache_sync, url, cache_doc)
return {
"formats": user_facing_formats(info_raw.get("formats", [])),
"title": cache_doc["title"],
"platform": cache_doc["platform"],
"url": url,
}
def _lookup_cache_sync(url: str) -> dict | None:
now = datetime.now(timezone.utc)
with SessionLocal() as session:
try:
row = session.execute(
select(format_cache.c.info, format_cache.c.cached_at).where(
format_cache.c.url == url
)
).one()
except NoResultFound:
return None
info, cached_at = row
if cached_at.tzinfo is None:
cached_at = cached_at.replace(tzinfo=timezone.utc)
if (now - cached_at).total_seconds() > FORMAT_CACHE_TTL_SEC:
session.execute(delete(format_cache).where(format_cache.c.url == url))
session.commit()
return None
return info
def _store_cache_sync(url: str, info: dict) -> None:
now = datetime.now(timezone.utc)
stmt = (
pg_insert(format_cache)
.values(url=url, cached_at=now, info=info)
.on_conflict_do_update(index_elements=["url"], set_={"cached_at": now, "info": info})
)
with SessionLocal.begin() as session:
session.execute(stmt)
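
A minimal caller sketch for the format picker (the URL is illustrative):

import asyncio
from backend.core.formats import choose_format

async def main() -> None:
    result = await choose_format("https://youtu.be/dQw4w9WgXcQ")
    for f in result.get("formats", []):
        print(f["format_id"], f["label"])

asyncio.run(main())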

40 backend/core/logging.py Normal file

@@ -0,0 +1,40 @@
"""
Logging - 16 May 2025
Dev - colored console
Prod - structured JSON
"""
from __future__ import annotations
import logging, os, structlog
from core.settings import LOG_LEVEL, ENV
def init_logging() -> None:
log_level = getattr(logging, LOG_LEVEL.upper(), logging.INFO)
if ENV == "production":
processors = [
structlog.processors.TimeStamper(fmt="%Y-%m-%dT%H:%M:%S", utc=True),
structlog.processors.add_log_level,
_add_path,
structlog.processors.JSONRenderer(),
]
else:
processors = [
structlog.processors.TimeStamper(fmt="%H:%M:%S"),
structlog.processors.add_log_level,
structlog.dev.ConsoleRenderer(colors=True),
]
structlog.configure(
wrapper_class=structlog.make_filtering_bound_logger(log_level),
processors=processors,
)
logging.getLogger("sqlalchemy.engine").setLevel(logging.WARNING)
def _add_path(_, __, event_dict):
from quart import request
if request:
event_dict["path"] = request.path
return event_dict

49 backend/core/network.py Normal file

@@ -0,0 +1,49 @@
"""
network.py - 16 May 2025
"""
from __future__ import annotations
import random, structlog
from functools import lru_cache
from typing import Optional
from fake_useragent import UserAgent
from tls_client import Session as TLSSession
from backend.web.db_extra import acquire_proxy, release_proxy, queue_proxy_result
log = structlog.get_logger()
@lru_cache(maxsize=128)
def stealth_headers(rotate: bool = False) -> dict[str, str]:
if rotate:
stealth_headers.cache_clear()
browsers = ["chrome", "firefox", "edge"]
browser = random.choice(browsers)
client_id_map = {
"chrome": ["chrome_122", "chrome_121", "chrome_120"],
"firefox": ["firefox_123"],
"edge": ["edge_121"],
}
client_id = random.choice(client_id_map[browser])
TLSSession(client_identifier=client_id)
headers = {
"User-Agent": UserAgent()[browser],
"Accept-Language": random.choice(
["en-US,en;q=0.9", "en-GB,en;q=0.9", "en;q=0.8"]
),
}
return headers
def get_proxy() -> str:
px = acquire_proxy()
if px:
return px
log.debug("proxy.none", msg="DIRECT fallback")
return "DIRECT"
def record_proxy(px: str, ok: bool) -> None:
if not px or px == "DIRECT":
return
queue_proxy_result(px, ok)
release_proxy(px, ok)

62 backend/core/progress_bus.py Normal file

@@ -0,0 +1,62 @@
"""
progress_bus.py - 07 May 2025
"""
from __future__ import annotations
import asyncio, json, time
from typing import Dict, Any
# in-mem state
_progress: Dict[str, Dict[str, Any]] = {}
_watchers: Dict[str, list[asyncio.Queue[str]]] = {}
_TTL = 60 * 60 # keep finished/error records 1 h
def _now() -> float: return time.time()
def register(sid: str) -> None:
_progress[sid] = dict(pct=0, progress="", status="running", ts=_now())
_broadcast(sid)
def update(sid: str, *, pct: float | None = None,
progress: str | None = None, status: str | None = None) -> None:
if sid not in _progress:
register(sid)
p = _progress[sid]
if pct is not None: p["pct"] = pct
if progress is not None: p["progress"] = progress
if status is not None: p["status"] = status
p["ts"] = _now()
_broadcast(sid)
def get(sid: str) -> Dict[str, Any]:
_gc()
return _progress.get(sid, {"status": "idle"})
def clear(sid: str) -> None:
_progress.pop(sid, None)
_watchers.pop(sid, None)
# SSE integration
def subscribe(sid: str) -> asyncio.Queue[str]:
q: asyncio.Queue[str] = asyncio.Queue(maxsize=16)
_watchers.setdefault(sid, []).append(q)
# immediately push current state
q.put_nowait(json.dumps({"sid": sid, **get(sid)}))
return q
def _broadcast(sid: str) -> None:
if sid not in _watchers:
return
payload = json.dumps({"sid": sid, **_progress[sid]})
for q in list(_watchers[sid]):
try:
q.put_nowait(payload)
except asyncio.QueueFull:
pass # drop frame
# garbage collector
def _gc() -> None:
now = _now()
stale = [k for k, v in _progress.items()
if v["status"] in ("finished", "error") and now - v["ts"] > _TTL]
for k in stale:
clear(k)
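
A hypothetical Quart SSE route (route path and app wiring are assumptions) showing how a client would consume the bus:

from quart import Quart, Response
from backend.core import progress_bus

app = Quart(__name__)

@app.get("/events/<sid>")
async def events(sid: str) -> Response:
    queue = progress_bus.subscribe(sid)

    async def stream():
        while True:
            payload = await queue.get()   # JSON frames produced by _broadcast()
            yield f"data: {payload}\n\n".encode()

    return Response(stream(), content_type="text/event-stream")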

48 backend/core/settings.py Normal file

@@ -0,0 +1,48 @@
import os
from pathlib import Path
from functools import lru_cache
from dotenv import load_dotenv

load_dotenv()

# ─── Paths ───────────────────────────────────────────────
ROOT_DIR = Path(__file__).resolve().parent.parent  # .../backend
DATA_DIR = ROOT_DIR / "data"
TMP_DIR = Path("/tmp")
DOWNLOAD_DIR = DATA_DIR / "downloads"
USERS_DIR = DATA_DIR / "users"
ENV = os.getenv("APP_ENV", "development")
PROXY_LIST_FILE = Path(os.getenv("PROXY_LIST_FILE", ".50.txt"))

# ─── Proxy credentials ───────────────────────────────────
PROXY_USERNAME = os.getenv("PROXY_USERNAME")
PROXY_PASSWORD = os.getenv("PROXY_PASSWORD")

# ─── DB and SQLAlchemy ───────────────────────────────────
SQLALCHEMY_DATABASE_URI = os.getenv("DATABASE_URL") or f"sqlite:///{DATA_DIR / 'local.db'}"
DB_POOL_SIZE = int(os.getenv("DB_POOL_SIZE", 20))
DB_ECHO = os.getenv("DB_ECHO", "").lower() in ("1", "true", "yes")  # bool("0") would be truthy

# ─── Concurrency ─────────────────────────────────────────
CPU_COUNT = os.cpu_count() or 2
THREADS_MAX = min(32, CPU_COUNT * 4)
PROCS_MAX = min(CPU_COUNT, 4)
PER_IP_CONCURRENCY = int(os.getenv("PER_IP_CONCURRENCY", 2))

# ─── Cache and tuning knobs ──────────────────────────────
FORMAT_CACHE_TTL_SEC = int(os.getenv("FORMAT_CACHE_TTL_SEC", 8_000))
DOWNLOAD_CACHE_TTL_SEC = int(os.getenv("DOWNLOAD_CACHE_TTL_SEC", 86_400))  # 24 h
PARALLEL_CHUNK_MB = int(os.getenv("PARALLEL_CHUNK_MB", 2))
MAX_CONCURRENT_FRAG = int(os.getenv("MAX_CONCURRENT_FRAG", 4))
ARIA2C_THRESHOLD_MB = int(os.getenv("ARIA2C_THRESHOLD_MB", 512))
MIN_SCORE = float(os.getenv("PROXY_MIN_SCORE", "0.05"))
MAX_IN_USE = int(os.getenv("PROXY_CONCURRENCY_LIMIT", "4"))
FAIL_COOLDOWN_SEC = int(os.getenv("PROXY_FAIL_COOLDOWN", "600"))
_MAX_LOGIN_FAILS = int(os.getenv("MAX_LOGIN_FAILS", "12"))
_MAX_INVALID_URLS = int(os.getenv("MAX_INVALID_URLS", "20"))
_WINDOW_MINUTES = int(os.getenv("WINDOW_MINUTES", "60"))

# ─── Logging ─────────────────────────────────────────────
LOG_LEVEL = os.getenv("LOG_LEVEL", "INFO")

# ─── DB bootstrap flag ───────────────────────────────────
SKIP_SCHEMA_BOOTSTRAP = int(os.getenv("SKIP_SCHEMA_BOOTSTRAP", "0"))

@lru_cache
def ensure_dirs() -> None:
    # lru_cache makes this a run-once initializer
    for p in (DATA_DIR, USERS_DIR, DOWNLOAD_DIR):
        p.mkdir(parents=True, exist_ok=True)

ensure_dirs()