"""Download the 580 unique YouTube videos referenced by GolfDB.

Reads golfDB.mat (by default from this script's directory), extracts the unique
youtube_id values, and calls yt-dlp on each one. IDs whose non-empty .mp4
already exists are skipped without a log entry; every attempted download
appends one status line to download.log.

Usage:
    python download_videos.py [--mat golfDB.mat] [--videos-dir ../videos]
                              [--log ../download.log] [--workers 4]
"""

from __future__ import annotations

import argparse
import datetime
import subprocess
import sys
from concurrent.futures import ThreadPoolExecutor, as_completed
from pathlib import Path

from scipy.io import loadmat


def unique_youtube_ids(mat_path: Path) -> list[str]:
    """Return the distinct ``youtube_id`` values stored in a GolfDB .mat file.

    The ``golfDB`` variable is loaded with ``squeeze_me=True`` and the
    ``youtube_id`` field of every record is converted to ``str``. Repeated IDs
    are dropped; the result keeps the order in which each ID first appears.
    """
    records = loadmat(str(mat_path), squeeze_me=True)["golfDB"]
    # dict keys are unique and remember insertion order, so this performs
    # first-occurrence de-duplication in a single pass.
    first_seen = dict.fromkeys(str(record["youtube_id"]) for record in records)
    return list(first_seen)


def download_one(yid: str, videos_dir: Path, log_path: Path) -> tuple[str, str]:
    """Download a single YouTube video with yt-dlp and log the outcome.

    Args:
        yid: YouTube video ID.
        videos_dir: Directory that receives ``<yid>.mp4``.
        log_path: File to which one tab-separated ``timestamp, yid, status``
            line is appended for every attempted download.

    Returns:
        ``(yid, status)``, where ``status`` is one of:

        * ``"skip-exists"`` -- a non-empty ``<yid>.mp4`` is already present;
          yt-dlp is not run and nothing is written to the log.
        * ``"ok"`` -- yt-dlp exited with 0 and ``<yid>.mp4`` exists.
        * ``"fail:<returncode>:<last stderr line>"`` -- anything else,
          truncated to 200 characters.
    """
    target = videos_dir / f"{yid}.mp4"
    if target.exists() and target.stat().st_size > 0:
        return yid, "skip-exists"
    url = f"https://www.youtube.com/watch?v={yid}"
    cmd = [
        # Run yt-dlp through the current interpreter so the copy installed in
        # this environment is the one that gets used.
        sys.executable,
        "-m",
        "yt_dlp",
        "-f",
        "mp4/bestvideo[ext=mp4]+bestaudio[ext=m4a]/best",
        "--merge-output-format",
        "mp4",
        "-o",
        str(videos_dir / f"{yid}.%(ext)s"),
        "--no-progress",
        "--quiet",
        "--no-warnings",
        url,
    ]
    res = subprocess.run(cmd, capture_output=True, text=True)
    # The output template keeps yt-dlp's own extension, so a zero exit code
    # alone does not prove that "<yid>.mp4" was produced; check the file too.
    if res.returncode == 0 and target.exists():
        status = "ok"
    else:
        # stderr can be empty or whitespace-only, in which case there is no
        # last line to report; indexing [-1] unconditionally would raise.
        err_lines = (res.stderr or "").strip().splitlines()
        last_line = err_lines[-1] if err_lines else ""
        status = f"fail:{res.returncode}:{last_line}"[:200]
    # Timezone-aware UTC timestamp (datetime.utcnow() is deprecated and naive);
    # the ISO string therefore carries an explicit "+00:00" offset.
    ts = datetime.datetime.now(datetime.timezone.utc).isoformat()
    # Explicit UTF-8 so non-ASCII error text cannot fail to encode under a
    # narrower platform default encoding.
    with log_path.open("a", encoding="utf-8") as fh:
        fh.write(f"{ts}\t{yid}\t{status}\n")
    return yid, status


def main() -> None:
    """Parse CLI options and download every unique GolfDB video in parallel.

    Options (defaults are relative to this script's directory):
        --mat         path to golfDB.mat (default: alongside this script)
        --videos-dir  output directory (default: ../videos)
        --log         status log file (default: ../download.log)
        --workers     number of concurrent download threads (default: 4)

    Prints a progress line for every 20th completed ID and for every failure,
    then a final summary of ok / skip-exists / fail counts.
    """
    here = Path(__file__).resolve().parent
    ap = argparse.ArgumentParser()
    ap.add_argument("--mat", type=Path, default=here / "golfDB.mat")
    ap.add_argument("--videos-dir", type=Path, default=here.parent / "videos")
    ap.add_argument("--log", type=Path, default=here.parent / "download.log")
    ap.add_argument("--workers", type=int, default=4)
    args = ap.parse_args()
    if args.workers < 1:
        # ThreadPoolExecutor rejects non-positive sizes with a bare ValueError;
        # report it as a normal usage error instead.
        ap.error("--workers must be >= 1")

    args.videos_dir.mkdir(parents=True, exist_ok=True)
    # Workers append to the log only after a download attempt, so make sure
    # its directory exists before any work is started.
    args.log.parent.mkdir(parents=True, exist_ok=True)
    ids = unique_youtube_ids(args.mat)
    print(f"[golfdb] {len(ids)} unique youtube ids; videos -> {args.videos_dir}")

    counts: dict[str, int] = {"ok": 0, "skip-exists": 0, "fail": 0}
    with ThreadPoolExecutor(max_workers=args.workers) as ex:
        futs = {ex.submit(download_one, yid, args.videos_dir, args.log): yid for yid in ids}
        for done, fut in enumerate(as_completed(futs), start=1):
            try:
                yid, status = fut.result()
            except Exception as exc:
                # Top-level boundary: one unexpected worker error must not
                # abort the whole batch or lose the final summary. Record it
                # as a failure for the ID that was submitted with this future.
                yid = futs[fut]
                status = f"fail:exception:{exc!r}"[:200]
            bucket = "fail" if status.startswith("fail") else status
            counts[bucket] = counts.get(bucket, 0) + 1
            if done % 20 == 0 or status.startswith("fail"):
                print(f"  [{done}/{len(ids)}] {yid} -> {status}")
    print(f"[golfdb] done. {counts}")


# Run the downloader only when this file is executed as a script, so that
# importing the module has no side effects.
if __name__ == "__main__":
    main()
