"""
TEI Pipeline Flask Server

Changes from v1:
- Model loads at startup and stays in memory
- /api/process returns inferred metadata; XML regenerable after edits
- /api/update-metadata regenerates XML with user-edited metadata
- /api/detect-language detects language from uploaded image
- Custom tag support
"""

import os
import sys
import json
import argparse
import threading
import tempfile
import webbrowser
from pathlib import Path

from flask import Flask, request, jsonify, render_template
from flask_cors import CORS

# Directory containing this file. It is pushed to the front of sys.path so
# the lazy `from core.… import …` statements inside the handlers resolve
# (presumably `core` is a package living next to this file — confirm).
_HERE = Path(__file__).resolve().parent
sys.path.insert(0, str(_HERE))

# Templates and static assets are both served from the same "templates" folder.
app = Flask(__name__,
            template_folder=str(_HERE / "templates"),
            static_folder=str(_HERE / "templates"))
# NOTE(review): CORS is enabled with the extension's defaults — confirm that
# this is the intended origin policy for a server bound to 127.0.0.1.
CORS(app)

# ── Persistent model engine (loaded once at startup) ─────────────────
# Written by _load_engine_background(); read by the request handlers.
_engine = None                     # OCREngine instance once constructed
_engine_lock = threading.Lock()    # not acquired anywhere in the visible code
_engine_loading = False            # True only while the loader is running
_engine_status = "not_loaded"      # "not_loaded" | "loading" | "ready" | "error: …"

# ── Current job state (one job at a time) ────────────────────────────
_worker_thread = None              # thread started by /api/process
_pipeline = None                   # pipeline object created by that thread
# Progress record returned verbatim by /api/progress; _reset() rebinds it to
# a fresh dict with these same defaults.
_status = {
    "step": "idle", "message": "Ready", "page": 0, "total_pages": 0,
    "progress": 0.0, "is_complete": False, "has_error": False,
    "error_message": "", "inferred_metadata": {},
}
_result_xml = None                 # XML produced by the last run / regeneration
_cancel_flag = False               # set by /api/cancel, polled by the worker


def _reset():
    """Put the job-tracking globals back into their idle state.

    ``_status`` is rebound to a brand-new dict (the previous dict object is
    left untouched), the stored XML result is dropped and the cancellation
    flag is cleared.
    """
    global _status, _result_xml, _cancel_flag
    idle_fields = (
        ("step", "idle"),
        ("message", "Ready"),
        ("page", 0),
        ("total_pages", 0),
        ("progress", 0.0),
        ("is_complete", False),
        ("has_error", False),
        ("error_message", ""),
        ("inferred_metadata", {}),
    )
    _status = dict(idle_fields)
    _result_xml, _cancel_flag = None, False


def _load_engine_background(model_key="qwen2.5-vl-7b"):
    """Construct the OCR engine and load its model, recording the outcome.

    Intended as a thread target (see ``main``). While it runs,
    ``_engine_loading`` is True and ``_engine_status`` is ``"loading"``.
    On success the status becomes ``"ready"``; on any exception it becomes
    ``"error: <message>"``. ``_engine_loading`` is always cleared on exit.

    Args:
        model_key: Identifier handed to ``OCREngine(model_key=...)``.
    """
    global _engine, _engine_loading, _engine_status

    def _discard_progress(msg):
        # Progress messages from the loader are intentionally ignored.
        return None

    _engine_loading = True
    _engine_status = "loading"
    try:
        # Imported lazily so importing this module does not pull in the engine.
        from core.ocr_engine import OCREngine
        _engine = OCREngine(model_key=model_key)
        _engine.load_model(progress_callback=_discard_progress)
    except Exception as exc:
        _engine_status = f"error: {exc}"
    else:
        _engine_status = "ready"
    finally:
        _engine_loading = False


# ── Routes ───────────────────────────────────────────────────────────
@app.route("/")
def index():
    """Serve the front-end page rendered from the ``index.html`` template."""
    page = render_template("index.html")
    return page


@app.route("/api/status")
def api_status():
    """Report the compute device and the model-loading status as JSON.

    If the device probe cannot be imported or raises, a placeholder
    ``{"device": "unknown", "error": ...}`` entry is returned instead.
    """
    def _probe_device():
        try:
            from core.device import get_device_info
            return get_device_info()
        except Exception as exc:
            return {"device": "unknown", "error": str(exc)}

    payload = {
        "ok": True,
        "device": _probe_device(),
        "model_status": _engine_status,
    }
    return jsonify(payload)


@app.route("/api/process", methods=["POST"])
def api_process():
    """Accept an uploaded document and start the pipeline in a worker thread.

    Form fields read:
        file: the uploaded document (required).
        genre: defaults to ``"auto"``.
        model: defaults to ``"qwen2.5-vl-7b"``.
        custom_tags: optional JSON text; ignored when it does not parse.
        title, author, date, publisher, pubPlace, language: optional
            user-provided metadata; blank values are skipped.

    Returns:
        409 when a previous job is still running, 400 when the upload is
        missing or has an empty filename, otherwise a JSON acknowledgement
        once the worker thread has been started. Progress and the final
        outcome are published through the module-level ``_status`` dict and
        ``_result_xml``.
    """
    global _worker_thread, _result_xml, _cancel_flag, _pipeline

    if _worker_thread and _worker_thread.is_alive():
        return jsonify({"ok": False, "error": "Already processing."}), 409

    # Validate the request BEFORE resetting state, so that a malformed
    # request does not wipe the status/result of the previous job.
    if "file" not in request.files:
        return jsonify({"ok": False, "error": "No file uploaded."}), 400
    upload = request.files["file"]
    if not upload.filename:
        return jsonify({"ok": False, "error": "Empty filename."}), 400

    _reset()

    # Reserve a temp path that keeps the original extension, close the handle,
    # then let the upload write to that path.
    suffix = Path(upload.filename).suffix
    with tempfile.NamedTemporaryFile(delete=False, suffix=suffix) as tmp:
        tmp_path = tmp.name

    def _discard_upload():
        # Best-effort removal of the temporary copy of the upload.
        try:
            os.unlink(tmp_path)
        except OSError:
            pass

    try:
        upload.save(tmp_path)
    except Exception:
        # Do not leave an orphaned temp file behind when the save fails.
        _discard_upload()
        raise

    genre = request.form.get("genre", "auto")
    model_key = request.form.get("model", "qwen2.5-vl-7b")

    # Parse custom tags from JSON; unparsable input falls back to no tags.
    custom_tags = []
    raw_tags = request.form.get("custom_tags", "")
    if raw_tags:
        try:
            custom_tags = json.loads(raw_tags)
        except json.JSONDecodeError:
            pass

    # User-provided metadata (may be empty — will be merged with inferred)
    metadata = {}
    for key in ["title", "author", "date", "publisher", "pubPlace", "language"]:
        val = request.form.get(key, "").strip()
        if val:
            metadata[key] = val

    def run_pipeline():
        """Worker body: run the pipeline and publish progress/result globals."""
        global _pipeline, _result_xml, _status, _cancel_flag, _engine
        import time

        try:
            from core.pipeline import OCRTEIPipeline, PipelineConfig

            config = PipelineConfig()
            config.genre = genre
            config.model_key = model_key
            config.metadata = metadata
            config.custom_tags = custom_tags

            # Wait for the engine to finish loading, but stay responsive to
            # /api/cancel while waiting.
            while _engine_loading:
                if _cancel_flag:
                    raise InterruptedError("Cancelled while waiting for model.")
                time.sleep(1)

            _pipeline = OCRTEIPipeline(config, engine=_engine)

            def on_progress(st):
                _status["step"] = "processing"
                _status["message"] = st.current_step
                _status["page"] = st.current_page
                _status["total_pages"] = st.total_pages
                _status["progress"] = st.progress_fraction
                # Forward a cancel request that arrived before this pipeline
                # object existed.
                if _cancel_flag:
                    _pipeline.cancel()

            result = _pipeline.run(tmp_path, progress_callback=on_progress)
            _result_xml = result
            _status["is_complete"] = True
            _status["progress"] = 1.0
            _status["message"] = "Processing complete."
            _status["inferred_metadata"] = _pipeline.inferred_metadata

        except InterruptedError:
            _status["has_error"] = True
            _status["error_message"] = "Cancelled by user."
        except Exception as e:
            import traceback
            traceback.print_exc()
            _status["has_error"] = True
            _status["error_message"] = str(e)
            _status["message"] = f"Error: {e}"
        finally:
            _discard_upload()

    _worker_thread = threading.Thread(target=run_pipeline, daemon=True)
    _worker_thread.start()
    return jsonify({"ok": True, "message": "Processing started."})


@app.route("/api/progress")
def api_progress():
    """Return the live job-status dict (``_status``) serialized as JSON."""
    current = _status
    return jsonify(current)


@app.route("/api/cancel", methods=["POST"])
def api_cancel():
    """Request cancellation of the running job.

    Raises the module-level cancel flag and, when a pipeline object exists,
    also calls its ``cancel()`` method. Always answers ``{"ok": True}``.
    """
    global _cancel_flag
    _cancel_flag = True
    ack = {"ok": True}
    if _pipeline:
        _pipeline.cancel()
    return jsonify(ack)


@app.route("/api/result")
def api_result():
    """Return the stored XML result together with the inferred metadata.

    Answers 404 with an error payload while no result has been stored.
    """
    if _result_xml is not None:
        body = {
            "ok": True,
            "xml": _result_xml,
            "inferred_metadata": _status.get("inferred_metadata", {}),
        }
        return jsonify(body)
    return jsonify({"ok": False, "error": "No result available."}), 404


@app.route("/api/update-metadata", methods=["POST"])
def api_update_metadata():
    """Regenerate TEI XML with user-edited metadata.

    Expects a JSON object body of the form ``{"metadata": {...}}``; a missing
    or ``null`` ``metadata`` entry is treated as an empty dict.

    Returns:
        400 when no OCR results are available or when the body is not a JSON
        object (or ``metadata`` is not an object); 500 with the exception
        text when regeneration or validation raises; otherwise the new XML
        together with its validation outcome.
    """
    global _result_xml, _pipeline
    if _pipeline is None or not _pipeline.ocr_results:
        return jsonify({"ok": False, "error": "No OCR results. Process a document first."}), 400

    # silent=True yields None for a missing/invalid JSON body instead of an
    # HTML error page, so every failure is answered in this API's JSON shape.
    data = request.get_json(silent=True)
    if not isinstance(data, dict):
        return jsonify({"ok": False, "error": "Request body must be a JSON object."}), 400

    metadata = data.get("metadata") or {}
    if not isinstance(metadata, dict):
        return jsonify({"ok": False, "error": "'metadata' must be a JSON object."}), 400

    try:
        _result_xml = _pipeline.regenerate_xml(metadata)
        from core.tei_schema import validate_tei_structure
        is_valid, errors = validate_tei_structure(_result_xml)
        return jsonify({"ok": True, "xml": _result_xml, "valid": is_valid, "errors": errors})
    except Exception as e:
        return jsonify({"ok": False, "error": str(e)}), 500


@app.route("/api/validate", methods=["POST"])
def api_validate():
    """Validate a TEI XML string supplied by the client.

    Expects a JSON object body of the form ``{"xml": "<TEI>…"}``; a missing
    ``xml`` entry is validated as the empty string.

    Returns:
        400 when the body is not a JSON object or ``xml`` is not a string;
        otherwise ``{"ok": True, "valid": ..., "errors": ...}`` as reported
        by ``validate_tei_structure``.
    """
    # silent=True yields None for a missing/invalid JSON body instead of
    # raising, so the guard below can answer with a JSON error.
    data = request.get_json(silent=True)
    if not isinstance(data, dict):
        return jsonify({"ok": False, "error": "Request body must be a JSON object."}), 400

    xml_text = data.get("xml", "")
    if not isinstance(xml_text, str):
        return jsonify({"ok": False, "error": "'xml' must be a string."}), 400

    from core.tei_schema import validate_tei_structure
    is_valid, errors = validate_tei_structure(xml_text)
    return jsonify({"ok": True, "valid": is_valid, "errors": errors})


@app.route("/api/shutdown", methods=["POST"])
def api_shutdown():
    """Acknowledge the request, then terminate this process shortly after.

    The SIGTERM is sent from a timer thread half a second later so the HTTP
    response can be delivered before the process goes away.
    """
    import signal

    def _terminate_self():
        os.kill(os.getpid(), signal.SIGTERM)

    delayed_exit = threading.Timer(0.5, _terminate_self)
    delayed_exit.start()
    return jsonify({"ok": True, "message": "Shutting down."})


# ── Entry point ──────────────────────────────────────────────────────
def main():
    """Parse command-line options, start model loading and run the server.

    Options:
        --port: TCP port; defaults to the ``TEI_OCR_PORT`` environment
            variable, or 5199 when that is unset.
        --no-browser: do not open the UI in a web browser.
        --model: model key handed to the background engine loader.
    """
    cli = argparse.ArgumentParser(description="TEI Pipeline Server")
    cli.add_argument(
        "--port",
        type=int,
        default=int(os.environ.get("TEI_OCR_PORT", 5199)),
    )
    cli.add_argument("--no-browser", action="store_true")
    cli.add_argument("--model", default="qwen2.5-vl-7b")
    args = cli.parse_args()

    # Kick off model loading right away; the server starts without waiting.
    loader = threading.Thread(
        target=_load_engine_background,
        args=(args.model,),
        daemon=True,
    )
    loader.start()

    if not args.no_browser:
        def _open_ui():
            return webbrowser.open(f"http://127.0.0.1:{args.port}")

        # Small delay before pointing the browser at the server.
        opener = threading.Timer(1.5, _open_ui)
        opener.start()

    print(f"TEI Pipeline starting on http://127.0.0.1:{args.port}")
    app.run(host="127.0.0.1", port=args.port, debug=False, use_reloader=False)


# Start the server only when this file is executed as a script; importing
# the module does not call main().
if __name__ == "__main__":
    main()
