#!/usr/bin/env python3
"""
DH Toolkit — Digital Humanities Visualization Suite
"""
import os, sys, json, csv, io, base64, zipfile, tempfile, logging

# Guarantee the directory holding this file is importable
# (critical for cPanel/Passenger).
APP_DIR = os.path.dirname(os.path.abspath(__file__))
if not any(entry == APP_DIR for entry in sys.path):
    sys.path.insert(0, APP_DIR)

# Verbose logging to stderr, used for debugging on cPanel.
logging.basicConfig(
    stream=sys.stderr,
    format='%(asctime)s [%(levelname)s] %(message)s',
    level=logging.DEBUG,
)
log = logging.getLogger('dh-toolkit')

try:
    from flask import Flask, render_template, request, jsonify, send_file, session
    log.info("Flask imported OK")
except ImportError as e:
    log.error(f"Flask import failed: {e}")
    raise

try:
    from engines.gazetteer import geocode
    from engines.network import ForceAtlas2, parse_edge_csv, build_graph, compute_network_stats
    from engines.lda import LDA, load_corpus, tokenize
    log.info("Engines imported OK")
except ImportError as e:
    log.error(f"Engine import failed: {e}")
    log.error(f"sys.path = {sys.path}")
    log.error(f"APP_DIR = {APP_DIR}")
    log.error(f"Contents of APP_DIR: {os.listdir(APP_DIR) if os.path.isdir(APP_DIR) else 'NOT A DIR'}")
    raise

app = Flask(__name__)

# Flask signs the session cookie with this key, and the session's 'sid' value
# is later used to build per-session upload paths, so a publicly known key is
# a real risk. The historical fallback is kept so local/dev runs keep working,
# but its use is now made visible in the logs. An empty DH_TOOLKIT_SECRET is
# treated the same as an unset one.
_secret_key = os.environ.get('DH_TOOLKIT_SECRET')
if not _secret_key:
    log.warning(
        "DH_TOOLKIT_SECRET is not set; falling back to the built-in "
        "development secret key. Set DH_TOOLKIT_SECRET in production."
    )
    _secret_key = 'dh-toolkit-dev-key-change-in-prod'
app.secret_key = _secret_key

# Cap request bodies at 100 MiB.
app.config['MAX_CONTENT_LENGTH'] = 100 * 1024 * 1024

def _select_upload_root():
    """Return a directory that uploads can be written to.

    Candidate locations are tried in order: ``<APP_DIR>/tmp``, a
    ``dh_toolkit_uploads`` folder in the system temp directory, then
    ``~/.dh_toolkit_tmp``. A candidate is accepted once it can be created
    and a small probe file can be written to and removed from it. If none
    qualifies, a fresh ``tempfile.mkdtemp`` directory is used instead.

    Doing this inside a function keeps the loop temporaries out of the
    module namespace.
    """
    candidates = [
        os.path.join(APP_DIR, 'tmp'),
        os.path.join(tempfile.gettempdir(), 'dh_toolkit_uploads'),
        os.path.join(os.path.expanduser('~'), '.dh_toolkit_tmp'),
    ]
    for candidate in candidates:
        probe = os.path.join(candidate, '.write_test')
        try:
            os.makedirs(candidate, exist_ok=True)
            # Creating the directory is not enough: prove we can write to it.
            with open(probe, 'w') as fh:
                fh.write('ok')
            os.remove(probe)
        except OSError:  # PermissionError is a subclass of OSError
            continue
        log.info(f"Upload directory: {candidate}")
        return candidate

    fallback = tempfile.mkdtemp(prefix='dh_toolkit_')
    log.warning(f"Using fallback temp dir: {fallback}")
    return fallback


# Use a writable directory for uploads — try several locations
UPLOAD_ROOT = _select_upload_root()


# Alphabet emitted by base64.urlsafe_b64encode; 12 random bytes encode to
# exactly 16 of these characters with no '=' padding.
_SID_CHARS = frozenset(
    'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-_')


def get_upload_dir():
    """Return (creating it if needed) the upload directory for this session.

    The directory is ``UPLOAD_ROOT/<sid>``, where ``sid`` is a random token
    stored in the Flask session. Because the token becomes a path component,
    a value read back from the session is only reused when it has exactly
    the shape this function generates; anything else (missing, wrong type,
    unexpected characters such as path separators) is replaced by a fresh
    token.

    Returns:
        The path of the session's upload directory as a string.
    """
    sid = session.get('sid')
    sid_ok = (
        isinstance(sid, str)
        and len(sid) == 16
        and _SID_CHARS.issuperset(sid)
    )
    if not sid_ok:
        sid = base64.urlsafe_b64encode(os.urandom(12)).decode()
        session['sid'] = sid
    path = os.path.join(UPLOAD_ROOT, sid)
    os.makedirs(path, exist_ok=True)
    return path


@app.route('/')
def index():
    """Render the ``index.html`` template as the response for the site root."""
    page = render_template('index.html')
    return page


def _sanitize_upload_name(raw_name):
    """Flatten a client-supplied filename into a single path component.

    NUL bytes and ``..`` sequences are removed and both ``/`` and ``\\`` are
    replaced by ``_``. Returns ``''`` when nothing usable remains.
    """
    cleaned = raw_name.replace('\x00', '')
    cleaned = cleaned.replace('..', '')
    # Handle both separator styles so a Windows-style path cannot carry
    # directory components through.
    cleaned = cleaned.replace('/', '_').replace('\\', '_')
    # A name consisting only of dots/spaces (e.g. '.', or the '' left over
    # from '....') would resolve to the upload directory itself.
    if not cleaned.strip('. '):
        return ''
    return cleaned


@app.route('/api/upload', methods=['POST'])
def upload_file():
    """Store the files posted in the ``files`` form field.

    Each file is saved into the session's upload directory under a
    sanitized name. Entries with no filename, with a name that sanitizes to
    nothing, or whose target is an existing directory are skipped instead
    of failing the whole request.

    Returns:
        JSON ``{'files': [...]}`` with the sanitized names that were saved.
    """
    upload_dir = get_upload_dir()
    uploaded = []
    for f in request.files.getlist('files'):
        if not f.filename:
            continue
        safe = _sanitize_upload_name(f.filename)
        if not safe:
            continue
        target = os.path.join(upload_dir, safe)
        # Saving onto a directory (e.g. the session's 'corpus' folder)
        # would raise; skip such names.
        if os.path.isdir(target):
            continue
        f.save(target)
        uploaded.append(safe)
    return jsonify({'files': uploaded})


def _extract_corpus_texts(archive_bytes, dest_dir):
    """Write the eligible ``.txt`` members of a zip archive into *dest_dir*.

    Members are flattened to their base name. Skipped are members whose base
    name starts with ``_``, and the ``__MACOSX/`` / ``._*`` resource-fork
    entries that macOS adds to archives (they are binary metadata, not text).

    Returns:
        The number of files written.

    Raises:
        zipfile.BadZipFile, RuntimeError, NotImplementedError: propagated
        from ``zipfile`` when the archive is corrupt, encrypted, or uses an
        unsupported compression method.
    """
    written = 0
    with zipfile.ZipFile(io.BytesIO(archive_bytes)) as zf:
        for member in zf.namelist():
            if not member.lower().endswith('.txt'):
                continue
            out_name = os.path.basename(member)
            if out_name.startswith('_'):
                continue
            if out_name.startswith('._') or member.startswith('__MACOSX/'):
                continue
            # Read before opening the target so a failed read leaves no
            # empty file behind.
            payload = zf.read(member)
            with open(os.path.join(dest_dir, out_name), 'wb') as wf:
                wf.write(payload)
            written += 1
    return written


@app.route('/api/upload_corpus', methods=['POST'])
def upload_corpus():
    """Replace the session's corpus with the uploaded text files.

    The ``corpus`` folder inside the session's upload directory is emptied
    first. Every ``.txt`` file in the ``files`` form field is saved there,
    and every ``.zip`` file has its ``.txt`` members extracted there (flat,
    by base name). Other file types are ignored.

    Returns:
        JSON ``{'count': n}`` with the number of text files stored, or
        ``{'error': ...}`` with status 400 when a zip archive cannot be read.
    """
    import shutil

    upload_dir = get_upload_dir()
    corpus_dir = os.path.join(upload_dir, 'corpus')
    # Clear previous corpus
    if os.path.isdir(corpus_dir):
        shutil.rmtree(corpus_dir)
    os.makedirs(corpus_dir, exist_ok=True)

    count = 0
    for f in request.files.getlist('files'):
        if not f.filename:
            continue
        name = os.path.basename(f.filename)
        lowered = name.lower()
        if lowered.endswith('.txt'):
            f.save(os.path.join(corpus_dir, name))
            count += 1
        elif lowered.endswith('.zip'):
            try:
                count += _extract_corpus_texts(f.read(), corpus_dir)
            except (zipfile.BadZipFile, RuntimeError, NotImplementedError):
                return jsonify(
                    {'error': f'Could not read zip archive: {name}'}), 400
    return jsonify({'count': count})


# ── MAP ──────────────────────────────────────────────────────────────────

@app.route('/api/map/preview', methods=['POST'])
def map_preview():
    """Return the header row and the first 8 data rows of an uploaded CSV.

    Expects a JSON body with ``filename`` naming a file in the session's
    upload directory.

    Returns:
        JSON ``{'headers': [...], 'preview': [...]}``, or
        ``{'error': 'File not found'}`` with status 404 when the name is
        missing, is not a plain file name, or does not refer to a file.
    """
    upload_dir = get_upload_dir()
    payload = request.json
    filename = payload.get('filename') if isinstance(payload, dict) else None

    # The name comes from the client: accept only a bare file name so that
    # '../x' or an absolute path cannot reach outside the upload directory.
    if not isinstance(filename, str) or filename != os.path.basename(filename):
        return jsonify({'error': 'File not found'}), 404
    path = os.path.join(upload_dir, filename)
    if not os.path.isfile(path):
        return jsonify({'error': 'File not found'}), 404

    with open(path, 'r', encoding='utf-8-sig', errors='replace') as f:
        reader = csv.DictReader(f)
        headers = list(reader.fieldnames) if reader.fieldnames else []
        # zip(range(8), ...) stops after 8 rows instead of reading the whole
        # file. DictReader files surplus fields under the key None; that key
        # is dropped because mixed None/str keys can make JSON serialization
        # fail when keys are sorted.
        preview = [
            {key: value for key, value in row.items() if key is not None}
            for _, row in zip(range(8), reader)
        ]

    return jsonify({'headers': headers, 'preview': preview})


@app.route('/api/map/geocode', methods=['POST'])
def map_geocode():
    """Resolve each row of an uploaded CSV to a latitude/longitude pair.

    JSON body fields:
        filename: name of a file in the session's upload directory.
        name_col: optional column holding the place name.
        lat_col, lon_col: optional columns holding numeric coordinates.

    For every row the coordinate columns are used when both parse as finite
    numbers; otherwise the place name (if any) is passed to ``geocode``.

    Returns:
        JSON with ``places`` (each row's columns plus ``_name``, ``_lat``,
        ``_lon``), ``failed`` (names of rows that could not be located) and
        ``headers``. Status 400 is returned for a non-object body and 404
        when the file name is invalid or the file does not exist.
    """
    import math

    def _parse_coord(raw):
        """Return *raw* as a finite float, or None if that is not possible."""
        try:
            value = float(raw)
        except (ValueError, TypeError):
            return None
        # 'nan'/'inf' parse as floats but cannot be plotted and are not
        # valid JSON numbers.
        return value if math.isfinite(value) else None

    upload_dir = get_upload_dir()
    data = request.json
    if not isinstance(data, dict):
        return jsonify({'error': 'Invalid request'}), 400
    filename = data.get('filename')
    name_col = data.get('name_col')
    lat_col = data.get('lat_col')
    lon_col = data.get('lon_col')

    # The name comes from the client: accept only a bare file name so that
    # '../x' or an absolute path cannot reach outside the upload directory.
    if not isinstance(filename, str) or filename != os.path.basename(filename):
        return jsonify({'error': 'File not found'}), 404
    path = os.path.join(upload_dir, filename)
    if not os.path.isfile(path):
        return jsonify({'error': 'File not found'}), 404

    with open(path, 'r', encoding='utf-8-sig', errors='replace') as f:
        reader = csv.DictReader(f)
        headers = list(reader.fieldnames) if reader.fieldnames else []
        rows = list(reader)

    places = []
    failed = []
    for row in rows:
        place = {h: row.get(h, '') for h in headers}
        # Short rows yield None for missing columns; treat that as empty.
        name = (row.get(name_col) or '').strip() if name_col else ''
        place['_name'] = name

        lat, lon = None, None
        if lat_col and lon_col:
            lat = _parse_coord(row.get(lat_col, ''))
            lon = _parse_coord(row.get(lon_col, ''))

        if lat is None or lon is None:
            # Fall back to geocoding the place name; rows without a name
            # have nothing to look up.
            coords = geocode(name) if name else None
            if coords:
                lat, lon = coords

        if lat is not None and lon is not None:
            place['_lat'] = lat
            place['_lon'] = lon
            places.append(place)
        else:
            failed.append(name or '(unnamed)')

    return jsonify({'places': places, 'failed': failed, 'headers': headers})


# ── NETWORK ──────────────────────────────────────────────────────────────

@app.route('/api/network/preview', methods=['POST'])
def network_preview():
    """Return the headers, first 8 rows and detected columns of an edge CSV.

    Expects a JSON body with ``filename`` naming a file in the session's
    upload directory. The file is parsed with ``parse_edge_csv``.

    Returns:
        JSON with ``headers``, ``preview``, ``detected_source``,
        ``detected_target`` and ``detected_weight``, or
        ``{'error': 'File not found'}`` with status 404 when the name is
        missing, is not a plain file name, or does not refer to a file.
    """
    upload_dir = get_upload_dir()
    payload = request.json
    filename = payload.get('filename') if isinstance(payload, dict) else None

    # The name comes from the client: accept only a bare file name so that
    # '../x' or an absolute path cannot reach outside the upload directory.
    if not isinstance(filename, str) or filename != os.path.basename(filename):
        return jsonify({'error': 'File not found'}), 404
    path = os.path.join(upload_dir, filename)
    if not os.path.isfile(path):
        return jsonify({'error': 'File not found'}), 404

    result = parse_edge_csv(path)
    preview = result['rows'][:8]
    return jsonify({
        'headers': result['headers'],
        'preview': preview,
        'detected_source': result['detected_source'],
        'detected_target': result['detected_target'],
        'detected_weight': result['detected_weight'],
    })


@app.route('/api/network/generate', methods=['POST'])
def network_generate():
    """Build a graph from an uploaded edge CSV and lay it out with ForceAtlas2.

    JSON body fields:
        filename: name of a file in the session's upload directory.
        source_col, target_col: required column names for the edge ends.
        weight_col: optional column name for edge weights.
        iterations (default 500), gravity (default 1.0),
        scaling_ratio (default 2.0): layout parameters.

    Returns:
        JSON with ``nodes`` (per-node dict of id, stats, metadata, ``x``,
        ``y``), ``edges`` (source/target/weight dicts), ``node_count`` and
        ``edge_count``. Status 400 is returned for a malformed request or
        when no nodes result; 404 when the file name is invalid or the file
        does not exist.
    """
    upload_dir = get_upload_dir()
    data = request.json
    if not isinstance(data, dict):
        return jsonify({'error': 'Invalid request'}), 400

    filename = data.get('filename')
    src_col = data.get('source_col')
    tgt_col = data.get('target_col')
    wt_col = data.get('weight_col')
    if not src_col or not tgt_col:
        return jsonify({'error': 'source_col and target_col are required'}), 400
    try:
        n_iter = int(data.get('iterations', 500))
        gravity = float(data.get('gravity', 1.0))
        scaling = float(data.get('scaling_ratio', 2.0))
    except (TypeError, ValueError):
        return jsonify({'error': 'Invalid layout parameters'}), 400

    # The name comes from the client: accept only a bare file name so that
    # '../x' or an absolute path cannot reach outside the upload directory.
    if not isinstance(filename, str) or filename != os.path.basename(filename):
        return jsonify({'error': 'File not found'}), 404
    path = os.path.join(upload_dir, filename)
    if not os.path.isfile(path):
        return jsonify({'error': 'File not found'}), 404

    parsed = parse_edge_csv(path)
    nodes, edges, weights, metadata = build_graph(
        parsed['rows'], src_col, tgt_col, wt_col, parsed['headers'])

    if not nodes:
        return jsonify({'error': 'No valid edges found'}), 400

    # The barnes_hut option is switched on only for graphs above 200 nodes.
    fa2 = ForceAtlas2(gravity=gravity, scaling_ratio=scaling, n_iter=n_iter,
                      barnes_hut=len(nodes) > 200)
    positions = fa2.layout(nodes, edges, weights)
    stats = compute_network_stats(nodes, edges, weights)

    # Merge metadata + stats; metadata wins on key clashes, and the layout
    # coordinates are written last.
    node_data = {}
    for n in nodes:
        nd = {'id': n, **stats.get(n, {}), **metadata.get(n, {})}
        nd['x'] = positions[n][0]
        nd['y'] = positions[n][1]
        node_data[n] = nd

    edge_data = [{'source': s, 'target': t, 'weight': w}
                 for (s, t), w in zip(edges, weights)]

    return jsonify({
        'nodes': node_data,
        'edges': edge_data,
        'node_count': len(nodes),
        'edge_count': len(edges),
    })


# ── TOPICS ───────────────────────────────────────────────────────────────

@app.route('/api/topics/generate', methods=['POST'])
def topics_generate():
    """Fit an LDA topic model on the session's uploaded corpus.

    JSON body fields (all optional):
        n_topics (default 10), iterations (default 300), n_words (default 15),
        alpha (default 0.1), beta (default 0.01).

    The corpus is loaded from the ``corpus`` folder of the session's upload
    directory. After fitting, the raw document texts are written to
    ``_corpus_texts.json`` in the upload directory for later retrieval.

    Returns:
        JSON with ``topics`` (id plus top words with weights rounded to 4
        places), ``documents`` (filename plus topic distribution rounded to
        4 places) and ``vocab_size``. Status 400 is returned for invalid
        parameters, a missing corpus, or fewer than 2 documents.
    """
    upload_dir = get_upload_dir()
    data = request.json
    if not isinstance(data, dict):
        return jsonify({'error': 'Invalid request'}), 400
    try:
        n_topics = int(data.get('n_topics', 10))
        n_iter = int(data.get('iterations', 300))
        n_words = int(data.get('n_words', 15))
        alpha = float(data.get('alpha', 0.1))
        beta = float(data.get('beta', 0.01))
    except (TypeError, ValueError):
        return jsonify({'error': 'Invalid model parameters'}), 400

    inf = float('inf')
    # The chained comparisons are also False for NaN, so NaN is rejected too.
    params_ok = (
        n_topics >= 1
        and n_iter >= 0
        and n_words >= 0
        and 0 <= alpha < inf
        and 0 <= beta < inf
    )
    if not params_ok:
        return jsonify({'error': 'Invalid model parameters'}), 400

    corpus_dir = os.path.join(upload_dir, 'corpus')
    if not os.path.isdir(corpus_dir):
        return jsonify({'error': 'No corpus uploaded'}), 400

    filenames, docs, raw_texts = load_corpus(corpus_dir)
    if len(docs) < 2:
        return jsonify({'error': 'Need at least 2 documents'}), 400

    lda = LDA(n_topics=n_topics, alpha=alpha, beta=beta, n_iter=n_iter)
    lda.fit(docs)

    topics = lda.top_words(n_words)
    doc_topics = lda.doc_topic_dist()

    topic_results = [
        {
            'id': k,
            'words': [{'word': w, 'weight': round(p, 4)} for w, p in words],
        }
        for k, words in enumerate(topics)
    ]

    doc_results = [
        {
            'filename': fn,
            'distribution': [round(v, 4) for v in doc_topics[d].tolist()],
        }
        for d, fn in enumerate(filenames)
    ]

    # Store raw texts for retrieval
    text_map = dict(zip(filenames, raw_texts))
    text_path = os.path.join(upload_dir, '_corpus_texts.json')
    with open(text_path, 'w', encoding='utf-8') as f:
        json.dump(text_map, f)

    return jsonify({
        'topics': topic_results,
        'documents': doc_results,
        'vocab_size': lda.V,
    })


@app.route('/api/topics/text', methods=['POST'])
def topics_text():
    """Look up one document's raw text in the session's stored corpus texts.

    Reads ``filename`` from the JSON body and returns ``{'text': ...}`` from
    ``_corpus_texts.json`` in the session's upload directory. A placeholder
    string is returned for unknown filenames, and a 404 error when the
    store file does not exist.
    """
    upload_dir = get_upload_dir()
    wanted = request.json.get('filename')
    store_path = os.path.join(upload_dir, '_corpus_texts.json')

    if os.path.exists(store_path):
        with open(store_path, 'r', encoding='utf-8') as fh:
            corpus_texts = json.load(fh)
        body = corpus_texts.get(wanted, '(text not found)')
        return jsonify({'text': body})

    return jsonify({'error': 'No corpus data'}), 404


@app.route('/api/topics/export', methods=['POST'])
def topics_export():
    """Send the posted JSON body back as a downloadable, indented JSON file."""
    serialized = json.dumps(request.json, indent=2)
    stream = io.BytesIO(serialized.encode('utf-8'))
    return send_file(
        stream,
        mimetype='application/json',
        as_attachment=True,
        download_name='topic_model_results.json',
    )


# ── Main ─────────────────────────────────────────────────────────────────

def main():
    """Parse command-line options and run the Flask development server.

    Options: ``--port`` (default 5000), ``--host`` (default 127.0.0.1),
    ``--debug`` and ``--no-browser``. Unless ``--no-browser`` is given, the
    default web browser is opened on the app shortly after start-up.
    """
    import argparse

    parser = argparse.ArgumentParser(description='DH Toolkit')
    parser.add_argument('--port', type=int, default=5000)
    parser.add_argument('--host', default='127.0.0.1')
    parser.add_argument('--debug', action='store_true')
    parser.add_argument('--no-browser', action='store_true',
                        help='Do not auto-open browser')
    args = parser.parse_args()

    # With --debug, Werkzeug's reloader re-executes this script in a child
    # process flagged by WERKZEUG_RUN_MAIN=true. Only the parent opens the
    # browser; otherwise start-up and every reload would open another tab.
    in_reloader_child = os.environ.get('WERKZEUG_RUN_MAIN') == 'true'

    if not args.no_browser and not in_reloader_child:
        import threading
        import webbrowser

        # Browse to the address the server actually listens on. Wildcard
        # binds are reachable via loopback; IPv6 literals need brackets.
        browse_host = args.host
        if browse_host in ('', '0.0.0.0', '::'):
            browse_host = '127.0.0.1'
        elif ':' in browse_host:
            browse_host = f'[{browse_host}]'
        url = f'http://{browse_host}:{args.port}'

        # Small delay so the server has a chance to start listening first.
        opener = threading.Timer(1.2, webbrowser.open, args=(url,))
        opener.daemon = True
        opener.start()

    print(f"\n  DH Toolkit running at http://{args.host}:{args.port}\n")
    app.run(host=args.host, port=args.port, debug=args.debug)


if __name__ == '__main__':
    main()
