from collections import Counter
import re
from typing import Any
import os

import requests

import sys
from pathlib import Path

sys.path.append(str(Path(__file__).resolve().parent.parent.parent))
from journal.core.models import JournalEntry, Fragment
from journal.core.config import (
    NLP_BACKEND,
    LLAMA_CPP_URL,
    LLAMA_CPP_MODEL,
    LLAMA_CPP_TIMEOUT,
    EMBEDDING_API_URL,
    EMBEDDING_MODEL_NAME,
    MODEL_CONTEXT_TOKENS,
    CHUNK_TOKEN_BUDGET,
)
from journal.ai.api_compat import (
    build_text_payload,
    detect_text_endpoint_kind,
    extract_embedding_response,
    extract_text_response,
    normalize_embedding_url,
    normalize_endpoint_url,
)

# Recognized values for the JOURNAL_NLP_BACKEND setting; anything else is
# treated as "auto" by _resolve_backend().
_BACKEND_AUTO = "auto"
_BACKEND_SPACY = "spacy"
_BACKEND_FALLBACK = "fallback"
_VALID_BACKENDS = {_BACKEND_AUTO, _BACKEND_SPACY, _BACKEND_FALLBACK}
# Module-level cache maintained by _resolve_backend().
# Backend chosen by the last completed resolution, or None if unresolved.
_backend_name: str | None = None
# Loaded spaCy pipeline while the spaCy backend is active, otherwise None.
_spacy_nlp: Any | None = None
# Set once the "spaCy backend unavailable" warning has been printed.
_fallback_warning_printed = False
# Normalized backend request that the cached resolution was computed for.
_backend_requested: str | None = None

# Common English function words that _extract_themes_fallback() discards
# before counting words and two-word phrases. The fallback tokenizer only
# yields words of three or more characters, so shorter words need no entry.
_STOP_WORDS = {
    "about",
    "after",
    "again",
    "against",
    "also",
    "and",
    "because",
    "before",
    "being",
    "between",
    "both",
    "could",
    "during",
    "from",
    "have",
    "into",
    "just",
    "like",
    "more",
    "most",
    "over",
    "same",
    "some",
    "such",
    "than",
    "that",
    "their",
    "them",
    "then",
    "there",
    "these",
    "they",
    "this",
    "those",
    "through",
    "under",
    "until",
    "very",
    "what",
    "when",
    "where",
    "which",
    "while",
    "with",
    "would",
    "your",
}


def _resolve_backend() -> str:
    """Pick the NLP backend to use and cache the decision at module level.

    The request is read from the ``JOURNAL_NLP_BACKEND`` environment variable
    (defaulting to ``NLP_BACKEND``); unrecognized values are treated as
    ``"auto"``. A cached result is reused as long as the request is unchanged.

    Returns:
        ``"spacy"`` when the ``en_core_web_sm`` pipeline loaded, otherwise
        ``"fallback"``.

    Raises:
        RuntimeError: if ``"spacy"`` was requested explicitly and spaCy or its
            model could not be loaded.
    """
    global _backend_name, _spacy_nlp, _fallback_warning_printed, _backend_requested

    wanted = os.getenv("JOURNAL_NLP_BACKEND", NLP_BACKEND).strip().lower()
    if wanted not in _VALID_BACKENDS:
        wanted = _BACKEND_AUTO

    # The cache is only valid for the request it was computed for, so a
    # setting changed at runtime triggers a fresh resolution.
    cache_hit = _backend_name is not None and wanted == _backend_requested
    if cache_hit:
        return _backend_name

    _backend_name, _spacy_nlp = None, None
    _backend_requested = wanted

    if wanted == _BACKEND_FALLBACK:
        _backend_name = _BACKEND_FALLBACK
        return _backend_name

    try:
        import spacy

        pipeline = spacy.load("en_core_web_sm")
    except Exception as exc:
        # Only an explicit "spacy" request turns a load failure into an error;
        # "auto" degrades to the heuristic backend below.
        if wanted == _BACKEND_SPACY:
            raise RuntimeError(
                "JOURNAL_NLP_BACKEND=spacy but spaCy backend initialization failed. "
                "Install optional NLP deps/model or set JOURNAL_NLP_BACKEND=auto|fallback."
            ) from exc
    else:
        _spacy_nlp = pipeline
        _backend_name = _BACKEND_SPACY
        return _backend_name

    _backend_name = _BACKEND_FALLBACK
    if _fallback_warning_printed:
        return _backend_name

    # Warn at most once per process about the silent downgrade.
    print(
        "WARNING: spaCy backend unavailable; using fallback NLP heuristics. "
        "Set JOURNAL_NLP_BACKEND=fallback to silence this warning."
    )
    _fallback_warning_printed = True
    return _backend_name


def get_nlp_backend() -> str:
    """Return the name of the NLP backend in effect: 'spacy' or 'fallback'.

    Delegates to ``_resolve_backend()``, so the first call (or a call after
    the requested backend changed) performs the actual resolution.
    """
    active_backend = _resolve_backend()
    return active_backend


def count_tokens(text: str) -> int:
    """Roughly estimate the token count of ``text``.

    Uses the crude rule of one token per four characters and never reports
    fewer than one token, even for an empty string.
    """
    estimate = len(text) // 4
    return estimate if estimate > 1 else 1


def llama_cpp_generate(
    prompt: str,
    model: str | None = None,
    temperature: float = 0.7,
    max_tokens: int = 2048,
) -> str:
    """Send ``prompt`` to the configured text-generation endpoint.

    The endpoint URL, model name and timeout come from the
    ``JOURNAL_LLAMA_CPP_URL``, ``JOURNAL_LLAMA_CPP_MODEL`` and
    ``JOURNAL_LLAMA_CPP_TIMEOUT`` environment variables, falling back to the
    corresponding config constants when unset, blank or invalid.

    Args:
        prompt: Text to complete.
        model: Model name; when falsy the environment/config value is used.
        temperature: Sampling temperature forwarded to the payload builder.
        max_tokens: Generation limit forwarded to the payload builder.

    Returns:
        The generated text, ``"No response generated."`` when the reply holds
        no parsable text, or an ``"Error communicating with llama.cpp
        server: ..."`` message when the request fails. Request errors are
        reported in the return value rather than raised.
    """
    configured_url = os.getenv("JOURNAL_LLAMA_CPP_URL", LLAMA_CPP_URL).strip()
    endpoint = normalize_endpoint_url(configured_url or LLAMA_CPP_URL, "/v1/completions")

    model_name = model
    if not model_name:
        model_name = os.getenv("JOURNAL_LLAMA_CPP_MODEL", LLAMA_CPP_MODEL).strip()
    if not model_name:
        model_name = LLAMA_CPP_MODEL

    timeout_text = os.getenv("JOURNAL_LLAMA_CPP_TIMEOUT", str(LLAMA_CPP_TIMEOUT)).strip()
    try:
        timeout_seconds = int(timeout_text)
    except ValueError:
        # Route unparsable values through the non-positive check below.
        timeout_seconds = 0
    if timeout_seconds <= 0:
        timeout_seconds = LLAMA_CPP_TIMEOUT

    request_body = build_text_payload(
        prompt,
        model_name,
        detect_text_endpoint_kind(endpoint),
        temperature=temperature,
        max_tokens=max_tokens,
    )
    try:
        reply = requests.post(endpoint, json=request_body, timeout=timeout_seconds)
        reply.raise_for_status()
        generated = extract_text_response(reply.json())
        if not generated:
            print("DEBUG: No parsable text in response payload")
            return "No response generated."

        print(f"DEBUG: Generated {len(generated)} characters")
        if len(generated) < 10:
            print(f"DEBUG: Short response: '{generated}'")
        return generated
    except Exception as e:
        # Best-effort contract: callers receive an error string, not an exception.
        print(f"DEBUG: Exception occurred: {e}")
        return f"Error communicating with llama.cpp server: {e}"


def generate_embedding(text: str) -> list[float]:
    """Request an embedding vector for ``text`` from the embedding endpoint.

    The URL and model come from ``JOURNAL_EMBEDDING_API_URL`` and
    ``JOURNAL_EMBEDDING_MODEL_NAME`` (falling back to the config constants
    when unset or blank). The request timeout reuses the llama.cpp timeout
    setting, ``JOURNAL_LLAMA_CPP_TIMEOUT``.

    Returns:
        The embedding extracted from the response, or an empty list when the
        response carries no embedding or the request fails. Errors are
        printed, not raised.
    """
    configured_url = os.getenv("JOURNAL_EMBEDDING_API_URL", EMBEDDING_API_URL).strip()
    endpoint = normalize_embedding_url(configured_url or EMBEDDING_API_URL)

    model_name = os.getenv("JOURNAL_EMBEDDING_MODEL_NAME", EMBEDDING_MODEL_NAME).strip()
    if not model_name:
        model_name = EMBEDDING_MODEL_NAME

    timeout_text = os.getenv("JOURNAL_LLAMA_CPP_TIMEOUT", str(LLAMA_CPP_TIMEOUT)).strip()
    try:
        timeout_seconds = int(timeout_text)
    except ValueError:
        # Route unparsable values through the non-positive check below.
        timeout_seconds = 0
    if timeout_seconds <= 0:
        timeout_seconds = LLAMA_CPP_TIMEOUT

    request_body = {"model": model_name, "input": text}
    try:
        # Reusing LLAMA_CPP_TIMEOUT for now
        reply = requests.post(endpoint, json=request_body, timeout=timeout_seconds)
        reply.raise_for_status()
        vector = extract_embedding_response(reply.json())
        if not vector:
            print("DEBUG: No embedding data in response")
            return []
        return vector
    except Exception as e:
        print(f"DEBUG: Exception occurred during embedding generation: {e}")
        return []


def synthesize_summaries(chunk_summaries: list[str]) -> str:
    """Merge per-chunk summaries into one final report via the language model.

    The summaries are joined with a ``---`` divider, wrapped in a short
    instruction prompt and sent to ``llama_cpp_generate``. Progress details
    are printed as DEBUG lines.

    Returns:
        Whatever ``llama_cpp_generate`` returns for the synthesis prompt.
    """
    divider = "\n\n---\n\n"
    merged = divider.join(chunk_summaries)
    print(
        f"DEBUG: Synthesizing {len(chunk_summaries)} summaries, total chars: {len(merged)}"
    )

    # Deliberately minimal prompt: instruction, material, answer cue.
    synthesis_prompt = (
        "Please analyze and summarize the following Journals as a professional Psychologist:\n\n"
        + merged
        + "\n\n"
        + "Summary:"
    )
    print(f"DEBUG: Synthesis prompt length: {len(synthesis_prompt)} characters")

    report = llama_cpp_generate(synthesis_prompt, max_tokens=2048)
    # Only the first 100 characters are echoed to keep the log readable.
    print(f"DEBUG: Final synthesis result: '{report[:100]}...'")
    return report


def summarize_chunk(entries: list[JournalEntry]) -> str:
    """Ask the language model for a psychological analysis of one batch.

    The ``raw_content`` of every entry is joined with a ``---`` divider and
    embedded in an instruction prompt.

    Returns:
        Whatever ``llama_cpp_generate`` returns for the batch prompt.
    """
    divider = "\n\n---\n\n"
    batch_text = divider.join(entry.raw_content for entry in entries)

    instructions = (
        "You are a psychological analysis agent. Given the following journal entries, "
        "analyze and report on:\n"
        "- Recurring psychological themes\n"
        "- Behavioral patterns\n"
        "- Emotional trends\n"
        "- Coping mechanisms\n"
        "- Notable changes over time\n\n"
    )
    closing = "Respond with a concise, insightful analysis for this batch."
    prompt = instructions + "Journal entries:\n" + f"{batch_text}\n\n" + closing
    return llama_cpp_generate(prompt, max_tokens=2048)


def extract_themes(text: str) -> list[str]:
    """Extract up to ten recurring themes from ``text``.

    With the spaCy backend active, candidate themes are named entities of a
    few selected labels plus noun chunks of two to four words; candidates
    seen more than once are returned, most frequent first. If the spaCy
    backend is not active, or spaCy processing raises, the heuristic
    ``_extract_themes_fallback`` is used instead.

    Args:
        text: Free-form text to analyze.

    Returns:
        Lower-cased theme strings (possibly empty).
    """
    backend = _resolve_backend()
    if backend == _BACKEND_SPACY and _spacy_nlp is not None:
        entity_labels = ("PERSON", "ORG", "EVENT", "WORK_OF_ART", "LAW", "LANGUAGE")
        try:
            doc = _spacy_nlp(text)
            # Entities first, then noun chunks: insertion order decides how
            # Counter.most_common breaks ties between equally frequent themes.
            candidates = [
                ent.text.lower() for ent in doc.ents if ent.label_ in entity_labels
            ]
            candidates.extend(
                chunk.text.lower()
                for chunk in doc.noun_chunks
                if 2 <= len(chunk.text.split()) <= 4
            )
            return [
                theme
                for theme, count in Counter(candidates).most_common(10)
                if count > 1
            ]
        except Exception as exc:
            # Still best-effort, but no longer silent: a broken spaCy model
            # would otherwise degrade results with no trace.
            print(
                f"DEBUG: spaCy theme extraction failed; using fallback heuristics: {exc}"
            )

    return _extract_themes_fallback(text)


def _extract_themes_fallback(text: str) -> list[str]:
    """Heuristic theme extraction that needs no NLP library.

    Lower-cases ``text``, keeps words of at least three characters that are
    not in ``_STOP_WORDS``, and counts both single words and pairs of
    neighbouring kept words. Pairs are formed after stop-word removal, so the
    two words need not be adjacent in the original text.

    Returns:
        Up to ten themes: repeated two-word phrases first, then repeated
        single words, each group ordered by descending frequency.
    """
    tokens = [
        token
        for token in re.findall(r"[A-Za-z][A-Za-z'-]{2,}", text.lower())
        if token not in _STOP_WORDS
    ]
    if not tokens:
        return []

    word_counts = Counter(tokens)
    # Immediate repetitions ("very very") are not treated as phrases.
    bigram_counts = Counter(
        f"{left} {right}" for left, right in zip(tokens, tokens[1:]) if left != right
    )

    repeated_phrases = [
        phrase for phrase, seen in bigram_counts.most_common(20) if seen > 1
    ]
    if len(repeated_phrases) >= 10:
        return repeated_phrases[:10]

    repeated_words = [
        word
        for word, seen in word_counts.most_common(30)
        if seen > 1 and word not in repeated_phrases
    ]
    return (repeated_phrases + repeated_words)[:10]


def analyze_fragments(fragments: list[Fragment]) -> str:
    """Build a one-line textual summary of the recorded fragments.

    Reports how many fragments there are, the most frequent ``type`` and, if
    any tags exist, the three most frequent tags.

    Args:
        fragments: Objects exposing ``type`` and ``tags`` attributes. A
            fragment whose ``tags`` is ``None`` or empty contributes no tags.

    Returns:
        ``"No fragments recorded."`` for an empty input, otherwise the
        summary sentence(s).
    """
    if not fragments:
        return "No fragments recorded."

    type_counts = Counter(frag.type for frag in fragments)
    # `or ()` tolerates fragments stored without tags (tags=None), which
    # would otherwise raise TypeError while collecting tags.
    tag_counts = Counter(tag for frag in fragments for tag in (frag.tags or ()))

    # fragments is non-empty here, so there is always a most common type.
    top_type, top_count = type_counts.most_common(1)[0]
    analysis = f"{len(fragments)} discrete events recorded. "
    analysis += f"Most frequent: {top_type} ({top_count} times). "
    if tag_counts:
        top_tags = [tag for tag, _ in tag_counts.most_common(3)]
        analysis += f"Key themes: {', '.join(top_tags)}."
    return analysis


def summarize_all_entries(entries: list[JournalEntry]) -> str:
    """Produce one overall report for all journal entries.

    Entries are split into batches by ``chunk_journal_entries``, each batch
    is summarized with ``summarize_chunk``, and the partial summaries are
    merged by ``synthesize_summaries``. Progress is printed along the way.

    Returns:
        The synthesized report, or ``"No entries found to analyze."`` when
        ``entries`` is empty.
    """
    # Resolve the NLP backend up front; the returned name is not needed here.
    _resolve_backend()
    if not entries:
        return "No entries found to analyze."

    # Chunk entries to fit model context
    batches = chunk_journal_entries(entries)
    batch_total = len(batches)
    partial_summaries = []
    for number, batch in enumerate(batches, start=1):
        print(f"Analyzing chunk {number}/{batch_total} ({len(batch)} entries)...")
        partial_summaries.append(summarize_chunk(batch))

    print("Synthesizing final report...")
    return synthesize_summaries(partial_summaries)


def identify_patterns(entries: list[JournalEntry]) -> list[str]:
    """Ask the language model for a pattern analysis across all entries.

    All entry texts are joined with single spaces into one prompt (no
    chunking is applied here). The reported date span is taken from the
    first and last entry as given, so it is only meaningful when the caller
    passes entries in chronological order.

    Returns:
        A one-element list holding the model's reply, or
        ``["No entries to analyze."]`` when ``entries`` is empty.
    """
    # Resolve the NLP backend up front; the returned name is not needed here.
    _resolve_backend()
    if not entries:
        return ["No entries to analyze."]

    combined_text = " ".join([entry.raw_content for entry in entries])
    dates = [entry.date for entry in entries]
    first_date, last_date = dates[0], dates[-1]

    instructions = (
        "You are a psychological pattern analysis agent. "
        "Given the following journal entries, identify:\n"
        "- Recurring psychological themes\n"
        "- Behavioral patterns\n"
        "- Emotional trends\n"
        "- Coping mechanisms\n"
        "- Notable changes over time\n\n"
    )
    prompt = (
        instructions
        + f"Journal entries span from {first_date} to {last_date}.\n"
        + f"Entries:\n{combined_text}\n\n"
        + "Respond with a concise, insightful pattern analysis."
    )
    return [llama_cpp_generate(prompt)]


def chunk_journal_entries(
    entries: list[JournalEntry], token_budget: int | None = None
) -> list[list[JournalEntry]]:
    """Split ``entries`` into consecutive batches that fit a token budget.

    Args:
        entries: Journal entries, kept in their given order.
        token_budget: Maximum estimated tokens per batch. When ``None`` the
            value is read from ``JOURNAL_CHUNK_TOKEN_BUDGET``, falling back
            to ``CHUNK_TOKEN_BUDGET`` if unset, unparsable or non-positive.

    Returns:
        A list of non-empty batches. An entry whose own estimate exceeds the
        budget is not split; it ends up in a batch by itself.
    """
    if token_budget is None:
        budget_text = os.getenv("JOURNAL_CHUNK_TOKEN_BUDGET", str(CHUNK_TOKEN_BUDGET)).strip()
        try:
            parsed_budget = int(budget_text)
        except ValueError:
            # Route unparsable values through the non-positive check below.
            parsed_budget = 0
        token_budget = parsed_budget if parsed_budget > 0 else CHUNK_TOKEN_BUDGET

    batches = []
    pending = []
    pending_tokens = 0

    for entry in entries:
        cost = count_tokens(entry.raw_content)
        overflows = pending_tokens + cost > token_budget

        # Close the running batch before it would exceed the budget, but
        # never emit an empty batch.
        if overflows and pending:
            batches.append(pending)
            pending, pending_tokens = [], 0

        pending.append(entry)
        pending_tokens += cost

    if pending:
        batches.append(pending)
    return batches


def summarize_entry(entry: JournalEntry) -> str:
    """Ask the language model for a short analysis of a single entry.

    Returns:
        Whatever ``llama_cpp_generate`` returns for the entry prompt.
    """
    # Resolve the NLP backend up front; the returned name is not needed here.
    _resolve_backend()

    instructions = (
        "You are a psychological analysis agent. Given the following journal entry, "
        "analyze and report on:\n"
        "- Recurring psychological themes\n"
        "- Behavioral patterns\n\n"
    )
    closing = "Respond with a concise, insightful analysis."
    prompt = instructions + "Journal entry:\n" + f"{entry.raw_content}\n\n" + closing
    return llama_cpp_generate(prompt, max_tokens=2048)