import os

import chromadb
from flask import request, jsonify
from llama_index.core import VectorStoreIndex, StorageContext, Settings
from llama_index.core.prompts import PromptTemplate
from llama_index.vector_stores.chroma import ChromaVectorStore

from document_readers import (
    load_files,
    load_websites,
    load_text_contents,
    load_youtube_transcripts,
)

# Values accepted for the request's 'type' field.
_SOURCE_TYPES = ("file", "website", "text", "youtube")

# Base directories that client-supplied names are joined onto.
_DATA_DIR = "data"
_CHROMA_DIR = "./chroma"

# Persona used when the request does not carry its own 'system_instructions'.
_DEFAULT_SYSTEM_INSTRUCTIONS = (
    "I am AskPoint, a smart and friendly chatbot assistant here to help you. "
    "I always speak in the first person to feel natural and conversational, "
    "like you're chatting with a real assistant. "
    "If a user greets me, I reply warmly and ask how I can help — without "
    "giving too much information unless asked. "
    "When users ask questions, I give thoughtful, accurate, and "
    "easy-to-understand answers using the context I’ve been trained on. "
    "If something isn’t clear, I’ll kindly ask for more info. "
    "My goal is to be helpful, approachable, and supportive."
)

# QA prompt. 'system_instructions' is a template variable (bound with
# partial_format) rather than being spliced into the text, so braces inside
# the instructions are never parsed as template placeholders.
_QA_TEMPLATE = """
        System Instructions: {system_instructions}
        ---------------------
        Context Information:
        ---------------------
        {context_str}
        ---------------------
        Based on the context above, answer the query below.

        Query: {query_str}
        Answer:
        """


def _error(message, status=400):
    """Build the JSON error response tuple used by every failure path."""
    return jsonify({"status": "error", "message": message}), status


def _is_inside(base_dir, candidate):
    """Return True if `candidate` lexically resolves to a path strictly under `base_dir`."""
    base = os.path.abspath(base_dir)
    target = os.path.abspath(candidate)
    return target.startswith(base + os.sep)


def _resolve_data_paths(file_path):
    """Prefix client file name(s) with the data directory; return (paths, error_message)."""
    names = file_path if isinstance(file_path, list) else [file_path]
    if not all(isinstance(name, str) and name for name in names):
        return None, "file_path must be a non-empty string or a list of non-empty strings."

    paths = [f"{_DATA_DIR}/{name}" for name in names]
    # The names come from the client: refuse anything (e.g. '../x') that
    # would make the loader read outside the data directory.
    if any("\x00" in path or not _is_inside(_DATA_DIR, path) for path in paths):
        return None, "file_path must refer to files inside the data directory."

    # Keep the original shape: a list in gives a list out, a single name a single path.
    return (paths if isinstance(file_path, list) else paths[0]), None


def _load_documents(data_type, data):
    """Load documents for a validated `data_type`; return (documents, source_info, error_message)."""
    if data_type == "file":
        file_path = data.get("file_path")
        if not file_path:
            return None, "", "file_path is required for type file."
        full_paths, problem = _resolve_data_paths(file_path)
        if problem:
            return None, "", problem
        return list(load_files(full_paths)), f"file(s): {file_path}", None

    # The remaining types all take a non-empty list under a type-specific key.
    list_sources = {
        "website": ("urls", load_websites, "{count} URL(s)"),
        "text": ("text_contents", load_text_contents, "direct text contents"),
        "youtube": ("youtube_links", load_youtube_transcripts, "{count} YouTube link(s)"),
    }
    field, loader, description = list_sources[data_type]
    items = data.get(field)
    if not items or not isinstance(items, list):
        return None, "", f"{field} must be provided as a list for type {data_type}."
    return list(loader(items)), description.format(count=len(items)), None


def train_vectorstore():
    """Index documents from the JSON request body into a per-chatbot Chroma collection.

    Request fields:
        type: one of 'file', 'website', 'text', 'youtube' (required).
        org_id: non-empty string; selects the Chroma directory ``./chroma/<org_id>``.
        chatbot_id: non-empty string; used as the Chroma collection name.
        file_path / urls / text_contents / youtube_links: the source for the
            chosen type. File names are resolved under ``data/``.
        similarity_top_k: positive integer, default 3. Only used for the test query.
        system_instructions: non-empty string placed at the top of the QA
            prompt; defaults to the AskPoint persona.
        query: optional string. When non-empty it is run against the new
            index and the answer is included in the response.

    Returns:
        A ``(response, status)`` tuple: 200 with a summary message (plus
        ``test_query`` / ``test_response`` when a query was given), or 400
        with ``{"status": "error", "message": ...}`` for invalid input.
    """
    data = request.get_json()
    if not data:
        return _error("No JSON payload provided.")
    if not isinstance(data, dict):
        # A JSON array/scalar has no .get(); reject it instead of crashing.
        return _error("JSON payload must be an object.")

    # Required fields
    data_type = data.get("type")
    org_id = data.get("org_id")
    chatbot_id = data.get("chatbot_id")

    if not data_type or data_type not in _SOURCE_TYPES:
        return _error(
            "Invalid or missing 'type'. Must be one of: 'file', 'website', 'text', 'youtube'."
        )
    if not org_id or not isinstance(org_id, str):
        return _error("'org_id' is required and must be a string.")
    if not chatbot_id or not isinstance(chatbot_id, str):
        return _error("'chatbot_id' is required and must be a string.")

    # org_id becomes a directory name: it must not escape the Chroma root.
    chroma_db_path = f"{_CHROMA_DIR}/{org_id}"
    if "\x00" in org_id or not _is_inside(_CHROMA_DIR, chroma_db_path):
        return _error("'org_id' must not contain path traversal sequences.")

    # Optional fields
    similarity_top_k = data.get("similarity_top_k", 3)
    # bool is a subclass of int, so exclude it explicitly.
    if (
        isinstance(similarity_top_k, bool)
        or not isinstance(similarity_top_k, int)
        or similarity_top_k <= 0
    ):
        return _error("'similarity_top_k' must be a positive integer.")

    system_instructions = data.get("system_instructions", _DEFAULT_SYSTEM_INSTRUCTIONS)
    if not isinstance(system_instructions, str) or not system_instructions.strip():
        return _error("'system_instructions' must be a non-empty string.")

    test_query = data.get("query", "")
    if test_query and not isinstance(test_query, str):
        return _error("'query' must be a string if provided.")

    documents, source_info, problem = _load_documents(data_type, data)
    if problem:
        return _error(problem)
    if not documents:
        return _error("No documents loaded.")

    # One persistent Chroma store per organisation, one collection per chatbot.
    chroma_client = chromadb.PersistentClient(path=chroma_db_path)
    try:
        chroma_collection = chroma_client.get_or_create_collection(name=chatbot_id)
    except ValueError as exc:
        # NOTE(review): assumes Chroma reports names that break its naming
        # rules as ValueError — confirm against the installed version.
        return _error(f"Invalid 'chatbot_id': {exc}")
    vector_store = ChromaVectorStore(chroma_collection=chroma_collection)

    storage_context = StorageContext.from_defaults(vector_store=vector_store)
    index = VectorStoreIndex.from_documents(
        documents,
        storage_context=storage_context,
        embed_model=Settings.embed_model,
    )

    payload = {
        "status": "success",
        "message": f"Trained vector store with {len(documents)} documents from {source_info}.",
    }

    if test_query:
        prompt_template = PromptTemplate(
            template=_QA_TEMPLATE,
            prompt_type="query",
        ).partial_format(system_instructions=system_instructions)
        query_engine = index.as_query_engine(
            similarity_top_k=similarity_top_k,
            text_qa_template=prompt_template,
        )
        test_response = query_engine.query(test_query)
        payload["test_query"] = test_query
        payload["test_response"] = str(test_response)

    return jsonify(payload), 200
