import os
import sys
import ast

import gradio as gr
import torch
import streamlit as st

# No "spaces" or "transformers_gradio" imports here, since you said you want to use
# *your model* (myr1), not external Spaces demos.
from transformers import AutoTokenizer, AutoModelForCausalLM

# ------------------------------------------------------------------------------
# 1) OPTIONAL: Environment Variable Code (MY_SCRIPT_CONTENT)
#    If you don't need this dynamic script execution, remove the entire block.
# ------------------------------------------------------------------------------
script_repr = os.getenv("MY_SCRIPT_CONTENT")
if script_repr:
    # Attempt to parse & exec the script from the environment variable
    try:
        script_content = ast.literal_eval(script_repr)
        exec(script_content)
    except (ValueError, SyntaxError) as e:
        # Using Streamlit to display an error message in case this is run within a Streamlit environment
        st.error(f"Error evaluating script from environment variable: {e}")
else:
    print("No extra script content found in 'MY_SCRIPT_CONTENT'.")

# ------------------------------------------------------------------------------
# 2) Model References for "myr1" from Hugging Face
#    Make sure your HF repo is "wuhp/myr1" and your actual model files are in
#    the subfolder "myr1".
# ------------------------------------------------------------------------------
MODEL_REPO = "wuhp/myr1"  # The HF repository name
SUBFOLDER = "myr1"        # The folder inside the repo containing config.json etc.

# ------------------------------------------------------------------------------
# 3) Load Tokenizer & Model
#    trust_remote_code=True allows custom config/modeling code if you have it in the repo.
# ------------------------------------------------------------------------------
print("Loading tokenizer...")
tokenizer = AutoTokenizer.from_pretrained(
    MODEL_REPO,
    subfolder=SUBFOLDER,
    trust_remote_code=True
)

print("Loading model...")
model = AutoModelForCausalLM.from_pretrained(
    MODEL_REPO,
    subfolder=SUBFOLDER,
    trust_remote_code=True,
    device_map="auto",          # auto-shard across GPU(s) if needed, else CPU fallback
    torch_dtype=torch.float16,  # or torch.float32, torch.bfloat16, etc.
    low_cpu_mem_usage=True
)
model.eval()
print("Model loaded successfully.")

# ------------------------------------------------------------------------------
# 4) Define Generation Function for Gradio
# ------------------------------------------------------------------------------
def generate_text(prompt, max_new_tokens=64, temperature=0.7, top_p=0.9):
    """
    Generate text using the myr1 model from Hugging Face.
    """
    print("=== Starting generation ===")
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
    try:
        output_ids = model.generate(
            **inputs,
            max_new_tokens=max_new_tokens,  # limit how many tokens beyond the prompt
            temperature=temperature,
            top_p=top_p,
            do_sample=True,
            pad_token_id=tokenizer.eos_token_id
        )
        print("=== Generation complete ===")
    except Exception as e:
        print(f"Error during generation: {e}")
        return str(e)

    return tokenizer.decode(output_ids[0], skip_special_tokens=True)
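# ------------------------------------------------------------------------------
# OPTIONAL: Streaming variant (illustrative sketch, not wired into the UI below)
# This shows one way the same model could stream partial output to Gradio using
# transformers' TextIteratorStreamer. The function name `generate_text_stream`
# and its defaults are assumptions for illustration only; swap it in for `fn=`
# after verifying it against your installed transformers/Gradio versions.
# ------------------------------------------------------------------------------
from threading import Thread
from transformers import TextIteratorStreamer

def generate_text_stream(prompt, max_new_tokens=64, temperature=0.7, top_p=0.9):
    """Yield progressively longer completions as tokens are generated (sketch)."""
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
    streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
    generation_kwargs = dict(
        **inputs,
        max_new_tokens=max_new_tokens,
        temperature=temperature,
        top_p=top_p,
        do_sample=True,
        pad_token_id=tokenizer.eos_token_id,
        streamer=streamer,
    )
    # Run generation in a background thread so the streamer can be consumed here.
    Thread(target=model.generate, kwargs=generation_kwargs).start()
    partial = ""
    for new_text in streamer:
        partial += new_text
        yield partial  # Gradio renders successive yields from a generator function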
# ------------------------------------------------------------------------------
# 5) Build a Gradio UI
# ------------------------------------------------------------------------------
demo = gr.Interface(
    fn=generate_text,
    inputs=[
        gr.Textbox(
            lines=4,
            label="Prompt",
            placeholder="Ask a question or start a story..."
        ),
        gr.Slider(
            minimum=8,
            maximum=512,
            step=1,
            value=64,
            label="Max New Tokens"
        ),
        gr.Slider(
            minimum=0.0,
            maximum=1.5,
            step=0.1,
            value=0.7,
            label="Temperature"
        ),
        gr.Slider(
            minimum=0.0,
            maximum=1.0,
            step=0.05,
            value=0.9,
            label="Top-p (nucleus sampling)"
        ),
    ],
    outputs="text",
    title="DeepSeek myr1 Demo",
    description=(
        "Generates text with the 'myr1' model from the Hugging Face Hub. "
        "Enter a prompt and adjust generation settings."
    )
)

# ------------------------------------------------------------------------------
# 6) Launch the App
# ------------------------------------------------------------------------------
if __name__ == "__main__":
    print("Launching Gradio demo...")
    demo.launch()
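# Note (assumption to verify against your installed Gradio version): on Hugging Face
# Spaces with multiple concurrent users you would typically enable request queuing
# before launching, e.g. `demo.queue().launch()`.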