import os
import sys
import ast

import gradio as gr
import torch
import streamlit as st

# No "spaces" or "transformers_gradio" imports here, since you said you want to use
# *your model* (myr1), not external Spaces demos.
from transformers import AutoTokenizer, AutoModelForCausalLM

# ------------------------------------------------------------------------------
# 1) OPTIONAL: Environment Variable Code (MY_SCRIPT_CONTENT)
#    If you don't need this dynamic script execution, remove the entire block.
# ------------------------------------------------------------------------------
script_repr = os.getenv("MY_SCRIPT_CONTENT")
if script_repr:
    # Attempt to parse & exec the script from the environment variable
    try:
        script_content = ast.literal_eval(script_repr)
        exec(script_content)
    except (ValueError, SyntaxError) as e:
        # Using Streamlit to display an error message in case this is run within a Streamlit environment
        st.error(f"Error evaluating script from environment variable: {e}")
else:
    print("No extra script content found in 'MY_SCRIPT_CONTENT'.")

# ------------------------------------------------------------------------------
# 2) Model References for "myr1" from Hugging Face
#    Make sure your HF repo is "wuhp/myr1" and your actual model files are in
#    the subfolder "myr1".
# ------------------------------------------------------------------------------
MODEL_REPO = "wuhp/myr1"  # The HF repository name
SUBFOLDER = "myr1"        # The folder inside the repo containing config.json etc.

# ------------------------------------------------------------------------------
# 3) Load Tokenizer & Model
#    trust_remote_code=True allows custom config/modeling code if you have it in the repo.
# ------------------------------------------------------------------------------
print("Loading tokenizer...")
tokenizer = AutoTokenizer.from_pretrained(
    MODEL_REPO,
    subfolder=SUBFOLDER,
    trust_remote_code=True
)

print("Loading model...")
model = AutoModelForCausalLM.from_pretrained(
    MODEL_REPO,
    subfolder=SUBFOLDER,
    trust_remote_code=True,
    device_map="auto",          # auto-shard across GPU(s) if needed, else CPU fallback
    torch_dtype=torch.float16,  # or torch.float32, torch.bfloat16, etc.
    low_cpu_mem_usage=True
)
model.eval()
print("Model loaded successfully.")

# ------------------------------------------------------------------------------
# 4) Define Generation Function for Gradio
# ------------------------------------------------------------------------------
def generate_text(prompt, max_new_tokens=64, temperature=0.7, top_p=0.9):
    """
    Generate text using the myr1 model from Hugging Face.
    """
    print("=== Starting generation ===")
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
    try:
        output_ids = model.generate(
            **inputs,
            max_new_tokens=max_new_tokens,  # limit how many tokens beyond the prompt
            temperature=temperature,
            top_p=top_p,
            do_sample=True,
            pad_token_id=tokenizer.eos_token_id
        )
        print("=== Generation complete ===")
    except Exception as e:
        print(f"Error during generation: {e}")
        return str(e)

    return tokenizer.decode(output_ids[0], skip_special_tokens=True)
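# ------------------------------------------------------------------------------
# OPTIONAL: Streaming variant (illustrative sketch, not wired into the UI below)
# This shows one way the same model could stream partial output to Gradio using
# transformers' TextIteratorStreamer. The function name `generate_text_stream`
# and its defaults are assumptions for illustration only; swap it in for `fn=`
# after verifying it against your installed transformers/Gradio versions.
# ------------------------------------------------------------------------------
from threading import Thread
from transformers import TextIteratorStreamer

def generate_text_stream(prompt, max_new_tokens=64, temperature=0.7, top_p=0.9):
    """Yield progressively longer completions as tokens are generated (sketch)."""
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
    streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
    generation_kwargs = dict(
        **inputs,
        max_new_tokens=max_new_tokens,
        temperature=temperature,
        top_p=top_p,
        do_sample=True,
        pad_token_id=tokenizer.eos_token_id,
        streamer=streamer,
    )
    # Run generation in a background thread so the streamer can be consumed here.
    Thread(target=model.generate, kwargs=generation_kwargs).start()
    partial = ""
    for new_text in streamer:
        partial += new_text
        yield partial  # Gradio renders successive yields from a generator function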
# ------------------------------------------------------------------------------
# 5) Build a Gradio UI
# ------------------------------------------------------------------------------
demo = gr.Interface(
    fn=generate_text,
    inputs=[
        gr.Textbox(
            lines=4,
            label="Prompt",
            placeholder="Ask a question or start a story..."
        ),
        gr.Slider(
            minimum=8,
            maximum=512,
            step=1,
            value=64,
            label="Max New Tokens"
        ),
        gr.Slider(
            minimum=0.0,
            maximum=1.5,
            step=0.1,
            value=0.7,
            label="Temperature"
        ),
        gr.Slider(
            minimum=0.0,
            maximum=1.0,
            step=0.05,
            value=0.9,
            label="Top-p (nucleus sampling)"
        ),
    ],
    outputs="text",
    title="DeepSeek myr1 Demo",
    description=(
        "Generates text with the 'myr1' model from the Hugging Face Hub. "
        "Enter a prompt and adjust generation settings."
    )
)

# ------------------------------------------------------------------------------
# 6) Launch the App
# ------------------------------------------------------------------------------
if __name__ == "__main__":
    print("Launching Gradio demo...")
    demo.launch()
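# Note (assumption to verify against your installed Gradio version): on Hugging Face
# Spaces with multiple concurrent users you would typically enable request queuing
# before launching, e.g. `demo.queue().launch()`.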