import os
import sys
import ast
import gradio as gr
import torch
import streamlit as st
# No "spaces" or "transformers_gradio" imports here, since you said you want to use *your model* (myr1),
# not external Spaces demos.
from transformers import AutoTokenizer, AutoModelForCausalLM
# ------------------------------------------------------------------------------
# 1) OPTIONAL: Environment Variable Code (MY_SCRIPT_CONTENT)
# If you don't need this dynamic script execution, remove the entire block.
# ------------------------------------------------------------------------------
script_repr = os.getenv("MY_SCRIPT_CONTENT")
if script_repr:
    # Attempt to parse & exec the script from the environment variable
    try:
        script_content = ast.literal_eval(script_repr)
        exec(script_content)
    except (ValueError, SyntaxError) as e:
        # Using Streamlit to display an error message in case this is run within a Streamlit environment
        st.error(f"Error evaluating script from environment variable: {e}")
else:
    print("No extra script content found in 'MY_SCRIPT_CONTENT'.")
# ------------------------------------------------------------------------------
# 2) Model References for "myr1" from Hugging Face
# Make sure your HF repo is "wuhp/myr1" and your actual model files are in subfolder "myr1"
# ------------------------------------------------------------------------------
MODEL_REPO = "wuhp/myr1" # The HF repository name
SUBFOLDER = "myr1" # The folder inside the repo containing config.json etc.
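# Optional sanity check (a sketch, assuming huggingface_hub is available — it
# ships as a transformers dependency): confirm the subfolder layout on the Hub.
#   from huggingface_hub import list_repo_files
#   print(list_repo_files(MODEL_REPO))  # expect entries like "myr1/config.json"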
# ------------------------------------------------------------------------------
# 3) Load Tokenizer & Model
# trust_remote_code=True to allow custom config/modeling if you have them in the repo.
# ------------------------------------------------------------------------------
print("Loading tokenizer...")
tokenizer = AutoTokenizer.from_pretrained(
    MODEL_REPO,
    subfolder=SUBFOLDER,
    trust_remote_code=True
)
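# Note (an assumption about this model, true of many causal-LM tokenizers):
# there may be no dedicated pad token. The generate() call below reuses
# eos_token_id for padding, so single-prompt generation works as-is; for
# batched prompts one would typically set:
#   tokenizer.pad_token = tokenizer.eos_token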
print("Loading model...")
model = AutoModelForCausalLM.from_pretrained(
    MODEL_REPO,
    subfolder=SUBFOLDER,
    trust_remote_code=True,
    device_map="auto",          # auto-shard across GPU(s) if needed, else CPU fallback
    torch_dtype=torch.float16,  # or torch.float32, torch.bfloat16, etc.
    low_cpu_mem_usage=True
)
model.eval()
print("Model loaded successfully.")
# ------------------------------------------------------------------------------
# 4) Define Generation Function for Gradio
# ------------------------------------------------------------------------------
def generate_text(prompt, max_new_tokens=64, temperature=0.7, top_p=0.9):
    """
    Generate text using the myr1 model from Hugging Face.
    """
    print("=== Starting generation ===")
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
    try:
        output_ids = model.generate(
            **inputs,
            max_new_tokens=max_new_tokens,  # limit how many tokens are generated beyond the prompt
            temperature=temperature,
            top_p=top_p,
            do_sample=True,
            pad_token_id=tokenizer.eos_token_id
        )
        print("=== Generation complete ===")
    except Exception as e:
        print(f"Error during generation: {e}")
        return str(e)
    return tokenizer.decode(output_ids[0], skip_special_tokens=True)
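# Quick smoke test (a sketch with an assumed example prompt; left commented out
# so it does not run on every app start):
#   print(generate_text("Once upon a time", max_new_tokens=32))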
# ------------------------------------------------------------------------------
# 5) Build a Gradio UI
# ------------------------------------------------------------------------------
demo = gr.Interface(
    fn=generate_text,
    inputs=[
        gr.Textbox(
            lines=4,
            label="Prompt",
            placeholder="Ask a question or start a story..."
        ),
        gr.Slider(
            minimum=8, maximum=512, step=1, value=64,
            label="Max New Tokens"
        ),
        gr.Slider(
            minimum=0.1, maximum=1.5, step=0.1, value=0.7,  # must stay > 0: generate() rejects temperature=0 when do_sample=True
            label="Temperature"
        ),
        gr.Slider(
            minimum=0.0, maximum=1.0, step=0.05, value=0.9,
            label="Top-p (nucleus sampling)"
        ),
    ],
    outputs="text",
    title="DeepSeek myr1 Demo",
    description=(
        "Generates text with the 'myr1' model from the Hugging Face Hub. "
        "Enter a prompt and adjust generation settings."
    )
)
# ------------------------------------------------------------------------------
# 6) Launch the App
# ------------------------------------------------------------------------------
if __name__ == "__main__":
    print("Launching Gradio demo...")
    demo.launch()
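# demo.launch() with defaults matches the original. For access from other
# machines one could pass server_name="0.0.0.0" or share=True (a hedged note;
# neither is needed on Hugging Face Spaces, which handles exposure itself).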