import gradio as gr
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM
# ----------------------------------------------------------------
# 1) Point to your Hugging Face repo and subfolder
# (where config.json, tokenizer.json, model safetensors, etc. reside).
# ----------------------------------------------------------------
MODEL_REPO = "wuhp/myr1"
SUBFOLDER = "myr1"
# ----------------------------------------------------------------
# 2) Load the tokenizer
# trust_remote_code=True allows custom code (e.g., DeepSeek config/classes).
# ----------------------------------------------------------------
tokenizer = AutoTokenizer.from_pretrained(
    MODEL_REPO,
    subfolder=SUBFOLDER,
    trust_remote_code=True
)
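# Optional safeguard (an assumption, not in the original script): some causal LM
# tokenizers ship without a pad token, so fall back to the EOS token to avoid
# padding warnings during generation.
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token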
# ----------------------------------------------------------------
# 3) Load the model
# - device_map="auto" tries to place layers on GPU and offload remainder to CPU if needed
# - torch_dtype can be float16, float32, bfloat16, etc., depending on GPU support
# ----------------------------------------------------------------
model = AutoModelForCausalLM.from_pretrained(
    MODEL_REPO,
    subfolder=SUBFOLDER,
    trust_remote_code=True,
    device_map="auto",
    torch_dtype=torch.float16,
    low_cpu_mem_usage=True
)
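# Optional sketch (an assumption, not in the original script): prefer bfloat16 on
# GPUs that support it and pass the result as torch_dtype above instead of the
# hard-coded torch.float16.
# preferred_dtype = (
#     torch.bfloat16
#     if torch.cuda.is_available() and torch.cuda.is_bf16_supported()
#     else torch.float16
# )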
# Put model in evaluation mode
model.eval()
# ----------------------------------------------------------------
# 4) Define the generation function
# ----------------------------------------------------------------
def generate_text(prompt, max_length=64, temperature=0.7, top_p=0.9):
    print("=== Starting generation ===")
    # Move input tokens to the same device as the model
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
    try:
        # Generate tokens
        output_ids = model.generate(
            **inputs,
            max_new_tokens=max_length,  # controls how many tokens beyond the prompt are generated
            temperature=temperature,
            top_p=top_p,
            do_sample=True,
            pad_token_id=tokenizer.eos_token_id
        )
        print("=== Generation complete ===")
    except Exception as e:
        print(f"Error during generation: {e}")
        return str(e)
    # Decode back to text (skipping special tokens)
    return tokenizer.decode(output_ids[0], skip_special_tokens=True)
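# Quick smoke test (hypothetical usage, not part of the original app): call the
# function directly from a Python shell to verify the model loads and generates.
#   >>> print(generate_text("Hello, world!", max_length=32))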
# ----------------------------------------------------------------
# 5) Build a Gradio UI
# ----------------------------------------------------------------
demo = gr.Interface(
    fn=generate_text,
    inputs=[
        gr.Textbox(
            lines=4,
            label="Prompt",
            placeholder="Try a short prompt, e.g., Hello!"
        ),
        gr.Slider(8, 512, value=64, step=1, label="Max New Tokens"),
        gr.Slider(0.0, 1.5, value=0.7, step=0.1, label="Temperature"),
        gr.Slider(0.0, 1.0, value=0.9, step=0.05, label="Top-p"),
    ],
    outputs="text",
    title="DeepSeek R1 Demo",
    description="Generates text using the large DeepSeek model."
)
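# Optional (an assumption, not in the original script): enabling Gradio's request
# queue lets long generations run without hitting the default request timeout when
# several users submit prompts at once.
# demo.queue()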
# ----------------------------------------------------------------
# 6) Run the Gradio app
# ----------------------------------------------------------------
if __name__ == "__main__":
    demo.launch()
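    # Optional launch flags (assumptions, not in the original): share=True exposes a
    # temporary public URL; server_name="0.0.0.0" listens on all network interfaces.
    # demo.launch(share=True, server_name="0.0.0.0")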