import os
import sys
import ast

import gradio as gr
import torch
# No "spaces" or "transformers_gradio" imports here: this demo loads the myr1 model
# directly from the Hub instead of proxying an external Space.

from transformers import AutoTokenizer, AutoModelForCausalLM

# ------------------------------------------------------------------------------
# 1) OPTIONAL: Environment Variable Code (MY_SCRIPT_CONTENT)
#    If you don't need this dynamic script execution, remove the entire block.
# ------------------------------------------------------------------------------
script_repr = os.getenv("MY_SCRIPT_CONTENT")

if script_repr:
    # Attempt to parse & exec the script from environment variable
    try:
        script_content = ast.literal_eval(script_repr)
        exec(script_content)
    except (ValueError, SyntaxError) as e:
        # Report the failure without aborting the app; the demo can still run without the extra script.
        print(f"Error evaluating script from environment variable: {e}", file=sys.stderr)
else:
    print("No extra script content found in 'MY_SCRIPT_CONTENT'.")

# ------------------------------------------------------------------------------
# 2) Model References for "myr1" from Hugging Face
#    Make sure your HF repo is "wuhp/myr1" and your actual model files are in subfolder "myr1"
# ------------------------------------------------------------------------------
MODEL_REPO = "wuhp/myr1"    # The HF repository name
SUBFOLDER = "myr1"          # The folder inside the repo containing config.json etc.

# ------------------------------------------------------------------------------
# 3) Load Tokenizer & Model
#    trust_remote_code=True to allow custom config/modeling if you have them in the repo.
# ------------------------------------------------------------------------------
print("Loading tokenizer...")
tokenizer = AutoTokenizer.from_pretrained(
    MODEL_REPO,
    subfolder=SUBFOLDER,
    trust_remote_code=True
)

print("Loading model...")
model = AutoModelForCausalLM.from_pretrained(
    MODEL_REPO,
    subfolder=SUBFOLDER,
    trust_remote_code=True,
    device_map="auto",         # auto-shard across GPU(s) if needed, else CPU fallback
    torch_dtype=torch.float16,  # or torch.float32, torch.bfloat16, etc.
    low_cpu_mem_usage=True
)
model.eval()

print("Model loaded successfully.")

# ------------------------------------------------------------------------------
# 4) Define Generation Function for Gradio
# ------------------------------------------------------------------------------
def generate_text(prompt, max_new_tokens=64, temperature=0.7, top_p=0.9):
    """
    Generate text using the myr1 model from Hugging Face.
    """
    print("=== Starting generation ===")
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)

    try:
        output_ids = model.generate(
            **inputs,
            max_new_tokens=max_new_tokens,  # limit how many tokens beyond the prompt
            temperature=temperature,
            top_p=top_p,
            do_sample=True,
            pad_token_id=tokenizer.eos_token_id
        )
        print("=== Generation complete ===")
    except Exception as e:
        print(f"Error during generation: {e}")
        return str(e)

    return tokenizer.decode(output_ids[0], skip_special_tokens=True)
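
# Quick smoke test, left commented out so it doesn't slow down startup; uncomment to
# verify generation outside the Gradio UI (the prompt is just an illustrative example):
# print(generate_text("Once upon a time", max_new_tokens=32))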


# ------------------------------------------------------------------------------
# 5) Build a Gradio UI
# ------------------------------------------------------------------------------
demo = gr.Interface(
    fn=generate_text,
    inputs=[
        gr.Textbox(
            lines=4,
            label="Prompt",
            placeholder="Ask a question or start a story..."
        ),
        gr.Slider(
            minimum=8, maximum=512, step=1, value=64,
            label="Max New Tokens"
        ),
        gr.Slider(
            minimum=0.0, maximum=1.5, step=0.1, value=0.7,
            label="Temperature"
        ),
        gr.Slider(
            minimum=0.0, maximum=1.0, step=0.05, value=0.9,
            label="Top-p (nucleus sampling)"
        ),
    ],
    outputs="text",
    title="DeepSeek myr1 Demo",
    description=(
        "Generates text with the 'myr1' model from the Hugging Face Hub. "
        "Enter a prompt and adjust generation settings."
    )
)
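# If a single generation takes more than a few seconds, Gradio's request queue helps avoid
# request timeouts; recent Gradio versions expose .queue() on Interface objects. A sketch:
# demo = demo.queue()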

# ------------------------------------------------------------------------------
# 6) Launch the App
# ------------------------------------------------------------------------------
if __name__ == "__main__":
    print("Launching Gradio demo...")
    demo.launch()