import torch
from torch.utils.data import Dataset
from transformers import (
    AutoConfig,
    AutoTokenizer,
    AutoModelForCausalLM,
    Trainer,
    TrainingArguments,
    GenerationConfig,
    pipeline,
)
import gradio as gr


# ---------------------------
# A) Dummy training dataset
# ---------------------------
class MyTextDataset(Dataset):
    """
    Very simple dataset example. In reality you would:
      - use real text data,
      - probably use the HF 'datasets' library,
      - tokenize in chunks, etc.
    """
    def __init__(self, tokenizer, texts, block_size=128):
        self.examples = []
        for txt in texts:
            # Tokenize each text, truncating to block_size tokens
            tokens = tokenizer(txt, truncation=True, max_length=block_size)
            self.examples.append(tokens["input_ids"])

    def __len__(self):
        return len(self.examples)

    def __getitem__(self, idx):
        return torch.tensor(self.examples[idx], dtype=torch.long)


# ---------------------------
# B) Training routine
# ---------------------------
def train_model(
    model_name_or_path="wuhp/myr1",
    subfolder="myr1",
    output_dir="finetuned_myr1",
    epochs=1
):
    """
    Demonstrates how to load your custom model from HF and run a quick
    'Trainer' fine-tune on a few mock texts.

    - model_name_or_path: Hugging Face repo ID (or local folder).
    - subfolder: if your model config/weights live in a subfolder of that repo,
      specify it here.
    - output_dir: where to save the trained model.
    - epochs: number of epochs for this mock training example.
    """
    # 1) Load config (trust_remote_code=True so custom .py files from your repo can be imported)
    config = AutoConfig.from_pretrained(
        model_name_or_path,
        subfolder=subfolder,
        trust_remote_code=True
    )

    # 2) Load tokenizer
    tokenizer = AutoTokenizer.from_pretrained(
        model_name_or_path,
        subfolder=subfolder,
        trust_remote_code=True
    )
    # Some tokenizers ship without a pad token; fall back to EOS so the data
    # collator below can pad batches.
    if tokenizer.pad_token is None:
        tokenizer.pad_token = tokenizer.eos_token

    # 3) Load model
    #    AutoModelForCausalLM picks up your custom architecture from modeling_deepseek.py
    model = AutoModelForCausalLM.from_pretrained(
        model_name_or_path,
        subfolder=subfolder,
        config=config,
        torch_dtype=torch.float16,  # or "auto", or torch.float32
        device_map="auto",          # needs enough GPU memory; use "cpu" otherwise
        trust_remote_code=True
    )

    # 4) Create a tiny training dataset
    train_texts = [
        "Hello from DeepSeek!",
        "The sky is blue.",
        "Large language models can do amazing things."
    ]
    eval_texts = [
        "Testing is essential for robust code.",
        "Generative AI is fun."
    ]
    train_dataset = MyTextDataset(tokenizer, train_texts)
    eval_dataset = MyTextDataset(tokenizer, eval_texts)

    # 5) Trainer hyperparameters
    training_args = TrainingArguments(
        output_dir=output_dir,
        overwrite_output_dir=True,
        num_train_epochs=epochs,
        per_device_train_batch_size=1,
        per_device_eval_batch_size=1,
        evaluation_strategy="epoch",
        save_strategy="epoch",
        logging_steps=1,
        gradient_accumulation_steps=1,
        # If you have limited VRAM and can't do FP16, set fp16=False here.
        fp16=torch.cuda.is_available(),
    )

    # 6) Define a data collator for causal LM.
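    #    With mlm=False the collator does plain causal-LM batching: it pads the
    #    examples to a common length and copies input_ids into labels, with
    #    padding positions set to -100 so they are ignored by the loss.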
    #    Typically:
    from transformers import DataCollatorForLanguageModeling
    data_collator = DataCollatorForLanguageModeling(
        tokenizer=tokenizer,
        mlm=False
    )

    # 7) Build the trainer
    trainer = Trainer(
        model=model,
        args=training_args,
        data_collator=data_collator,
        train_dataset=train_dataset,
        eval_dataset=eval_dataset
    )

    # 8) Train
    trainer.train()

    # 9) Save model & tokenizer
    trainer.save_model(output_dir)
    tokenizer.save_pretrained(output_dir)

    return trainer


# ---------------------------
# C) Gradio app function
# ---------------------------
def create_gradio_demo(
    model_name_or_path="finetuned_myr1",
    subfolder="",
    generation_config_path=None
):
    """
    Loads a (fine-tuned) model from a local folder or the Hub, sets up a
    text-generation pipeline, and returns a Gradio interface.
    """
    # 1) Load config
    config = AutoConfig.from_pretrained(
        model_name_or_path, subfolder=subfolder, trust_remote_code=True
    )

    # 2) Load tokenizer & model
    tokenizer = AutoTokenizer.from_pretrained(
        model_name_or_path, subfolder=subfolder, trust_remote_code=True
    )
    model = AutoModelForCausalLM.from_pretrained(
        model_name_or_path,
        subfolder=subfolder,
        config=config,
        torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
        device_map="auto",
        trust_remote_code=True
    )

    # 3) (Optional) load a generation config if one is provided,
    #    e.g. custom top_k, top_p, temperature, etc.
    #    If your repo has "generation_config.json" in subfolder="myr1", you could also do:
    #      GenerationConfig.from_pretrained("wuhp/myr1", subfolder="myr1", ...)
    #    or load it from a local path once downloaded.
    if generation_config_path:
        gen_config = GenerationConfig.from_json_file(generation_config_path)
    else:
        # Fall back to defaults derived from the model config
        gen_config = GenerationConfig.from_model_config(config)

    # 4) Build a text-generation pipeline
    text_pipeline = pipeline(
        "text-generation",
        model=model,
        tokenizer=tokenizer,
        generation_config=gen_config,
    )

    # 5) Define the Gradio predict function
    def predict(prompt, max_new_tokens=64, temperature=0.7, top_p=0.95):
        """
        Generates text from the model for a given user prompt.
        """
        outputs = text_pipeline(
            prompt,
            max_new_tokens=int(max_new_tokens),
            temperature=float(temperature),
            top_p=float(top_p)
        )
        # The pipeline returns a list of dicts like [{'generated_text': '...'}]
        return outputs[0]["generated_text"]

    # 6) Create the Gradio interface
    with gr.Blocks() as demo:
        gr.Markdown("## DeepSeek LLM Demo")
        prompt = gr.Textbox(label="Enter your prompt:")
        max_new_tokens = gr.Slider(1, 512, step=1, value=64, label="Max New Tokens")
        temperature = gr.Slider(0.0, 1.5, step=0.1, value=0.7, label="Temperature")
        top_p = gr.Slider(0.0, 1.0, step=0.05, value=0.95, label="Top-p")
        output = gr.Textbox(label="Generated Text")

        generate_btn = gr.Button("Generate")
        generate_btn.click(
            fn=predict,
            inputs=[prompt, max_new_tokens, temperature, top_p],
            outputs=output
        )

    return demo


# ---------------------------
# D) Main: train + launch
# ---------------------------
if __name__ == "__main__":
    # 1) TRAIN (mock demonstration).
    #    If you just want to *load* your existing model, skip this step.
    print("Starting mock training on wuhp/myr1 (subfolder myr1)...")
    trainer = train_model(
        model_name_or_path="wuhp/myr1",
        subfolder="myr1",
        output_dir="finetuned_myr1",
        epochs=1
    )
    print("Training complete.")

    # 2) Build the Gradio app from the newly saved model in 'finetuned_myr1'.
    #    To load the original (un-finetuned) weights instead, pass
    #    model_name_or_path="wuhp/myr1" and subfolder="myr1" again.
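    #    For example:
    #      demo = create_gradio_demo(model_name_or_path="wuhp/myr1", subfolder="myr1")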
    demo = create_gradio_demo(
        model_name_or_path="finetuned_myr1",
        generation_config_path=None  # or "finetuned_myr1/generation_config.json"
    )

    # 3) Launch
    print("Launching Gradio demo on http://127.0.0.1:7860 ...")
    demo.launch()
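    # Note: demo.launch() serves locally on 127.0.0.1:7860 by default. To reach
    # the demo from another machine, or to get a temporary public URL, one option is:
    #   demo.launch(server_name="0.0.0.0", share=True)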