import os
import torch
from torch.utils.data import Dataset
from transformers import (
    AutoConfig,
    AutoTokenizer,
    AutoModelForCausalLM,
    Trainer,
    TrainingArguments,
    GenerationConfig,
    DataCollatorForLanguageModeling,
    pipeline
)
import gradio as gr


class MyTextDataset(Dataset):
    """
    Very simple dataset example. In reality:
    - Use real text data,
    - Possibly use HF 'datasets' library,
    - Tokenize in chunks, etc.
    """
    def __init__(self, tokenizer, texts, block_size=128):
        self.examples = []
        for txt in texts:
            tokens = tokenizer(txt, truncation=True, max_length=block_size)
            self.examples.append(tokens["input_ids"])

    def __len__(self):
        return len(self.examples)

    def __getitem__(self, idx):
        return torch.tensor(self.examples[idx], dtype=torch.long)
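
# The docstring above mentions the HF 'datasets' library as the realistic
# alternative to this toy Dataset. A rough sketch of that route (illustrative
# only; it assumes a local plain-text file named "train.txt", which this
# script does not ship):
#
#     from datasets import load_dataset
#     raw = load_dataset("text", data_files={"train": "train.txt"})
#     tokenized = raw["train"].map(
#         lambda ex: tokenizer(ex["text"], truncation=True, max_length=128),
#         batched=True,
#     )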
					
						
def train_model(
    model_name_or_path="wuhp/myr1",
    subfolder="myr1",
    output_dir="finetuned_myr1",
    epochs=1
):
    """
    Demonstrates how to load your custom model from HF, and run a
    quick 'Trainer' to finetune it on some mock texts.

    - model_name_or_path: huggingface repo ID (or local folder).
    - subfolder: if your model config/weights live in a subfolder
      within that repo, specify it here.
    - output_dir: where to save the final trained model.
    - epochs: how many epochs for this mock training example.
    """
					
						
    config = AutoConfig.from_pretrained(
        model_name_or_path,
        subfolder=subfolder,
        trust_remote_code=True
    )

    tokenizer = AutoTokenizer.from_pretrained(
        model_name_or_path,
        subfolder=subfolder,
        trust_remote_code=True
    )

    model = AutoModelForCausalLM.from_pretrained(
        model_name_or_path,
        subfolder=subfolder,
        config=config,
        # Load weights in fp32 here; TrainingArguments(fp16=...) below applies
        # mixed precision during training. Training directly on pure fp16
        # weights makes the Trainer's gradient scaler fail at the optimizer step.
        torch_dtype=torch.float32,
        device_map="auto",
        trust_remote_code=True
    )
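
    # Safeguard (an assumption about this tokenizer, not something the repo
    # guarantees): many causal-LM tokenizers define no pad token, and the LM
    # data collator used below needs one to pad batches, so fall back to EOS.
    if tokenizer.pad_token is None:
        tokenizer.pad_token = tokenizer.eos_token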
					
						
    train_texts = [
        "Hello from DeepSeek!",
        "The sky is blue.",
        "Large language models can do amazing things."
    ]
    eval_texts = [
        "Testing is essential for robust code.",
        "Generative AI is fun."
    ]
    train_dataset = MyTextDataset(tokenizer, train_texts)
    eval_dataset = MyTextDataset(tokenizer, eval_texts)

    training_args = TrainingArguments(
        output_dir=output_dir,
        overwrite_output_dir=True,
        num_train_epochs=epochs,
        per_device_train_batch_size=1,
        per_device_eval_batch_size=1,
        evaluation_strategy="epoch",
        save_strategy="epoch",
        logging_steps=1,
        gradient_accumulation_steps=1,
        fp16=torch.cuda.is_available(),
    )

    data_collator = DataCollatorForLanguageModeling(
        tokenizer=tokenizer, mlm=False
    )
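    # With mlm=False the collator builds causal-LM batches: it pads the
    # variable-length examples from MyTextDataset and uses the (pad-masked)
    # input ids as the labels.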
					
						
    trainer = Trainer(
        model=model,
        args=training_args,
        data_collator=data_collator,
        train_dataset=train_dataset,
        eval_dataset=eval_dataset
    )

    trainer.train()

    trainer.save_model(output_dir)
    tokenizer.save_pretrained(output_dir)

    return trainer
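
# Rough usage note: with the defaults above, train_model() downloads wuhp/myr1
# (subfolder "myr1"), fine-tunes it for one epoch on the three mock sentences,
# and writes the resulting weights and tokenizer files to ./finetuned_myr1/.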
					
						
def create_gradio_demo(
    model_name_or_path="finetuned_myr1",
    generation_config_path=None
):
    """
    Loads a (fine-tuned) model from local or HF, sets up
    a text-generation pipeline, and returns a Gradio interface.
    """
    config = AutoConfig.from_pretrained(model_name_or_path, trust_remote_code=True)
    tokenizer = AutoTokenizer.from_pretrained(model_name_or_path, trust_remote_code=True)
    model = AutoModelForCausalLM.from_pretrained(
        model_name_or_path,
        config=config,
        torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
        device_map="auto",
        trust_remote_code=True
    )

    if generation_config_path:
        gen_config = GenerationConfig.from_json_file(generation_config_path)
    else:
        gen_config = GenerationConfig.from_model_config(config)
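    # from_model_config() derives default generation settings (eos/bos token
    # ids and related defaults) from the model config when no explicit
    # generation-config JSON file is supplied.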
					
						
    text_pipeline = pipeline(
        "text-generation",
        model=model,
        tokenizer=tokenizer,
        generation_config=gen_config,
    )

    def predict(prompt, max_new_tokens=64, temperature=0.7, top_p=0.95):
        """
        Generates text from the model given a user prompt.
        """
        outputs = text_pipeline(
            prompt,
            max_new_tokens=int(max_new_tokens),
            temperature=float(temperature),
            top_p=float(top_p),
            # Sampling must be enabled for temperature/top_p to have any effect.
            do_sample=True
        )
        return outputs[0]["generated_text"]
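
    # Note: by default the text-generation pipeline returns the prompt plus
    # the continuation in "generated_text".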
					
						
    with gr.Blocks() as demo:
        gr.Markdown("## DeepSeek LLM Demo")
        prompt = gr.Textbox(label="Enter your prompt:")
        max_new_tokens = gr.Slider(1, 512, step=1, value=64, label="Max New Tokens")
        temperature = gr.Slider(0.0, 1.5, step=0.1, value=0.7, label="Temperature")
        top_p = gr.Slider(0.0, 1.0, step=0.05, value=0.95, label="Top-p")
        output = gr.Textbox(label="Generated Text")

        generate_btn = gr.Button("Generate")
        generate_btn.click(
            fn=predict,
            inputs=[prompt, max_new_tokens, temperature, top_p],
            outputs=output
        )
    return demo


if __name__ == "__main__":
    print("Starting mock training on wuhp/myr1 (subfolder myr1)...")
    trainer = train_model(
        model_name_or_path="wuhp/myr1",
        subfolder="myr1",
        output_dir="finetuned_myr1",
        epochs=1
    )
    print("Training complete.")

    demo = create_gradio_demo(
        model_name_or_path="finetuned_myr1",
        generation_config_path=None
    )

    print("Launching Gradio demo on http://127.0.0.1:7860 ...")
    demo.launch()
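    # demo.launch(share=True) would also create a temporary public URL, and
    # server_name="0.0.0.0" exposes the app on the local network.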
					
						