"""Interactive chat with a LoRA adapter applied to Qwen2-0.5B-Instruct."""

import json
import os

import torch
from peft import PeftModel
from transformers import AutoModelForCausalLM, AutoTokenizer, TextStreamer

# Load the base model and tokenizer; use fp16 on GPU, fp32 on CPU.
base_model = "Qwen/Qwen2-0.5B-Instruct"
tokenizer = AutoTokenizer.from_pretrained(base_model, trust_remote_code=True)
model = AutoModelForCausalLM.from_pretrained(
    base_model,
    device_map="cuda" if torch.cuda.is_available() else "cpu",
    torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
    trust_remote_code=True,
)

# Strip config keys written by newer PEFT releases; older PEFT versions reject
# unknown keys when loading the adapter, so this is a compatibility workaround.
adapter_config_path = "./adapter/adapter_config.json"
if os.path.exists(adapter_config_path):
    with open(adapter_config_path, "r") as f:
        adapter_config = json.load(f)
    for key in ["corda_config", "eva_config", "megatron_config"]:
        adapter_config.pop(key, None)
    with open(adapter_config_path, "w") as f:
        json.dump(adapter_config, f)

# Attach the LoRA adapter from ./adapter and switch to inference mode.
model = PeftModel.from_pretrained(model, "./adapter", is_trainable=False)
model.eval()

def chat(prompt):
    """Generate a streamed reply to a single user prompt and return the full text."""
    messages = [
        {"role": "system", "content": "You are a helpful assistant."},
        {"role": "user", "content": prompt},
    ]
    text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
    model_inputs = tokenizer([text], return_tensors="pt").to(model.device)

    # Stream tokens to stdout as they are generated.
    streamer = TextStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
    generated_ids = model.generate(
        **model_inputs,
        max_new_tokens=512,
        do_sample=True,
        temperature=0.7,
        streamer=streamer,
    )
    # Decode only the newly generated tokens, slicing off the prompt.
    output = tokenizer.decode(
        generated_ids[0][model_inputs["input_ids"].shape[-1]:],
        skip_special_tokens=True,
    )
    return output

if __name__ == "__main__":
    # Simple REPL: type "exit" or "quit" to stop.
    while True:
        prompt = input("User: ")
        if prompt.lower() in ["exit", "quit"]:
            break
        # The TextStreamer already prints the reply as it is generated, so only
        # the "AI:" prefix is printed here to avoid duplicating the output.
        print("AI: ", end="", flush=True)
        chat(prompt)