import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, TextStreamer
from peft import PeftModel
import json
import os

# Load tokenizer and base model
base_model = "Qwen/Qwen2-0.5B-Instruct"
tokenizer = AutoTokenizer.from_pretrained(base_model, trust_remote_code=True)
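
# Defensive fallback (an assumption, not required by every checkpoint; the
# Qwen2 tokenizer normally ships with a pad token already): fall back to EOS
# if none is set, so padded batches don't fail.
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token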
model = AutoModelForCausalLM.from_pretrained(
    base_model,
    device_map="cuda" if torch.cuda.is_available() else "cpu",
    torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
    trust_remote_code=True
)

# Work around older peft versions: adapters saved with newer peft can include
# config keys (corda_config, eva_config, megatron_config) that an older
# LoraConfig rejects with a TypeError, so strip them before loading.
adapter_config_path = "./adapter/adapter_config.json"
if os.path.exists(adapter_config_path):
    with open(adapter_config_path, "r") as f:
        adapter_config = json.load(f)
    for key in ["corda_config", "eva_config", "megatron_config"]:
        adapter_config.pop(key, None)
    with open(adapter_config_path, "w") as f:
        json.dump(adapter_config, f)

# Load adapter
model = PeftModel.from_pretrained(model, "./adapter", is_trainable=False)
model.eval()
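
# Optional speed-up, a sketch assuming the adapter is a mergeable type such as
# LoRA: folding its weights into the base model removes the PEFT indirection
# at inference time.
# model = model.merge_and_unload()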

# Simple chat helper: streams the reply to stdout as it is generated and
# returns the full completion string.
def chat(prompt):
    messages = [
        {"role": "system", "content": "You are a helpful assistant."},
        {"role": "user", "content": prompt}
    ]
    text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
    model_inputs = tokenizer([text], return_tensors="pt").to(model.device)

    streamer = TextStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
    generated_ids = model.generate(
        **model_inputs,
        max_new_tokens=512,
        do_sample=True,
        temperature=0.7,
        streamer=streamer
    )
    # Slice off the prompt tokens so only the newly generated text is decoded
    output = tokenizer.decode(generated_ids[0][model_inputs["input_ids"].shape[-1]:], skip_special_tokens=True)
    return output

# Example REPL: type a message, or "exit"/"quit" to stop
if __name__ == "__main__":
    while True:
        prompt = input("User: ")
        if prompt.lower() in ["exit", "quit"]:
            break
        # TextStreamer already prints the reply as it streams inside chat(),
        # so print only the "AI:" prefix here instead of echoing the full
        # output a second time.
        print("AI: ", end="", flush=True)
        chat(prompt)