Spaces:

joelelangovan
/

tamil-llama-genesis-demo

Runtime error

File size: 2,911 Bytes

8e27d5b
b69fcd8
 
8ff118b
 
b69fcd8
5e6b78a
b69fcd8
5e6b78a
b69fcd8
5e6b78a
 
 
b69fcd8
 
 
 
 
 
 
 
 
 
 
 
8ff118b
b69fcd8
8ff118b
8e27d5b
b69fcd8
 
 
8ff118b
 
 
 
 
 
 
 
 
 
 
 
8e27d5b
8ff118b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8e27d5b
8ff118b
 
b69fcd8
8ff118b
 
8e27d5b
 
8ff118b

import gradio as gr
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
from peft import PeftModel, PeftConfig
import torch

# Hub identifiers: the base checkpoint and the LoRA adapter applied on top
# of it.
base_model_name = "abhinand/tamil-llama-7b-instruct-v0.1"
adapter_name = "joelelangovan/tamil-llama-genesis-finetuned"

# Load the tokenizer published with the base model.  The EOS token is reused
# as the padding token so that a pad token is always defined.
tokenizer = AutoTokenizer.from_pretrained(base_model_name)
tokenizer.pad_token = tokenizer.eos_token

# 4-bit NF4 quantization with float16 compute and double quantization off.
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.float16,
    bnb_4bit_use_double_quant=False
)

# Load the quantized base model; device_map="auto" leaves weight placement
# to the loader.
# NOTE(review): trust_remote_code=True permits Python code shipped in the
# model repository to execute locally -- confirm this checkpoint needs it.
base_model = AutoModelForCausalLM.from_pretrained(
    base_model_name,
    quantization_config=bnb_config,
    device_map="auto",
    trust_remote_code=True
)

# Load and apply the LoRA adapter on top of the base model.
model = PeftModel.from_pretrained(base_model, adapter_name)

# This app only serves inference requests: switch the wrapped model to
# evaluation mode so dropout (including any LoRA dropout configured in the
# adapter) is guaranteed to be disabled during generation.
model.eval()

def generate_response(instruction, temperature=0.7, max_length=512):
    """Generate the model's answer to a single instruction.

    Args:
        instruction: The user's question or task.  It is inserted into the
            "### Instruction: ... ### Response:" prompt template.
        temperature: Sampling temperature forwarded to ``model.generate``.
        max_length: Upper bound on the total sequence length (prompt tokens
            plus generated tokens) forwarded to ``model.generate``.

    Returns:
        The generated continuation with the prompt removed and surrounding
        whitespace stripped.  An empty or whitespace-only instruction
        yields an empty string without running the model.
    """
    # Nothing to answer: skip an expensive generation call.
    if not instruction or not instruction.strip():
        return ""

    # Format the input text
    input_text = f"### Instruction: {instruction}\n\n### Response:"

    # Tokenize and move the tensors to the device the model lives on.
    inputs = tokenizer(input_text, return_tensors="pt").to(model.device)
    prompt_token_count = inputs["input_ids"].shape[1]

    # UI sliders may deliver numbers as floats; generate() needs an integer
    # length, so coerce both numeric settings explicitly.
    outputs = model.generate(
        **inputs,
        max_length=int(max_length),
        num_return_sequences=1,
        temperature=float(temperature),
        do_sample=True,
        pad_token_id=tokenizer.eos_token_id
    )

    # The returned sequence starts with the prompt tokens.  Decode only what
    # follows them instead of splitting the text on "### Response:", which
    # would discard the real answer whenever the model emits that marker
    # again in its own output.
    generated_tokens = outputs[0][prompt_token_count:]
    return tokenizer.decode(generated_tokens, skip_special_tokens=True).strip()

# Example questions offered in the interface.  Each row holds a value for the
# question textbox only.
example_prompts = [
    [question]
    for question in (
        "ஆதியாகமம் 1:1 வசனத்தின் பொருளை விளக்குங்கள்",
        "ஆதியாகமம் 1:2 வசனத்தை தமிழில் விவரிக்கவும்",
        "ஆதியாகமம் 1:3 வசனத்தின் முக்கிய கருத்து என்ன?",
    )
]

# Input widgets, listed in the positional order of generate_response's
# parameters: instruction text, temperature, max length.
_question_box = gr.Textbox(
    label="கேள்வி / வினா",
    placeholder="உங்கள் கேள்வியை இங்கே உள்ளிடவும்...",
)
_temperature_slider = gr.Slider(
    minimum=0.1,
    maximum=1.0,
    value=0.7,
    label="Temperature",
)
_max_length_slider = gr.Slider(
    minimum=64,
    maximum=1024,
    value=512,
    step=64,
    label="Max Length",
)

# Output widget that displays the generated answer.
_answer_box = gr.Textbox(label="பதில்")

# Assemble the Gradio interface around generate_response, with flagging
# turned off.
demo = gr.Interface(
    fn=generate_response,
    inputs=[_question_box, _temperature_slider, _max_length_slider],
    outputs=_answer_box,
    title="Tamil LLaMA - ஆதியாகமம் விளக்க உதவி",
    description="ஆதியாகமம் முதல் அதிகாரம் பற்றிய கேள்விகளுக்கு விளக்கம் அளிக்கும் AI மாதிரி",
    examples=example_prompts,
    allow_flagging="never",
)

# Start serving the demo.
demo.launch()