SivaResearch committed
Commit 5808915 · verified · 1 Parent(s): fa7bdde

Create app.py

Files changed (1)
  1. app.py +87 -0
app.py ADDED
@@ -0,0 +1,87 @@
+ import torch
+ from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
+ import gradio as gr
+
+
+ model = "ai4bharat/Airavata"
+
+ tokenizer = AutoTokenizer.from_pretrained(model, padding_side="left")
+ # tokenizer.pad_token = tokenizer.eos_token
+ # model = AutoModelForCausalLM.from_pretrained(model_name, torch_dtype=torch.bfloat16).to(device)
+
+ llama_pipeline = pipeline(
+     "text-generation",
+     model=model,
+     torch_dtype=torch.float16,
+     device_map="auto",
+ )
+ SYSTEM_PROMPT = """<s>[INST] <<SYS>>
+ You are a helpful bot. Your answers are clear and concise.
+ <</SYS>>
+
+ """
+
+ # Formatting function for message and history
+ def format_message(message: str, history: list, memory_limit: int = 3) -> str:
+     """
+     Formats the message and history for the Llama model.
+
+     Parameters:
+         message (str): Current message to send.
+         history (list): Past conversation history.
+         memory_limit (int): Limit on how many past interactions to consider.
+
+     Returns:
+         str: Formatted message string
+     """
+     # always keep len(history) <= memory_limit
+     if len(history) > memory_limit:
+         history = history[-memory_limit:]
+
+     if len(history) == 0:
+         return SYSTEM_PROMPT + f"{message} [/INST]"
+
+     formatted_message = SYSTEM_PROMPT + f"{history[0][0]} [/INST] {history[0][1]} </s>"
+
+     # Handle conversation history
+     for user_msg, model_answer in history[1:]:
+         formatted_message += f"<s>[INST] {user_msg} [/INST] {model_answer} </s>"
+
+     # Handle the current message
+     formatted_message += f"<s>[INST] {message} [/INST]"
+
+     return formatted_message
+
+ # Generate a response from the Llama model
+ def get_llama_response(message: str, history: list) -> str:
+     """
+     Generates a conversational response from the Llama model.
+
+     Parameters:
+         message (str): User's input message.
+         history (list): Past conversation history.
+
+     Returns:
+         str: Generated response from the Llama model.
+     """
+     query = format_message(message, history)
+     response = ""
+
+     sequences = llama_pipeline(
+         query,
+         do_sample=True,
+         top_k=10,
+         num_return_sequences=1,
+         eos_token_id=tokenizer.eos_token_id,
+         max_length=1024,
+     )
+
+     generated_text = sequences[0]['generated_text']
+     response = generated_text[len(query):]  # Remove the prompt from the output
+
+     print("Chatbot:", response.strip())
+     return response.strip()
+
+
+
+ gr.ChatInterface(get_llama_response).launch()
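
For reference, a minimal standalone sketch of the Llama-2-style prompt string that format_message builds for a one-turn history; the history pair and message below are made up for illustration, and no model loading is needed:

# Mirrors the formatting logic in app.py; history values are hypothetical.
SYSTEM_PROMPT = """<s>[INST] <<SYS>>
You are a helpful bot. Your answers are clear and concise.
<</SYS>>

"""

history = [("Hello", "Hi there! How can I help?")]  # hypothetical past turn
message = "Tell me about Airavata."                 # hypothetical current message

prompt = SYSTEM_PROMPT + f"{history[0][0]} [/INST] {history[0][1]} </s>"
prompt += f"<s>[INST] {message} [/INST]"
print(prompt)

Launching the app itself likely requires accelerate alongside torch, transformers, and gradio, since device_map="auto" relies on it.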