Spaces:

SivaResearch
/

re

Paused

App Files Files Community

SivaResearch committed on Jan 29, 2024

Commit

4ca0de5

verified ·

1 Parent(s): c7eca26

Update app.py

Browse files

Files changed (1) hide show

app.py +49 -30

app.py CHANGED Viewed

@@ -24,43 +24,62 @@ SYSTEM_PROMPT = """<s>[INST] <<SYS>>
 आपका प्रमुख लक्ष्य है यह है कि आप कृषि क्षेत्र में उपयुक्त ज्ञान प्रदान करें। आपके ज्ञान का धन्यवाद।
 <</SYS>>
 """
-# Formatting function for message and history
-def format_message(message: str, history: list, memory_limit: int = 3) -> str:
-    if len(history) > memory_limit:
-        history = history[-memory_limit:]
-    if len(history) == 0:
-        return SYSTEM_PROMPT + f"{message} [/INST]"
-    formatted_message = SYSTEM_PROMPT + f"{history[0][0]} [/INST] {history[0][1]} </s>"
-    for user_msg, model_answer in history[1:]:
-        formatted_message += f"<s>[INST] {user_msg} [/INST] {model_answer} </s>"
-    formatted_message += f"<s>[INST] {message} [/INST]"
-    return formatted_message
-def inference(input_prompts, model, tokenizer):
-    input_prompts = [
-        tokenizer.encode(input_prompt, return_tensors="pt", max_length=1024, truncation=True)
-        for input_prompt in input_prompts
-    ]
-    with torch.inference_mode():
-        outputs = model.generate(input_prompts[0], do_sample=True, top_k=10, max_length=1024)
-    output_texts = tokenizer.decode(outputs[0], skip_special_tokens=True)
-    return output_texts
-def get_llama_response(message: str, history: list) -> str:
-    query = format_message(message, history)
-    response = inference([query], model, tokenizer)
-    print("Chatbot:", response.strip())
-    return response.strip()
-gr.ChatInterface(get_llama_response).launch()

 आपका प्रमुख लक्ष्य है यह है कि आप कृषि क्षेत्र में उपयुक्त ज्ञान प्रदान करें। आपके ज्ञान का धन्यवाद।
 <</SYS>>
 """
+# Run on the GPU when PyTorch reports one is available; otherwise fall back to the CPU.
+if torch.cuda.is_available():
+    device = "cuda"
+else:
+    device = "cpu"
+def create_prompt_with_chat_format(messages, bos="<s>", eos="</s>", add_bos=True, system_prompt="System: "):
+    """Flatten a list of chat messages into a single prompt string.
+
+    Args:
+        messages: Iterable of dicts, each with a "role" key ("system", "user"
+            or "assistant") and a "content" string.
+        bos: Text placed in front of the whole prompt when ``add_bos`` is true.
+        eos: Text appended after every assistant message.
+        add_bos: Whether ``bos`` is prepended.
+        system_prompt: Label written directly in front of each system message.
+
+    Returns:
+        The rendered messages in order, each terminated by a newline, followed
+        by one extra trailing newline.
+
+    Raises:
+        ValueError: If a message carries any other role.
+    """
+    pieces = []
+    for turn in messages:
+        role = turn["role"]
+        if role == "system":
+            pieces.append(system_prompt + turn["content"] + "\n")
+        elif role == "user":
+            pieces.append("\n" + turn["content"] + "\n")
+        elif role == "assistant":
+            # Assistant text is trimmed and closed with the end-of-sequence marker.
+            pieces.append("\n" + turn["content"].strip() + eos + "\n")
+        else:
+            raise ValueError(
+                "Chat template only supports 'system', 'user', and 'assistant' roles. Invalid role: {}.".format(
+                    role
+                )
+            )
+    # The prompt always ends with one additional newline after the last message.
+    pieces.append("\n")
+    if add_bos:
+        pieces.insert(0, bos)
+    return "".join(pieces)
+def inference(input_prompts, model, tokenizer, system_prompt="System: "):
+    """Generate one greedy completion per prompt and return only the newly generated text.
+
+    Args:
+        input_prompts: Iterable of prompt strings; each is wrapped as a single
+            user message before being tokenized.
+        model: Object exposing ``generate(input_ids, ...)``.
+        tokenizer: Callable returning an encoding with ``input_ids``, and
+            exposing ``decode``.
+        system_prompt: Forwarded to ``create_prompt_with_chat_format``.
+
+    Returns:
+        A list with one decoded completion string per input prompt.
+    """
+    output_texts = []
+    for input_prompt in input_prompts:
+        formatted_query = create_prompt_with_chat_format(
+            [{"role": "user", "content": input_prompt}], add_bos=False, system_prompt=system_prompt
+        )
+        encodings = tokenizer(formatted_query, padding=True, return_tensors="pt").to(device)
+        prompt_length = encodings.input_ids.shape[1]
+        with torch.no_grad():
+            # NOTE(review): max_length counts the prompt tokens as well, so a prompt of
+            # 250+ tokens leaves no room for an answer — consider max_new_tokens.
+            outputs = model.generate(
+                encodings.input_ids,
+                attention_mask=encodings.get("attention_mask"),
+                do_sample=False,
+                max_length=250,
+            )
+        # Cut the prompt off by token count. The previous character slice used
+        # len(input_prompt), but the model was fed the longer formatted_query, so the
+        # offset did not line up with the decoded text.
+        # Assumes a causal model whose generate() output starts with the prompt tokens — TODO confirm.
+        completion_ids = outputs[0][prompt_length:]
+        output_texts.append(tokenizer.decode(completion_ids, skip_special_tokens=True))
+    return output_texts
+# Sample Hindi prompts, two per row: career advice / how to study, tell a story /
+# about the Taj Mahal, "what is my name?" / favourite film.
+# NOTE(review): nothing in the visible code reads `examples` — confirm whether it
+# was meant to be passed to the chat UI.
+examples = [
+    ["मुझे अपने करियर के बारे में सुझाव दो", "मैं कैसे अध्ययन कर सकता हूँ?"],
+    ["कृपया मुझे एक कहानी सुनाएं", "ताजमहल के बारे में कुछ बताएं"],
+    ["मेरा नाम क्या है?", "आपका पसंदीदा फिल्म कौन सी है?"],
+]
+def get_llama_response(message: str, history: list, system_prompt=SYSTEM_PROMPT) -> str:
+    """Chat callback: answer ``message`` in the context of the earlier turns.
+
+    Args:
+        message: The newest user message.
+        history: Earlier turns, either ``[user, assistant]`` pairs or
+            ``{"role": ..., "content": ...}`` dicts.
+        system_prompt: Forwarded to ``create_prompt_with_chat_format`` as its
+            ``system_prompt`` argument.
+
+    Returns:
+        The first completion returned by ``inference``, stripped of
+        surrounding whitespace.
+    """
+    formatted_history = []
+    for turn in history:
+        if isinstance(turn, dict):
+            formatted_history.append({"role": turn["role"], "content": turn["content"]})
+            continue
+        # Pair-style history: wrapping the whole pair as user content (as before) handed a
+        # list to the prompt builder, which fails when it is concatenated with a string.
+        user_text, assistant_text = turn
+        if user_text is not None:
+            formatted_history.append({"role": "user", "content": user_text})
+        if assistant_text is not None:
+            formatted_history.append({"role": "assistant", "content": assistant_text})
+    formatted_history.append({"role": "user", "content": message})
+    # NOTE(review): no "system" message is built here, so system_prompt only sets the
+    # label for system messages and never appears in the rendered prompt — confirm intent.
+    formatted_query = create_prompt_with_chat_format(formatted_history, add_bos=False, system_prompt=system_prompt)
+    # NOTE(review): inference() wraps each prompt as a user message again, which adds
+    # extra newlines around this already formatted text.
+    response = inference([formatted_query], model, tokenizer)
+    reply = response[0].strip()
+    print("Chatbot:", reply)
+    return reply
+# ChatInterface creates its own textbox and chatbot, so only the callback is passed.
+# `inputs`/`outputs` are gr.Interface parameters; ChatInterface does not accept them
+# and raised a TypeError at startup.
+gr.ChatInterface(fn=get_llama_response).launch()