Mahadih534 committed
Commit 330d308 · verified · 1 Parent(s): a3847ed

Create app.py

Files changed (1)
  1. app.py +135 -0
app.py ADDED
@@ -0,0 +1,135 @@
+ from huggingface_hub import hf_hub_download
+ import torch
+ import gradio as gr
+ from llama_index.llms import LlamaCPP
+ from llama_index.llms.llama_utils import (
+     messages_to_prompt,
+     completion_to_prompt,
+ )
+
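+ # Note: these import paths follow the pre-0.10 llama-index layout; on newer releases the
+ # LlamaCPP integration is expected to ship as the separate llama-index-llms-llama-cpp
+ # package and be imported from llama_index.llms.llama_cpp instead.
+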
+ MODELS_PATH = "./models"
+
+ # Download the default model (Mistral 7B Instruct v0.2, Q4_K_M GGUF) into ./models.
+ mistral_model_path = hf_hub_download(
+     repo_id="TheBloke/Mistral-7B-Instruct-v0.2-GGUF",
+     filename="mistral-7b-instruct-v0.2.Q4_K_M.gguf",
+     resume_download=True,
+     cache_dir=MODELS_PATH,
+ )
+
+ # Step 3: if you run on a GPU, make sure "n_gpu_layers" is at least 1; you can increase or
+ # decrease it based on your GPU's capacity.
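+ # A minimal sketch of that hint (assuming torch, imported above, can probe the device):
+ # the offload count could be derived at runtime instead of hard-coded, e.g.
+ #   n_gpu_layers = -1 if torch.cuda.is_available() else 0
+ # and then passed as model_kwargs={"n_gpu_layers": n_gpu_layers} below.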
+ llm = LlamaCPP(
+     # you can pass in the URL to a GGUF model to download it automatically
+     # model_url=model_url,
+     # optionally, you can set the path to a pre-downloaded model instead of model_url
+     model_path=mistral_model_path,
+     temperature=0.1,
+     max_new_tokens=256,
+     # llama2 has a context window of 4096 tokens, but we set it lower to leave some headroom
+     context_window=3900,
+     # kwargs to pass to __call__()
+     generate_kwargs={},
+     # kwargs to pass to __init__(); set n_gpu_layers to at least 1 to use the GPU
+     # (-1 offloads every layer)
+     model_kwargs={"n_gpu_layers": -1},
+     # transform inputs into Llama 2 chat format
+     messages_to_prompt=messages_to_prompt,
+     completion_to_prompt=completion_to_prompt,
+     verbose=True,
+ )
+
+
+ def model_initialization(model):
+     # Reassign the module-level llm so predict() picks up the newly selected model.
+     global llm
+     if model != "":
+         gr.Info("Model download and configuration has started, please wait...")
+     repo_id = ""
+     filename = ""
+     if model == "Llama-2-13B-chat":
+         repo_id = "TheBloke/Llama-2-13B-chat-GGUF"
+         filename = "llama-2-13b-chat.Q4_K_M.gguf"
+     elif model == "Mistral-7B-Instruct-v0.2":
+         repo_id = "TheBloke/Mistral-7B-Instruct-v0.2-GGUF"
+         filename = "mistral-7b-instruct-v0.2.Q4_K_M.gguf"
+     elif model == "zephyr-7B-beta":
+         repo_id = "TheBloke/zephyr-7B-beta-GGUF"
+         filename = "zephyr-7b-beta.Q4_K_M.gguf"
+     elif model == "vicuna-7B-v1.5":
+         repo_id = "TheBloke/vicuna-7B-v1.5-GGUF"
+         filename = "vicuna-7b-v1.5.Q4_K_M.gguf"
+     elif model == "Falcon-7B-Instruct":
+         # note: this repo ships GGML/GGCC files; recent llama-cpp-python builds load only
+         # GGUF, so this option may fail to load
+         repo_id = "TheBloke/Falcon-7B-Instruct-GGML"
+         filename = "falcon-7b-instruct.ggccv1.q4_1.bin"
+     elif model == "CodeLlama-7B":
+         repo_id = "TheBloke/CodeLlama-7B-GGUF"
+         filename = "codellama-7b.Q4_K_M.gguf"
+     else:
+         gr.Warning("Please select a model")
+         return "No model selected; the current model is unchanged"
+
+     model_path = hf_hub_download(
+         repo_id=repo_id,
+         filename=filename,
+         resume_download=True,
+         cache_dir=MODELS_PATH,
+     )
+
+     llm = LlamaCPP(
+         model_path=model_path,
+         temperature=0.1,
+         max_new_tokens=256,
+         context_window=3900,
+         generate_kwargs={},
+         # set n_gpu_layers to at least 1 to use the GPU (-1 offloads every layer)
+         model_kwargs={"n_gpu_layers": -1},
+         # transform inputs into Llama 2 chat format
+         messages_to_prompt=messages_to_prompt,
+         completion_to_prompt=completion_to_prompt,
+         verbose=True,
+     )
+     gr.Info("The model has been configured and is ready to chat")
+     return "The model has been configured and is ready to chat; your current model is " + model
+
+ def predict(message, history):
+     # Stream the completion token by token and yield the accumulated answer so the
+     # Gradio chat UI updates incrementally. The deltas already contain their own
+     # whitespace, so they are joined without a separator.
+     answer = []
+     response = llm.stream_complete(message)
+     for bot_response in response:
+         token = bot_response.delta
+         answer.append(token)
+         final_answer = "".join(answer)
+         yield final_answer
+
+ with gr.Blocks() as UI:
+     models = gr.Dropdown(
+         ["CodeLlama-7B", "Llama-2-13B-chat", "Falcon-7B-Instruct", "Mistral-7B-Instruct-v0.2",
+          "zephyr-7B-beta", "vicuna-7B-v1.5"],
+         value="Mistral-7B-Instruct-v0.2",
+         label="Please select a model",
+         info="The default model is Mistral-7B-Instruct-v0.2",
+     )
+     textInfo = gr.Textbox(value="The current model is Mistral-7B-Instruct-v0.2", label="Model Status")
+     # Chatbot interface
+     chatUI = gr.ChatInterface(
+         predict,
+         title="Open Source LLM ChatBot",
+         description="Ask any question",
+         theme="soft",
+         examples=["Hello", "Are you an LLM model?", "How can I finetune a pre-trained LLM model?",
+                   "How can I build a chatbot using a local open-source LLM?"],
+         cache_examples=False,
+         submit_btn="Send Message",
+         retry_btn=None,
+         undo_btn="Delete Previous",
+         clear_btn="Clear",
+     )
+
+     # Re-initialize the backend whenever a different model is picked from the dropdown.
+     models.change(fn=model_initialization, inputs=[models], outputs=[textInfo])
+
+ if __name__ == "__main__":
+     UI.launch(debug=True)  # launch the app