Spaces:

BroBro87
/

CloudChat

Sleeping

App Files Files Community

BroBro87 committed on Jan 26, 2024

Commit

fa83d85

verified ·

1 Parent(s): f44efda

Create app.py

Browse files

Files changed (1) hide show

app.py +56 -0

app.py ADDED Viewed

	@@ -0,0 +1,56 @@

+# this is the pdf
+#https://docs.google.com/document/d/1hY5ItC8Mewyk-90Q--CGr50wBbZBjPrkYu4NtiBVre4/edit?usp=sharing
+#Inference takes 6-7 mins per query
+import logging
+import sys
+import gradio as gr
+from llama_index import VectorStoreIndex, SimpleDirectoryReader, ServiceContext
+from llama_index.llms import LlamaCPP
+from llama_index.llms.llama_utils import messages_to_prompt, completion_to_prompt
+from langchain.embeddings.huggingface import HuggingFaceEmbeddings
+# Set up logging
+logging.basicConfig(stream=sys.stdout, level=logging.INFO)
+logging.getLogger().addHandler(logging.StreamHandler(stream=sys.stdout))
+def configure_llama_model():
+    model_url = 'https://huggingface.co/TheBloke/Mistral-7B-Instruct-v0.1-GGUF/resolve/main/mistral-7b-instruct-v0.1.Q4_K_M.gguf'
+    llm = LlamaCPP(
+        model_url=model_url,
+        temperature=0.3,
+        max_new_tokens=256,
+        context_window=3900,
+        model_kwargs={"n_gpu_layers": -1},
+        messages_to_prompt=messages_to_prompt,
+        completion_to_prompt=completion_to_prompt,
+        verbose=True,
+    )
+    return llm
+def configure_embeddings():
+    embed_model = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")
+    return embed_model
+def configure_service_context(llm, embed_model):
+    return ServiceContext.from_defaults(chunk_size=250, llm=llm, embed_model=embed_model)
+def initialize_vector_store_index(data_path, service_context):
+    documents = SimpleDirectoryReader("./").load_data()
+    index = VectorStoreIndex.from_documents(documents, service_context=service_context)
+    return index
+# Configure and initialize components.
+# These statements run at import time and depend on their order: the LLM and
+# embedding model feed the service context, the service context is used to
+# build the index, and the index yields the query engine read by get_response().
+llm = configure_llama_model()
+embed_model = configure_embeddings()
+service_context = configure_service_context(llm, embed_model)
+# "./" is passed as the data path (the current working directory).
+index = initialize_vector_store_index("./", service_context)
+query_engine = index.as_query_engine()
+# Define a function for Gradio to use
+def get_response(text, username):
+    # For simplicity, we are only using the 'text' argument
+    response = str(query_engine.query(text))
+    return response
+gr.ChatInterface(get_response).launch(debug=True)