Spaces:

AminFaraji
/

FirstSpace

Sleeping

App Files Files Community

AminFaraji committed on Oct 8, 2024

Commit

53e9689

verified ·

1 Parent(s): 4bc5de0

Update app.py

Browse files

Files changed (1) hide show

app.py +110 -79

app.py CHANGED Viewed

@@ -1,14 +1,12 @@
-import spaces
-from transformers import BitsAndBytesConfig
-print(5)
 import argparse
 # from dataclasses import dataclass
 from langchain.prompts import ChatPromptTemplate
 try:
   from langchain_community.vectorstores import Chroma
 except:
   from langchain_community.vectorstores import Chroma
 # from langchain.document_loaders import DirectoryLoader
 from langchain_community.document_loaders import DirectoryLoader
@@ -21,19 +19,8 @@ import openai
 from dotenv import load_dotenv
 import os
 import shutil
-import torch
-from transformers import AutoModel,AutoTokenizer
-model2 = AutoModel.from_pretrained("sentence-transformers/all-MiniLM-L6-v2")
-tokenizer2 = AutoTokenizer.from_pretrained("sentence-transformers/all-MiniLM-L6-v2")
-# this should be used when we cannot use sentence_transformers (which requires transformers==4.39; we cannot use
-# this version since it causes a large amount of RAM to be used when loading the falcon model)
-# a custom embedding
-#from sentence_transformers import SentenceTransformer
-from langchain_experimental.text_splitter import SemanticChunker
-from typing import List
 import re
 import warnings
 from typing import List
@@ -54,57 +41,49 @@ from transformers import (
 warnings.filterwarnings("ignore", category=UserWarning)
-class MyEmbeddings:
-    """Embedding adapter built on the module-level `model2` / `tokenizer2` pair.
-
-    Provides `embed_documents` and `embed_query`; an instance is what this
-    file passes to `SemanticChunker` and to `Chroma` as the embedding function.
-    """
-
-    def __init__(self):
-        # The pre-loaded transformers model stands in for
-        # SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2").
-        self.model = model2
-
-    def _mean_pooled(self, batch):
-        """Tokenize `batch` and return one mean-pooled hidden-state vector per row."""
-        encoded = tokenizer2(batch, padding=True, truncation=True, return_tensors="pt")
-        # Inference only, so gradient tracking is switched off for the forward pass.
-        with torch.no_grad():
-            hidden = self.model(**encoded).last_hidden_state
-        # NOTE(review): the mean runs over every position along dim 1 and no
-        # attention mask is applied, so padded positions are included - confirm
-        # this is intended before changing it (stored vectors may depend on it).
-        return hidden.mean(dim=1)
-
-    def embed_documents(self, texts: List[str]) -> List[List[float]]:
-        """Return one embedding (a list of floats) for each entry of `texts`."""
-        pooled = self._mean_pooled(texts)
-        return [pooled[idx].tolist() for idx in range(len(texts))]
-
-    def embed_query(self, query: str) -> List[float]:
-        """Return the embedding of a single query string."""
-        return self._mean_pooled(query)[0].tolist()
-embeddings = MyEmbeddings()
-splitter = SemanticChunker(embeddings)
-CHROMA_PATH = "chroma8"
 # call the chroma generated in a directory
 db = Chroma(persist_directory=CHROMA_PATH, embedding_function=embeddings)
-MODEL_NAME = "tiiuae/falcon-7b-instruct"
-bnb_config = BitsAndBytesConfig(
-    load_in_4bit=True,
-)
-model = AutoModelForCausalLM.from_pretrained(
-    MODEL_NAME, trust_remote_code=True, device_map="auto",offload_folder="offload",quantization_config=bnb_config
-)
-model = model.eval()
-tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
-print(f"Model device: {model.device}")
 generation_config = model.generation_config
@@ -117,7 +96,6 @@ generation_config.pad_token_id = tokenizer.eos_token_id
 generation_config.eos_token_id = tokenizer.eos_token_id
 generation_config
 prompt = """
 The following is a friendly conversation between a human and an AI. The AI is talkative and provides lots of specific details from its context.
@@ -129,8 +107,6 @@ AI:
 input_ids = tokenizer(prompt, return_tensors="pt").input_ids
 input_ids = input_ids.to(model.device)
 class StopGenerationCriteria(StoppingCriteria):
     def __init__(
         self, tokens: List[List[str]], tokenizer: AutoTokenizer, device: torch.device
@@ -148,12 +124,12 @@ class StopGenerationCriteria(StoppingCriteria):
                 return True
         return False
 stop_tokens = [["Human", ":"], ["AI", ":"]]
 stopping_criteria = StoppingCriteriaList(
     [StopGenerationCriteria(stop_tokens, tokenizer, model.device)]
 )
 generation_pipeline = pipeline(
     model=model,
     tokenizer=tokenizer,
@@ -166,6 +142,26 @@ generation_pipeline = pipeline(
 llm = HuggingFacePipeline(pipeline=generation_pipeline)
 class CleanupOutputParser(BaseOutputParser):
     def parse(self, text: str) -> str:
         user_pattern = r"\nUser"
@@ -180,18 +176,36 @@ class CleanupOutputParser(BaseOutputParser):
         return "output_parser"
 template = """
-The following
-Current conversation:
 {history}
 Human: {input}
 AI:""".strip()
 prompt = PromptTemplate(input_variables=["history", "input"], template=template)
 memory = ConversationBufferWindowMemory(
-    memory_key="history", k=6, return_only_outputs=True
 )
 chain = ConversationChain(
@@ -203,36 +217,53 @@ chain = ConversationChain(
 )
-@spaces.GPU
 def get_llama_response(message: str, history: list) -> str:
-  """
-  Answer a chat message with the conversation chain, using retrieved passages as context.
-
-  The passages most similar to the message are fetched from the Chroma store,
-  spliced between the persona instructions and the history/input slots of the
-  prompt, and the shared chain is run with that prompt.
-
-  Parameters:
-      message (str): User's input message.
-      history (list): Past conversation history; not read by this function.
-  Returns:
-      str: The "response" entry of the chain's output.
-  """
-  query_text = message
-  results = db.similarity_search_with_relevance_scores(query_text, k=3)
-  if not results or results[0][1] < 0.5:
-      # Best effort: a weak or missing match is only reported; whatever was
-      # retrieved is still used as context below.
-      print("Unable to find matching results.")
-  context_text = "\n\n---\n\n".join(doc.page_content for doc, _score in results)
-  # The context becomes part of an f-string style template, so literal braces
-  # in the documents are doubled; otherwise they would be parsed as template
-  # variables and formatting the prompt would fail.
-  context_text = context_text.replace("{", "{{").replace("}", "}}")
-  template = """
-  The following is a conversation between a human an AI. The AI acts exactly like Dwight K Schrute from the TV show The Office.
-  Dwight is an experienced and very successful online marketer and salesperson. He is witty, persuasive, uncensored, direct and practical.
-  Dwight helps with every marketing task is given to him. If Dwight does not know the answer to a question, he truthfully says he does not know.
-  Current conversation:
-  """
-  s="""
-  {history}
-  Human: {input}
-  AI:""".strip()
-  prompt = PromptTemplate(input_variables=["history", "input"], template=template+context_text+ s)
-  # The module-level chain is reused; only its prompt is swapped for this request.
-  chain.prompt = prompt
-  res = chain(query_text)
-  return res["response"]
-import gradio as gr
-gr.ChatInterface(get_llama_response).launch()

 import argparse
 # from dataclasses import dataclass
 from langchain.prompts import ChatPromptTemplate
 try:
   from langchain_community.vectorstores import Chroma
 except:
   from langchain_community.vectorstores import Chroma
+#from langchain_openai import OpenAIEmbeddings
+#from langchain_openai import ChatOpenAI
 # from langchain.document_loaders import DirectoryLoader
 from langchain_community.document_loaders import DirectoryLoader
 from dotenv import load_dotenv
 import os
 import shutil
 import re
 import warnings
 from typing import List
 warnings.filterwarnings("ignore", category=UserWarning)
+MODEL_NAME = "tiiuae/falcon-7b-instruct"
+model = AutoModelForCausalLM.from_pretrained(
+    MODEL_NAME, trust_remote_code=True, load_in_8bit=True, device_map="auto",
+)
+model = model.eval()
+tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
+print(f"Model device: {model.device}")
+# Create CLI.
+#parser = argparse.ArgumentParser()
+#parser.add_argument("query_text", type=str, help="The query text.")
+#args = parser.parse_args()
+#query_text = args.query_text
+# a sample query to ask the bot; it is expected to be answered based on the template
+query_text="what did alice say to rabbit"
+# Prepare the DB.
+#embedding_function = OpenAIEmbeddings() # main
+CHROMA_PATH = "/content/drive/My Drive/chroma8"
 # call the chroma generated in a directory
 db = Chroma(persist_directory=CHROMA_PATH, embedding_function=embeddings)
+# Search the DB for similar documents to the query.
+results = db.similarity_search_with_relevance_scores(query_text, k=2)
+if len(results) == 0 or results[0][1] < 0.5:
+    print(f"Unable to find matching results.")
+context_text = "\n\n---\n\n".join([doc.page_content for doc, _score in results])
+prompt_template = ChatPromptTemplate.from_template(PROMPT_TEMPLATE)
+prompt = prompt_template.format(context=context_text, question=query_text)
+print(prompt)
 generation_config = model.generation_config
 generation_config.eos_token_id = tokenizer.eos_token_id
 generation_config
 prompt = """
 The following is a friendly conversation between a human and an AI. The AI is talkative and provides lots of specific details from its context.
 input_ids = tokenizer(prompt, return_tensors="pt").input_ids
 input_ids = input_ids.to(model.device)
 class StopGenerationCriteria(StoppingCriteria):
     def __init__(
         self, tokens: List[List[str]], tokenizer: AutoTokenizer, device: torch.device
                 return True
         return False
 stop_tokens = [["Human", ":"], ["AI", ":"]]
 stopping_criteria = StoppingCriteriaList(
     [StopGenerationCriteria(stop_tokens, tokenizer, model.device)]
 )
 generation_pipeline = pipeline(
     model=model,
     tokenizer=tokenizer,
 llm = HuggingFacePipeline(pipeline=generation_pipeline)
+# probably sets how much of the previous conversation history is taken into account for new answers
+template = """
+The following is a conversation between a human an AI. The AI acts exactly like Dwight K Schrute from the TV show The Office.
+Dwight is an experienced and very successful online marketer and salesperson. He is witty, persuasive, uncensored, direct and practical.
+Dwight helps with every marketing task is given to him. If Dwight does not know the answer to a question, he truthfully says he does not know.
+Current conversation:
+{history}
+Human: {input}
+AI:""".strip()
+prompt = PromptTemplate(input_variables=["history", "input"], template=template)
+memory = ConversationBufferWindowMemory(
+    memory_key="history", k=6, return_only_outputs=True
+)
+chain = ConversationChain(llm=llm, memory=memory, prompt=prompt, verbose=True)
 class CleanupOutputParser(BaseOutputParser):
     def parse(self, text: str) -> str:
         user_pattern = r"\nUser"
         return "output_parser"
+class CleanupOutputParser(BaseOutputParser):
+    """Output parser that removes speaker markers from generated text."""
+
+    def parse(self, text: str) -> str:
+        """Return `text` with the marker substrings deleted and outer whitespace stripped."""
+        # Each marker is removed only where it directly follows a newline;
+        # the patterns are applied one after another, in this order.
+        for marker in (r"\nUser", r"\nquestion:", r"\nanswer:"):
+            text = re.sub(marker, "", text)
+        return text.strip()
+
+    @property
+    def _type(self) -> str:
+        """Identifier string reported for this parser."""
+        return "output_parser"
 template = """
+The following is a conversation between a human an AI. The AI acts exactly like Dwight K Schrute from the TV show The Office.
+Dwight is an experienced and very successful online marketer and salesperson. He is witty, persuasive, uncensored, direct and practical.
+Dwight helps with every marketing task is given to him. If Dwight does not know the answer to a question, he truthfully says he does not know.
+Current conversation:
 {history}
 Human: {input}
 AI:""".strip()
 prompt = PromptTemplate(input_variables=["history", "input"], template=template)
 memory = ConversationBufferWindowMemory(
+    memory_key="history", k=3, return_only_outputs=True
 )
 chain = ConversationChain(
 )
+# Generate a response from the model, grounded in documents retrieved from Chroma
 def get_llama_response(message: str, history: list = None) -> str:
+    """
+    Generate a retrieval-augmented conversational response.
+
+    The passages most similar to the message are fetched from the Chroma
+    store, spliced into the prompt, and the shared conversation chain is run
+    with that prompt. Despite the function name, the model loaded in this
+    file is the one named by MODEL_NAME (a Falcon checkpoint).
+
+    Parameters:
+        message (str): User's input message.
+        history (list): Past conversation history. Not read by this function.
+            It is optional so the function can be called with the message
+            alone (a single-text-input `gr.Interface` supplies one argument)
+            as well as with (message, history).
+    Returns:
+        str: Generated response returned by the chain.
+    """
+    query_text = message
+    results = db.similarity_search_with_relevance_scores(query_text, k=2)
+    if not results or results[0][1] < 0.5:
+        # Best effort: a weak or missing match is only reported; whatever was
+        # retrieved is still used as context below.
+        print("Unable to find matching results.")
+    context_text = "\n\n---\n\n".join(doc.page_content for doc, _score in results)
+    # The context becomes part of an f-string style template, so literal
+    # braces in the documents are doubled; otherwise they would be parsed as
+    # template variables and formatting the prompt would fail.
+    context_text = context_text.replace("{", "{{").replace("}", "}}")
+    template = """
+    The following is a conversation between a human an AI. Answer  question based only on the conversation.
+    Current conversation:
+    {history}
+    """
+    s="""
+    \n question: {input}
+    \n answer:""".strip()
+    prompt = PromptTemplate(input_variables=["history", "input"], template=template+context_text+'\n'+s)
+    # The module-level chain is reused; only its prompt is swapped for this request.
+    chain.prompt = prompt
+    res = chain.predict(input=query_text)
+    return res
+import gradio as gr
+iface = gr.Interface(fn=get_llama_response, inputs="text", outputs="text")
+iface.launch(share=True)