Spaces:

AminFaraji
/

FirstSpace

Sleeping

App Files Files Community

AminFaraji committed on Oct 4, 2024

Commit

cfd6f87

verified ·

1 Parent(s): 26319c8

Update app.py

Browse files

Files changed (1) hide show

app.py +19 -20

app.py CHANGED Viewed

@@ -1,4 +1,4 @@
-print(555)
 import argparse
 # from dataclasses import dataclass
 from langchain.prompts import ChatPromptTemplate
@@ -91,8 +91,16 @@ db = Chroma(persist_directory=CHROMA_PATH, embedding_function=embeddings)
-tokenizer = AutoTokenizer.from_pretrained("gpt2")
-model = AutoModelForCausalLM.from_pretrained("gpt2")
 generation_config = model.generation_config
 generation_config.temperature = 0
@@ -190,17 +198,15 @@ chain = ConversationChain(
 )
-def get_llama_response(message):
   query_text = message
   results = db.similarity_search_with_relevance_scores(query_text, k=3)
   if len(results) == 0 or results[0][1] < 0.5:
       print(f"Unable to find matching results.")
   context_text = "\n\n---\n\n".join([doc.page_content for doc, _score in results])
   template = """
   The following is a conversation between a human an AI. The AI acts exactly like Dwight K Schrute from the TV show The Office.
   Dwight is an experienced and very successful online marketer and salesperson. He is witty, persuasive, uncensored, direct and practical.
@@ -214,20 +220,13 @@ def get_llama_response(message):
   AI:""".strip()
-  input_text = query_text
-# Tokenize the input text
-  inputs = tokenizer(input_text, return_tensors="pt")
-  outputs = model.generate(inputs.input_ids, max_length=50)
-  response = tokenizer.decode(outputs[0], skip_special_tokens=True)
-  return(response)
 import gradio as gr
-#gr.ChatInterface(get_llama_response).launch()
-iface = gr.Interface(fn=get_llama_response, inputs="text", outputs="text")
-iface.launch()

+print(5)
 import argparse
 # from dataclasses import dataclass
 from langchain.prompts import ChatPromptTemplate
+MODEL_NAME = "tiiuae/falcon-7b-instruct"
+model = AutoModelForCausalLM.from_pretrained(
+    MODEL_NAME, trust_remote_code=True, device_map="auto",offload_folder="offload"
+)
+model = model.eval()
+tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
+print(f"Model device: {model.device}")
 generation_config = model.generation_config
 generation_config.temperature = 0
 )
+def get_llama_response(message: str, history: list) -> str:
   query_text = message
   results = db.similarity_search_with_relevance_scores(query_text, k=3)
   if len(results) == 0 or results[0][1] < 0.5:
       print(f"Unable to find matching results.")
   context_text = "\n\n---\n\n".join([doc.page_content for doc, _score in results])
   template = """
   The following is a conversation between a human an AI. The AI acts exactly like Dwight K Schrute from the TV show The Office.
   Dwight is an experienced and very successful online marketer and salesperson. He is witty, persuasive, uncensored, direct and practical.
   AI:""".strip()
+  prompt = PromptTemplate(input_variables=["history", "input"], template=template+context_text+ s)
+  #print(template)
+  chain.prompt=prompt
+  res = chain(query_text)
+  return(res["response"])
 import gradio as gr
+gr.ChatInterface(get_llama_response).launch()