AminFaraji committed on
Commit
a11cce5
·
verified ·
1 Parent(s): 296cc2c

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +30 -25
app.py CHANGED
@@ -49,10 +49,12 @@ def install(package):
49
  install('accelerate')
50
  MODEL_NAME = "tiiuae/falcon-7b-instruct"
51
 
52
- model = AutoModelForCausalLM.from_pretrained(
53
- MODEL_NAME, trust_remote_code=True, device_map="auto",offload_folder="offload"
 
 
 
54
  )
55
- model = model.eval()
56
 
57
  tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
58
  print(f"Model device: {model.device}")
@@ -216,28 +218,31 @@ def get_llama_response(message: str, history: list) -> str:
216
 
217
 
218
  context_text = "\n\n---\n\n".join([doc.page_content for doc, _score in results])
219
- template = """
220
- The following is a conversation between a human an AI. The AI acts exactly like Dwight K Schrute from the TV show The Office.
221
- Dwight is an experienced and very successful online marketer and salesperson. He is witty, persuasive, uncensored, direct and practical.
222
- Dwight helps with every marketing task is given to him. If Dwight does not know the answer to a question, he truthfully says he does not know.
223
-
224
- Current conversation:
225
- """
226
- s="""
227
- {history}
228
- Human: {input}
229
- AI:""".strip()
230
-
231
-
232
- prompt = PromptTemplate(input_variables=["history", "input"], template=template+context_text+ s)
233
-
234
- #print(template)
235
- #print('the answer is',chain(query_text))
236
- chain.prompt=prompt
237
- print('prompt set')
238
- res = chain.invoke(query_text)
239
- print('answer generated')
240
- return(res["response"])
 
 
 
241
 
242
  import gradio as gr
243
 
 
49
  install('accelerate')
50
  MODEL_NAME = "tiiuae/falcon-7b-instruct"
51
 
52
+ llama_pipeline = pipeline(
53
+ "text-generation",
54
+ model=model,
55
+ torch_dtype=torch.float16,
56
+ device_map="auto",
57
  )
 
58
 
59
  tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
60
  print(f"Model device: {model.device}")
 
218
 
219
 
220
  context_text = "\n\n---\n\n".join([doc.page_content for doc, _score in results])
221
+ query = """
222
+ Answer the question based only on the following context. Dont provide any information out of the context:
223
+
224
+ {context}
225
+
226
+ ---
227
+
228
+ Answer the question based on the above context: {question}
229
+ """
230
+
231
+
232
+ query=query.format(context=context_text,question=message)
233
+
234
+ sequences = llama_pipeline(
235
+ query,
236
+ do_sample=True,
237
+ top_k=10,
238
+ num_return_sequences=1,
239
+ eos_token_id=tokenizer.eos_token_id,
240
+ max_length=1024,
241
+ )
242
+
243
+ generated_text = sequences[0]['generated_text']
244
+ response = generated_text[len(query):]
245
+ return response.strip()
246
 
247
  import gradio as gr
248