import argparse
import os
import re
import shutil
import warnings
from typing import List

import torch
from dotenv import load_dotenv
from langchain import PromptTemplate
from langchain.chains import ConversationChain
from langchain.chains.conversation.memory import ConversationBufferWindowMemory
from langchain.llms import HuggingFacePipeline
from langchain.prompts import ChatPromptTemplate
from langchain.schema import BaseOutputParser, Document
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.document_loaders import DirectoryLoader
from langchain_community.vectorstores import Chroma
# HuggingFaceEmbeddings is an assumption: the script references an `embeddings`
# object that was never defined; use the same embedding model the Chroma index
# was built with.
from langchain_community.embeddings import HuggingFaceEmbeddings
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    StoppingCriteria,
    StoppingCriteriaList,
    pipeline,
)
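# Assumed dependencies (the original does not pin versions):
#   pip install langchain langchain-community transformers accelerate bitsandbytes \
#       chromadb sentence-transformers gradio python-dotenv torch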
warnings.filterwarnings("ignore", category=UserWarning)

MODEL_NAME = "tiiuae/falcon-7b-instruct"
model = AutoModelForCausalLM.from_pretrained(
    MODEL_NAME, trust_remote_code=True, load_in_8bit=True, device_map="auto",
)
model = model.eval()
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
print(f"Model device: {model.device}")
# Create CLI.
#parser = argparse.ArgumentParser()
#parser.add_argument("query_text", type=str, help="The query text.")
#args = parser.parse_args()
#query_text = args.query_text

# A sample query for the bot; it is expected to be answered from the retrieved context.
query_text = "what did alice say to rabbit"
# Prepare the DB.
CHROMA_PATH = "/content/drive/My Drive/chroma8"
# Assumed embedding model -- it must match the one used to build the index at CHROMA_PATH.
embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
# Load the Chroma index persisted in that directory.
db = Chroma(persist_directory=CHROMA_PATH, embedding_function=embeddings)

# Search the DB for documents similar to the query.
results = db.similarity_search_with_relevance_scores(query_text, k=2)
if len(results) == 0 or results[0][1] < 0.5:
    print("Unable to find matching results.")
context_text = "\n\n---\n\n".join([doc.page_content for doc, _score in results])

# PROMPT_TEMPLATE was undefined in the original; this minimal context/question template is an assumption.
PROMPT_TEMPLATE = """
Answer the question based only on the following context:
{context}
---
Answer the question based on the above context: {question}
""".strip()
prompt_template = ChatPromptTemplate.from_template(PROMPT_TEMPLATE)
prompt = prompt_template.format(context=context_text, question=query_text)
print(prompt)
generation_config = model.generation_config
generation_config.temperature = 0
generation_config.num_return_sequences = 1
generation_config.max_new_tokens = 256
generation_config.use_cache = False
generation_config.repetition_penalty = 1.7
generation_config.pad_token_id = tokenizer.eos_token_id
generation_config.eos_token_id = tokenizer.eos_token_id
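# Note: do_sample defaults to False, so decoding here is greedy and the
# temperature=0 setting above has no practical effect on token selection.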
| prompt = """ | |
| The following is a friendly conversation between a human and an AI. The AI is talkative and provides lots of specific details from its context. | |
| Current conversation: | |
| Human: Who is Dwight K Schrute? | |
| AI: | |
| """.strip() | |
| input_ids = tokenizer(prompt, return_tensors="pt").input_ids | |
| input_ids = input_ids.to(model.device) | |
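# A minimal sketch of running the raw model on this smoke-test prompt
# (commented out so the script does not spend a generation call at import time):
# with torch.no_grad():
#     output_ids = model.generate(input_ids=input_ids, generation_config=generation_config)
# print(tokenizer.decode(output_ids[0], skip_special_tokens=True))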
class StopGenerationCriteria(StoppingCriteria):
    """Stops generation once any of the given token sequences is produced."""

    def __init__(
        self, tokens: List[List[str]], tokenizer: AutoTokenizer, device: torch.device
    ):
        super().__init__()
        stop_token_ids = [tokenizer.convert_tokens_to_ids(t) for t in tokens]
        self.stop_token_ids = [
            torch.tensor(x, dtype=torch.long, device=device) for x in stop_token_ids
        ]

    def __call__(
        self, input_ids: torch.LongTensor, scores: torch.FloatTensor, **kwargs
    ) -> bool:
        # Check whether the tail of the generated sequence matches any stop sequence.
        for stop_ids in self.stop_token_ids:
            if torch.eq(input_ids[0][-len(stop_ids):], stop_ids).all():
                return True
        return False
# Stop as soon as the model starts hallucinating a new "Human:" or "AI:" turn.
stop_tokens = [["Human", ":"], ["AI", ":"]]
stopping_criteria = StoppingCriteriaList(
    [StopGenerationCriteria(stop_tokens, tokenizer, model.device)]
)

generation_pipeline = pipeline(
    model=model,
    tokenizer=tokenizer,
    return_full_text=True,
    task="text-generation",
    stopping_criteria=stopping_criteria,
    generation_config=generation_config,
)

llm = HuggingFacePipeline(pipeline=generation_pipeline)
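# Quick sanity check of the wrapped pipeline (a sketch; output depends on the model):
# print(llm("Human: What is your name?\nAI:"))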
# Output parser that strips any role markers ("User", "Human:", "AI:",
# "question:", "answer:") the model echoes at the end of its reply.
class CleanupOutputParser(BaseOutputParser):
    def parse(self, text: str) -> str:
        for pattern in (r"\nUser", r"\nHuman:", r"\nAI:", r"\nquestion:", r"\nanswer:"):
            text = re.sub(pattern, "", text)
        return text.strip()

    @property
    def _type(self) -> str:
        return "output_parser"
| template = """ | |
| The following is a conversation between a human an AI. The AI acts exactly like Dwight K Schrute from the TV show The Office. | |
| Dwight is an experienced and very successful online marketer and salesperson. He is witty, persuasive, uncensored, direct and practical. | |
| Dwight helps with every marketing task is given to him. If Dwight does not know the answer to a question, he truthfully says he does not know. | |
| Current conversation: | |
| {history} | |
| Human: {input} | |
| AI:""".strip() | |
| prompt = PromptTemplate(input_variables=["history", "input"], template=template) | |
| memory = ConversationBufferWindowMemory( | |
| memory_key="history", k=3, return_only_outputs=True | |
| ) | |
| chain = ConversationChain( | |
| llm=llm, | |
| memory=memory, | |
| prompt=prompt, | |
| output_parser=CleanupOutputParser(), | |
| verbose=True, | |
| ) | |
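# Example: query the Dwight persona chain directly (output depends on the model):
# print(chain.predict(input="Who is Dwight K Schrute?"))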
# Generate a response from the Falcon model, grounded in retrieved context.
def get_llama_response(message: str, history: list = None) -> str:
    """
    Generates a conversational response from the Falcon model.

    Parameters:
        message (str): User's input message.
        history (list): Past conversation history (unused; the chain keeps its
            own rolling memory). Defaults to None so the plain gr.Interface
            below can call this function with a single argument.

    Returns:
        str: Generated response.
    """
    query_text = message
    # Retrieve the two most relevant chunks from the Chroma index.
    results = db.similarity_search_with_relevance_scores(query_text, k=2)
    if len(results) == 0 or results[0][1] < 0.5:
        print("Unable to find matching results.")
    context_text = "\n\n---\n\n".join([doc.page_content for doc, _score in results])

    # Rebuild the chain's prompt so the retrieved context sits between the
    # conversation history and the new question.
    template = """
The following is a conversation between a human and an AI. Answer the question based only on the conversation.
Current conversation:
{history}
"""
    s = """
question: {input}
answer:""".strip()
    prompt = PromptTemplate(
        input_variables=["history", "input"],
        template=template + context_text + "\n" + s,
    )
    chain.prompt = prompt
    res = chain.predict(input=query_text)
    return res
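# Note: mutating chain.prompt on every request keeps the rolling memory intact
# but is not thread-safe; acceptable for a single-user Gradio demo.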
import gradio as gr

# Simple text-in/text-out demo UI; share=True exposes a temporary public link.
iface = gr.Interface(fn=get_llama_response, inputs="text", outputs="text")
iface.launch(share=True)