Spaces:

Danielrahmai1991
/

findemov3

Sleeping

App Files Files Community

findemov3 / app.py

Danielrahmai1991

Update app.py

4eeec68 verified 10 months ago

raw

history blame

2.91 kB

	# import gradio as gr

	# from langchain_community.llms import LlamaCpp
	# from langchain.prompts import PromptTemplate
	# from langchain.chains import LLMChain
	# from langchain_core.callbacks import StreamingStdOutCallbackHandler
	# from langchain.retrievers import TFIDFRetriever
	# from langchain.chains import RetrievalQA
	# from langchain.memory import ConversationBufferMemory
	# from langchain_community.chat_models import ChatLlamaCpp


	# callbacks = [StreamingStdOutCallbackHandler()]
	# print("creating ll started")
	# llm = ChatLlamaCpp(
	# model_path="finbro-v0.1.0-llama-3-8B-instruct-1m.gguf",
	# n_batch=8,
	# temperature=0.85,
	# max_tokens=256,
	# top_p=0.95,
	# top_k = 10,
	# callback_manager=callbacks,
	# n_ctx=2048,
	# verbose=True, # Verbose is required to pass to the callback manager
	# )
	# print("creating llm ended")






	# def greet(question, model_type):
	# print(f"question is {question}")
	# out_gen = "testsetestestetsetsets"
	# return out_gen

	# demo = gr.Interface(fn=greet, inputs=["text", gr.Dropdown(
	# ["With memory", "Without memory"], label="Memory status", info="With using memory, the output will be slow but strong"
	# ),], outputs="text")
	# demo.launch(debug=True, share=True)


	import gradio as gr

	from langchain_community.llms import LlamaCpp
	from langchain.prompts import PromptTemplate
	from langchain.chains import LLMChain
	from langchain_core.callbacks import StreamingStdOutCallbackHandler
	from langchain.retrievers import TFIDFRetriever
	from langchain.chains import RetrievalQA
	from langchain.memory import ConversationBufferMemory
	from langchain_community.chat_models import ChatLlamaCpp

	# Handler that streams generated tokens to stdout, so generation progress
	# shows up in the process logs.
	callbacks = [StreamingStdOutCallbackHandler()]
	print("creating ll started")
	# Relative path to the GGUF model file loaded by llama.cpp.
	M_NAME = "finbro-v0.1.0-llama-3-8B-instruct-1m.gguf"
	llm = ChatLlamaCpp(
	    model_path=M_NAME,
	    n_batch=8,
	    temperature=0.85,
	    max_tokens=256,
	    top_p=0.95,
	    top_k=10,
	    # `callback_manager=` is the deprecated spelling of this argument;
	    # `callbacks=` is the supported keyword for attaching handlers.
	    callbacks=callbacks,
	    n_ctx=2048,
	    verbose=True,  # Verbose is required to pass to the callback manager
	)
	# print("creating ll ended")






	# Prompt sent to the model; ``{question}`` is replaced with the user's text.
	_PROMPT_TEMPLATE = """You are the Finiantial expert:

	### Instruction:
	{question}

	### Input:


	### Response:
	"""

	# Lazily created chain, shared by every request (see _get_llm_chain).
	_llm_chain = None


	def _get_llm_chain():
	    """Return the shared prompt+model chain, building it on first use."""
	    global _llm_chain
	    if _llm_chain is None:
	        prompt = PromptTemplate(
	            template=_PROMPT_TEMPLATE, input_variables=["question"]
	        )
	        _llm_chain = LLMChain(prompt=prompt, llm=llm)
	    return _llm_chain


	def greet(question, model_type):
	    """Answer a single question with the module-level ``llm``.

	    Args:
	        question: Text typed by the user in the Gradio text box.
	        model_type: Value of the "Memory status" dropdown. It is accepted
	            because the interface passes two inputs, but it is not used:
	            both choices produce the same single-turn answer.

	    Returns:
	        The text generated by the model, or a short hint when the question
	        is empty.
	    """
	    print("prompt started ")
	    print(f"question is {question}")

	    # Don't spend a generation on an empty prompt.
	    if not question or not question.strip():
	        return "Please enter a question."

	    # The prompt and chain do not depend on the request, so they are built
	    # once and reused instead of being recreated on every call.
	    out_gen = _get_llm_chain().run(question)
	    print(f"out is: {out_gen}")
	    return out_gen

	# Second input of the interface: its selected value is passed to greet()
	# as the ``model_type`` argument.
	_memory_selector = gr.Dropdown(
	    ["Without memory", "With memory"],
	    label="Memory status",
	    info="With using memory, the output will be slow but strong",
	)

	# Text box + dropdown in, plain text out.
	demo = gr.Interface(
	    fn=greet,
	    inputs=["text", _memory_selector],
	    outputs="text",
	)
	demo.launch(debug=True, share=True)