Spaces:

kingabzpro
/

Real-Time-RAG

Runtime error

Real-Time-RAG / app.py

Abid Ali Awan

working on the live version

dfabd41 9 months ago

2.98 kB

	import os
	import gradio as gr
	from langchain_core.output_parsers import StrOutputParser
	from langchain_core.runnables import RunnablePassthrough
	from langchain_groq import ChatGroq
	from langchain_huggingface import HuggingFaceEmbeddings
	from langchain_chroma import Chroma
	from langchain_core.prompts import PromptTemplate

	# Groq credential, read from the "Groq_API_Key" environment variable
	# (None when the variable is not set).
	groq_api_key = os.environ.get("Groq_API_Key")

	# Chat model that writes the answers.
	# NOTE(review): confirm this model id is still served by Groq.
	llm = ChatGroq(
	    model="llama-3.1-70b-versatile",
	    api_key=groq_api_key,
	)

	# Embedding model used to vectorise queries for the vector store; the
	# "device" model kwarg is set to "cpu".
	embed_model = HuggingFaceEmbeddings(
	    model_name="mixedbread-ai/mxbai-embed-large-v1",
	    model_kwargs=dict(device="cpu"),
	)

	# Open the Chroma store persisted in the local "Starwars_Vectordb" directory.
	# The path has to be given as persist_directory: Chroma's first positional
	# parameter is the collection name, so passing the path positionally opens an
	# empty in-memory collection instead of the data on disk.
	# NOTE(review): assumes the store was built with Chroma's default collection
	# name -- confirm against the ingestion script.
	vectorstore = Chroma(
	    persist_directory="Starwars_Vectordb",
	    embedding_function=embed_model,
	)

	# Wrap the store in a retriever (default search settings) for use in the chain.
	retriever = vectorstore.as_retriever()

	# Prompt text sent to the language model. It has two placeholders,
	# {context} and {question}; the RAG chain built from this prompt supplies the
	# retriever's output as "context" and the unmodified user query as "question".
	# NOTE(review): the continuation lines of the literal start with a tab, which
	# looks like a paste artifact -- confirm against the deployed file before
	# normalising, since the whitespace is part of the prompt.
	template = """You are a Star Wars assistant for answering questions.
	Use the provided context to answer the question.
	If you don't know the answer, say so. Explain your answer in detail.
	Do not discuss the context in your response; just provide the answer directly.

	Context: {context}

	Question: {question}

	Answer:"""

	# Turn the raw text into a PromptTemplate object usable inside the chain.
	rag_prompt = PromptTemplate.from_template(template)

	# Retrieval-Augmented Generation pipeline, composed with the `|` operator:
	#   1. the incoming query is sent to the retriever (-> "context") and also
	#      passed through unchanged (-> "question");
	#   2. both values fill the prompt template;
	#   3. the prompt is sent to the chat model;
	#   4. the model output is parsed into a plain string.
	# The operators must be bare `|`; a backslash-escaped `\|` is a syntax error.
	rag_chain = (
	    {"context": retriever, "question": RunnablePassthrough()}
	    | rag_prompt
	    | llm
	    | StrOutputParser()
	)

	# Most recent query text seen by the app. rag_memory_stream and update_input
	# both write it; a running generation compares against it to detect that its
	# own query is no longer the latest one.
	last_input_text = ""


	def rag_memory_stream(text):
	    """Stream the RAG answer for ``text`` as a progressively longer string.

	    Args:
	        text: The user query passed to ``rag_chain.stream``.

	    Yields:
	        The answer accumulated so far, once per chunk received from the chain.

	    Generation stops early when ``last_input_text`` no longer equals ``text``,
	    i.e. when a newer query has been recorded in the meantime.
	    """
	    global last_input_text
	    # Mark this query as the current one before any chunk is produced.
	    last_input_text = text
	    partial_text = ""

	    for new_text in rag_chain.stream(text):
	        if text != last_input_text:
	            # A newer query took over. Stop without touching last_input_text:
	            # writing the stale text back here would make the newer generation
	            # believe that *it* is outdated and abort as well.
	            break
	        partial_text += new_text
	        yield partial_text


	def update_input(text):
	    """Record ``text`` as the most recent query in ``last_input_text``.

	    Only the module-level marker is updated; nothing is returned.
	    """
	    global last_input_text
	    last_input_text = text


	# Static texts for the Gradio interface: the page title, and an HTML snippet
	# used as the description that shows a centred cover image (550 px wide)
	# loaded from the URL below.
	title = "Real-time AI App with Groq API and LangChain"
	description = """
	<center>
	<img src="https://huggingface.co/spaces/kingabzpro/Real-Time-RAG/resolve/main/Images/cover.png" alt="logo" width="550"/>
	</center>
	"""

	# Build the live text-in / text-out interface around rag_memory_stream.
	# Batch mode is deliberately not enabled: rag_memory_stream is a generator that
	# takes one string, whereas batch=True would hand it lists of inputs, and
	# Gradio does not support generator functions in batch mode.
	demo = gr.Interface(
	    title=title,
	    description=description,
	    fn=rag_memory_stream,
	    inputs="text",
	    outputs="text",
	    live=True,
	    concurrency_limit=12,
	    allow_flagging="never",
	    theme=gr.themes.Soft(),
	)

	# Record every user edit of the query box through update_input, so a running
	# generation can notice that its query is stale. gr.Interface has no
	# `input_event` method; the listener is attached to the input component
	# itself, inside the interface's Blocks context. queue=False lets the update
	# run immediately instead of waiting behind in-flight generations.
	with demo:
	    _query_box = demo.input_components[0]
	    _query_box.input(update_input, inputs=_query_box, outputs=None, queue=False)

	# Enable the request queue and start the server.
	demo.queue()
	demo.launch()