import streamlit as st
import torch
from transformers import BitsAndBytesConfig

# Import llama-index and langchain modules
from llama_index.core import VectorStoreIndex, SimpleDirectoryReader, Settings, PromptTemplate
from llama_index.llms.huggingface import HuggingFaceLLM
from llama_index.llms.huggingface_api import HuggingFaceInferenceAPI
from llama_index.embeddings.huggingface import HuggingFaceEmbedding
from langchain.embeddings import HuggingFaceEmbeddings
from llama_index.embeddings.langchain import LangchainEmbedding
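# Optional (an assumption, not part of the original app): meta-llama/Llama-2-7b-chat-hf is a
# gated model on the Hugging Face Hub, so the Space must be authenticated to download it.
# If an HF_TOKEN secret is configured for the Space, it can be used to log in up front.
import os
from huggingface_hub import login

if os.environ.get("HF_TOKEN"):
    login(token=os.environ["HF_TOKEN"])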
# ---------------------------
# Configure your LLM and embeddings
# ---------------------------
system_prompt = """
You are a Q&A assistant. Your goal is to answer questions as
accurately as possible based on the instructions and context provided.
"""
query_wrapper_prompt = PromptTemplate("<|USER|>{query_str}<|ASSISTANT|>")
# Configure BitsAndBytes for 8-bit quantization.
# Note: bnb_4bit_compute_dtype only takes effect with load_in_4bit=True; with
# load_in_8bit=True it is ignored, so it is kept here only to ease switching to 4-bit.
quantization_config = BitsAndBytesConfig(
    load_in_8bit=True,
    bnb_4bit_compute_dtype=torch.float16
)
# Initialize the HuggingFaceLLM with your model settings
llm = HuggingFaceLLM(
    context_window=4096,
    max_new_tokens=256,
    generate_kwargs={"temperature": 0.0, "do_sample": False},
    system_prompt=system_prompt,
    query_wrapper_prompt=query_wrapper_prompt,
    tokenizer_name="meta-llama/Llama-2-7b-chat-hf",
    model_name="meta-llama/Llama-2-7b-chat-hf",
    device_map="auto",
    model_kwargs={
        "torch_dtype": torch.float16,
        "quantization_config": quantization_config
    }
)
# Set up the embedding model using Langchain's HuggingFaceEmbeddings
lc_embed_model = HuggingFaceEmbeddings(model_name="sentence-transformers/all-mpnet-base-v2")
embed_model = LangchainEmbedding(lc_embed_model)

# Apply global settings for llama-index
Settings.llm = llm
Settings.embed_model = embed_model
Settings.chunk_size = 1024
# ---------------------------
# Load documents from repository
# ---------------------------
# The "data" folder should be part of your repository with your documents.
DATA_DIR = "data"  # Ensure this folder exists and contains your documents.

try:
    documents = SimpleDirectoryReader(DATA_DIR).load_data()
except Exception as e:
    st.error(f"Error loading documents from '{DATA_DIR}': {e}")
    documents = []
if not documents:
    st.warning("No documents found in the data folder. Please add your documents and redeploy.")
    st.stop()  # Halt the app here so the query engine is never used without documents.

# Create the vector store index and query engine
index = VectorStoreIndex.from_documents(documents)
query_engine = index.as_query_engine()
# ---------------------------
# Streamlit Interface
# ---------------------------
st.title("LlamaIndex Q&A Assistant")
user_query = st.text_input("Enter your question:")

if user_query:
    with st.spinner("Querying..."):
        response = query_engine.query(user_query)
    st.markdown("### Response:")
    st.write(str(response))  # The Response object stringifies to the generated answer text.
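# Optional sketch (not part of the original app): Streamlit re-runs this whole script on
# every interaction, so the model, embeddings, and index above are rebuilt for each query.
# A common pattern is to wrap the expensive setup in a function decorated with
# st.cache_resource so it is built only once per server process, e.g.:
#
#   @st.cache_resource
#   def load_query_engine():
#       docs = SimpleDirectoryReader(DATA_DIR).load_data()
#       return VectorStoreIndex.from_documents(docs).as_query_engine()
#
#   query_engine = load_query_engine()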