Spaces:

Rulga
/

Doc-chat

Sleeping

App Files Files Community

Rulga committed on Mar 6

Commit

0e8391a

1 Parent(s): 437fe85

Refactor run script and update requirements for API integration

Browse files

Files changed (3) hide show

app.py +155 -215
requirements.txt +6 -11
run.sh +1 -4

app.py CHANGED Viewed

@@ -1,7 +1,8 @@
 import os
 import time
-import streamlit as st
 from dotenv import load_dotenv
 from langchain_groq import ChatGroq
 from langchain_huggingface import HuggingFaceEmbeddings
 from langchain_community.vectorstores import FAISS
@@ -12,255 +13,194 @@ from langchain_core.output_parsers import StrOutputParser
 from datetime import datetime
 import json
 import traceback
 # Initialize environment variables
 load_dotenv()
-# --------------- Session State Initialization ---------------
-def init_session_state():
-    """Initialize all required session state variables"""
-    defaults = {
-        'kb_info': {
-            'build_time': None,
-            'size': None,
-            'version': '1.1'
-        },
-        'messages': [],
-        'vector_store': None,
-        'models_initialized': False
-    }
-    for key, value in defaults.items():
-        if key not in st.session_state:
-            st.session_state[key] = value
-# --------------- Enhanced Logging ---------------
-def log_interaction(user_input: str, bot_response: str, context: str):
-    """Log interactions with error handling"""
-    try:
         log_entry = {
             "timestamp": datetime.now().isoformat(),
-            "user_input": user_input,
-            "bot_response": bot_response,
-            "context": context[:500],  # Store first 500 chars of context
-            "kb_version": st.session_state.kb_info['version']
         }
         os.makedirs("chat_history", exist_ok=True)
-        log_path = os.path.join("chat_history", "chat_logs.json")
-        with open(log_path, "a", encoding="utf-8") as f:
-            f.write(json.dumps(log_entry, ensure_ascii=False) + "\n")
-    except Exception as e:
-        st.error(f"Logging error: {str(e)}")
-        print(traceback.format_exc())
-# --------------- Model Initialization ---------------
-@st.cache_resource
 def init_models():
-    """Initialize AI models with caching"""
     try:
         llm = ChatGroq(
             model_name="llama-3.3-70b-versatile",
             temperature=0.6,
-            api_key=os.getenv("GROQ_API_KEY")
         )
         embeddings = HuggingFaceEmbeddings(
             model_name="intfloat/multilingual-e5-large-instruct"
         )
-        st.session_state.models_initialized = True
         return llm, embeddings
     except Exception as e:
-        st.error(f"Model initialization failed: {str(e)}")
-        st.stop()
-# --------------- Knowledge Base Management ---------------
-VECTOR_STORE_PATH = "vector_store"
-URLS = [
-    "https://status.law",
-    "https://status.law/about",
-    "https://status.law/careers",
-    "https://status.law/tariffs-for-services-of-protection-against-extradition",
-    "https://status.law/challenging-sanctions",
-    "https://status.law/law-firm-contact-legal-protection"
-    "https://status.law/cross-border-banking-legal-issues",
-    "https://status.law/extradition-defense",
-    "https://status.law/international-prosecution-protection",
-    "https://status.law/interpol-red-notice-removal",
-    "https://status.law/practice-areas",
-    "https://status.law/reputation-protection",
-    "https://status.law/faq"
-]
-def build_knowledge_base(_embeddings):
-    """Build or update the knowledge base"""
     try:
-        start_time = time.time()
         documents = []
-        with st.status("Building knowledge base..."):
-            # Создаем папку заранее
-            os.makedirs(VECTOR_STORE_PATH, exist_ok=True)
-            # Загрузка документов
-            for url in URLS:
-                try:
-                    loader = WebBaseLoader(url)
-                    docs = loader.load()
-                    documents.extend(docs)
-                    st.write(f"✓ Loaded {url}")
-                except Exception as e:
-                    st.error(f"Failed to load {url}: {str(e)}")
-                    continue  # Продолжаем при ошибках загрузки
-            if not documents:
-                st.error("No documents loaded!")
-                return None
-            # Разделение на чанки
-            text_splitter = RecursiveCharacterTextSplitter(
-                chunk_size=500,
-                chunk_overlap=100
-            )
-            chunks = text_splitter.split_documents(documents)
-            # Явное сохранение
-            vector_store = FAISS.from_documents(chunks, _embeddings)
-            vector_store.save_local(
-                folder_path=VECTOR_STORE_PATH,
-                index_name="index"
-            )
-            # Проверка создания файлов
-            if not os.path.exists(os.path.join(VECTOR_STORE_PATH, "index.faiss")):
-                raise RuntimeError("FAISS index file not created!")
-            # Обновление информации
-            st.session_state.kb_info.update({
-                'build_time': time.time() - start_time,
-                'size': sum(
-                    os.path.getsize(os.path.join(VECTOR_STORE_PATH, f))
-                    for f in ["index.faiss", "index.pkl"]
-                ) / (1024 ** 2),
-                'version': datetime.now().strftime("%Y%m%d-%H%M%S")
-            })
-            st.success("Knowledge base successfully created!")
-            return vector_store
     except Exception as e:
-        st.error(f"Knowledge base creation failed: {str(e)}")
-        # Отладочная информация
-        st.write("Debug info:")
-        st.write(f"Documents loaded: {len(documents)}")
-        st.write(f"Chunks created: {len(chunks) if 'chunks' in locals() else 0}")
-        st.write(f"Vector store path exists: {os.path.exists(VECTOR_STORE_PATH)}")
-        st.stop()
-# --------------- Main Application ---------------
-def main():
-    # Initialize session state first
-    init_session_state()
-    # Page configuration
-    st.set_page_config(
-        page_title="Status Law Assistant",
-        page_icon="⚖️",
-        layout="wide"
-    )
-    # Display header
-    st.markdown('''
-        <h1 style="border-bottom: 2px solid #444; padding-bottom: 10px;">
-            ⚖️ <a href="https://status.law/" style="text-decoration: none; color: #2B5876;">Status.Law</a> Legal Assistant
-        </h1>
-    ''', unsafe_allow_html=True)
-    # Initialize models
-    llm, embeddings = init_models()
-    # Knowledge base initialization
-    if not os.path.exists(VECTOR_STORE_PATH):
-        st.warning("Knowledge base not initialized")
-        if st.button("Create Knowledge Base"):
-            st.session_state.vector_store = build_knowledge_base(embeddings)
-            st.rerun()
-        return
-    if not st.session_state.vector_store:
-        try:
-            st.session_state.vector_store = FAISS.load_local(
-                VECTOR_STORE_PATH,
-                embeddings,
-                allow_dangerous_deserialization=True
-            )
-        except Exception as e:
-            st.error(f"Failed to load knowledge base: {str(e)}")
-            st.stop()
-    # Chat interface
-    for message in st.session_state.messages:
-        with st.chat_message(message["role"]):
-            st.markdown(message["content"])
-    if prompt := st.chat_input("Ask your legal question"):
-        # Add user message to chat history
-        st.session_state.messages.append({"role": "user", "content": prompt})
-        with st.chat_message("user"):
-            st.markdown(prompt)
         # Generate response
-        with st.chat_message("assistant"):
-            try:
-                # Retrieve context
-                context_docs = st.session_state.vector_store.similarity_search(prompt)
-                context_text = "\n".join([d.page_content for d in context_docs])
-                # Generate response
-                prompt_template = PromptTemplate.from_template('''
-                    You are a helpful and polite legal assistant at Status Law.
-                    You answer in the language in which the question was asked.
-                    Answer the question based on the context provided.
-                    If you cannot answer based on the context, say so politely and offer to contact Status Law directly via the following channels:
-                    - For all users: +32465594521 (landline phone).
-                    - For English and Swedish speakers only: +46728495129 (available on WhatsApp, Telegram, Signal, IMO).
-                    - Provide a link to the contact form: [Contact Form](https://status.law/law-firm-contact-legal-protection/).
-                    If the user has questions about specific services and their costs, suggest they visit the page https://status.law/tariffs-for-services-of-protection-against-extradition-and-international-prosecution/ for detailed information.
-                    Ask the user additional questions to understand which service to recommend and provide an estimated cost. For example, clarify their situation and needs to suggest the most appropriate options.
-                    Also, offer free consultations if they are available and suitable for the user's request.
-                    Answer professionally but in a friendly manner.
-                    Example:
-                    Q: How can I challenge the sanctions?
-                    A: To challenge the sanctions, you should consult with our legal team, who specialize in this area. Please contact us directly for detailed advice. You can fill out our contact form here: [Contact Form](https://status.law/law-firm-contact-legal-protection/).
-                    Context: {context}
-                    Question: {question}
-                    Response Guidelines:
-                    1. Answer in the user's language
-                    2. Cite sources when possible
-                    3. Offer contact options if unsure
-                    ''')
-                chain = prompt_template | llm | StrOutputParser()
-                response = chain.invoke({
-                    "context": context_text,
-                    "question": prompt
-                })
-                # Display and log
-                st.markdown(response)
-                log_interaction(prompt, response, context_text)
-                st.session_state.messages.append({"role": "assistant", "content": response})
-            except Exception as e:
-                error_msg = f"Error generating response: {str(e)}"
-                st.error(error_msg)
-                log_interaction(prompt, error_msg, "")
-                print(traceback.format_exc())
 if __name__ == "__main__":
-    main()

 import os
 import time
 from dotenv import load_dotenv
+from fastapi import FastAPI, HTTPException
+from pydantic import BaseModel
 from langchain_groq import ChatGroq
 from langchain_huggingface import HuggingFaceEmbeddings
 from langchain_community.vectorstores import FAISS
 from datetime import datetime
 import json
 import traceback
+from typing import Optional, List, Dict
+from langchain_core.tracers import ConsoleCallbackHandler
+from langchain_core.callbacks import CallbackManager
 # Initialize environment variables
 load_dotenv()
+# Initialize FastAPI app
+app = FastAPI(title="Status Law Assistant API")
+# Models for request/response
+class ChatRequest(BaseModel):
+    """Request body accepted by POST /chat."""
+    # The user's question; used both as the similarity-search query and as {question} in the prompt.
+    message: str
+class ChatResponse(BaseModel):
+    """Response body returned by POST /chat."""
+    # Text produced by the prompt | llm | StrOutputParser chain.
+    response: str
+    # Page contents of the retrieved documents joined with newlines; None when not supplied.
+    context: Optional[str] = None
+# Global variables
+# Directory where the FAISS index is saved to and loaded from.
+VECTOR_STORE_PATH = "vector_store"
+# Pages scraped to build the knowledge base.
+# Every entry ends with a comma: adjacent string literals without one are silently
+# concatenated by Python into a single (invalid) URL.
+URLS = [
+    "https://status.law",
+    "https://status.law/about",
+    "https://status.law/careers",
+    "https://status.law/tariffs-for-services-of-protection-against-extradition",
+    "https://status.law/challenging-sanctions",
+    "https://status.law/law-firm-contact-legal-protection",
+    "https://status.law/cross-border-banking-legal-issues",
+    "https://status.law/extradition-defense",
+    "https://status.law/international-prosecution-protection",
+    "https://status.law/interpol-red-notice-removal",
+    "https://status.law/practice-areas",
+    "https://status.law/reputation-protection",
+    "https://status.law/faq",
+]
+# Enhanced logging
+class CustomCallbackHandler(ConsoleCallbackHandler):
+    """Console tracer that also appends finished chain runs to chat_history/detailed_logs.json."""
+    # NOTE(review): langchain-core's BaseTracer appears to call on_chain_end(outputs, *, run_id, ...)
+    # and to hand the Run object to the private _on_chain_end(run) hook instead - confirm that this
+    # override is really invoked with a Run before relying on the log file.
+    def on_chain_end(self, run):
+        """Append one JSON line describing `run`: id, inputs, outputs, duration in seconds, metadata."""
+        # end_time - start_time is a timedelta, which json cannot encode; log plain seconds instead.
+        elapsed = run.end_time - run.start_time if run.end_time else None
         log_entry = {
             "timestamp": datetime.now().isoformat(),
+            "run_id": str(run.id),
+            "inputs": run.inputs,
+            "outputs": run.outputs,
+            "execution_time": elapsed.total_seconds() if elapsed is not None else None,
+            "metadata": run.metadata
         }
         os.makedirs("chat_history", exist_ok=True)
+        # Serialize before opening the file so a failure cannot leave a half-written line behind.
+        # default=str is deliberate: inputs/outputs may hold objects that json cannot encode natively.
+        line = json.dumps(log_entry, ensure_ascii=False, default=str)
+        with open("chat_history/detailed_logs.json", "a", encoding="utf-8") as f:
+            f.write(line + "\n")
+# Initialize models
 def init_models():
+    """Create the Groq chat model and the HuggingFace embedding model.
+    The chat model is wired to a CustomCallbackHandler through a CallbackManager and reads its
+    API key from the GROQ_API_KEY environment variable.
+    Returns:
+        Tuple (llm, embeddings).
+    Raises:
+        Exception: if construction fails; the original error is chained as __cause__.
+    """
     try:
+        callback_handler = CustomCallbackHandler()
+        callback_manager = CallbackManager([callback_handler])
         llm = ChatGroq(
             model_name="llama-3.3-70b-versatile",
             temperature=0.6,
+            api_key=os.getenv("GROQ_API_KEY"),
+            callback_manager=callback_manager
         )
         embeddings = HuggingFaceEmbeddings(
             model_name="intfloat/multilingual-e5-large-instruct"
         )
         return llm, embeddings
     except Exception as e:
+        # Chain the cause so the original traceback is not lost behind the wrapper message.
+        raise Exception(f"Model initialization failed: {str(e)}") from e
+# Knowledge base management
+def build_knowledge_base(embeddings):
+    """Scrape every page in URLS, split the text into chunks and save a FAISS index.
+    Args:
+        embeddings: embedding model passed to FAISS.from_documents.
+    Returns:
+        The FAISS vector store that was just written to VECTOR_STORE_PATH.
+    Raises:
+        Exception: if no page could be loaded or any later step fails; the original
+            error is chained as __cause__.
+    """
     try:
         documents = []
+        os.makedirs(VECTOR_STORE_PATH, exist_ok=True)
+        for url in URLS:
+            try:
+                loader = WebBaseLoader(url)
+                docs = loader.load()
+                documents.extend(docs)
+            except Exception as e:
+                # Best effort: one unreachable page must not abort the whole build.
+                print(f"Failed to load {url}: {str(e)}")
+                continue
+        if not documents:
+            raise Exception("No documents loaded!")
+        text_splitter = RecursiveCharacterTextSplitter(
+            chunk_size=500,
+            chunk_overlap=100
+        )
+        chunks = text_splitter.split_documents(documents)
+        vector_store = FAISS.from_documents(chunks, embeddings)
+        vector_store.save_local(folder_path=VECTOR_STORE_PATH, index_name="index")
+        return vector_store
     except Exception as e:
+        # Chain the cause so the original traceback is not lost behind the wrapper message.
+        raise Exception(f"Knowledge base creation failed: {str(e)}") from e
+# Initialize models and knowledge base on startup
+# These statements run at import time, i.e. as soon as uvicorn imports this module.
+llm, embeddings = init_models()
+vector_store = None
+if os.path.exists(VECTOR_STORE_PATH):
+    try:
+        # NOTE(review): allow_dangerous_deserialization=True opts in to loading the pickled part
+        # of the saved index - only point VECTOR_STORE_PATH at a directory this app wrote itself.
+        vector_store = FAISS.load_local(
+            VECTOR_STORE_PATH,
+            embeddings,
+            allow_dangerous_deserialization=True
+        )
+    except Exception as e:
+        # A failed load is only reported; vector_store stays None and the index is rebuilt below.
+        print(f"Failed to load existing knowledge base: {str(e)}")
+# No saved index, or it could not be loaded: build one from the URLS pages.
+if vector_store is None:
+    vector_store = build_knowledge_base(embeddings)
+# API endpoints
+@app.post("/chat", response_model=ChatResponse)
+async def chat_endpoint(request: ChatRequest):
+    """Answer one user message with context retrieved from the knowledge base.
+    Args:
+        request: body carrying the user's message.
+    Returns:
+        ChatResponse with the model's answer and the retrieved context text.
+    Raises:
+        HTTPException: status 500 with the error text if retrieval, generation or logging setup fails.
+    """
+    try:
+        # Retrieve context
+        # Awaiting the async variants keeps the event loop free while the search and the
+        # LLM call are in flight; the synchronous calls would stall every other request.
+        context_docs = await vector_store.asimilarity_search(request.message)
+        context_text = "\n".join([d.page_content for d in context_docs])
         # Generate response
+        prompt_template = PromptTemplate.from_template('''
+            You are a helpful and polite legal assistant at Status Law.
+            You answer in the language in which the question was asked.
+            Answer the question based on the context provided.
+            If you cannot answer based on the context, say so politely and offer to contact Status Law directly via the following channels:
+            - For all users: +32465594521 (landline phone).
+            - For English and Swedish speakers only: +46728495129 (available on WhatsApp, Telegram, Signal, IMO).
+            - Provide a link to the contact form: [Contact Form](https://status.law/law-firm-contact-legal-protection/).
+            Context: {context}
+            Question: {question}
+            Response Guidelines:
+            1. Answer in the user's language
+            2. Cite sources when possible
+            3. Offer contact options if unsure
+        ''')
+        chain = prompt_template | llm | StrOutputParser()
+        response = await chain.ainvoke({
+            "context": context_text,
+            "question": request.message
+        })
+        # Log interaction
+        log_interaction(request.message, response, context_text)
+        return ChatResponse(response=response, context=context_text)
+    except Exception as e:
+        # Chain the cause so server logs keep the original traceback.
+        raise HTTPException(status_code=500, detail=str(e)) from e
+@app.post("/rebuild-kb")
+async def rebuild_knowledge_base():
+    """Rebuild the FAISS index from URLS and swap it into the module-level vector_store.
+    Returns a status dict on success; any failure is reported as an HTTP 500 carrying the error text.
+    """
+    try:
+        global vector_store
+        # NOTE(review): build_knowledge_base is called synchronously inside this coroutine, so the
+        # server presumably cannot serve other requests until the rebuild finishes - confirm/offload.
+        vector_store = build_knowledge_base(embeddings)
+        return {"status": "success", "message": "Knowledge base rebuilt successfully"}
+    except Exception as e:
+        raise HTTPException(status_code=500, detail=str(e))
+def log_interaction(user_input: str, bot_response: str, context: str):
+    """Append one question/answer exchange to chat_history/chat_logs.json as a JSON line.
+    Args:
+        user_input: the message the user sent.
+        bot_response: the text returned to the user.
+        context: retrieved context; only its first 500 characters are stored.
+    Logging is best effort: any error is printed with its traceback and never raised.
+    """
+    try:
+        log_entry = {
+            "timestamp": datetime.now().isoformat(),
+            "user_input": user_input,
+            "bot_response": bot_response,
+            "context": context[:500],
+            "kb_version": "1.1"  # You might want to implement version tracking
+        }
+        os.makedirs("chat_history", exist_ok=True)
+        with open("chat_history/chat_logs.json", "a", encoding="utf-8") as f:
+            f.write(json.dumps(log_entry, ensure_ascii=False) + "\n")
+    except Exception as e:
+        # Deliberately swallowed so a logging problem cannot fail the chat request.
+        print(f"Logging error: {str(e)}")
+        print(traceback.format_exc())
 if __name__ == "__main__":
+    # Direct-run entry point (python app.py): serve the FastAPI app on all interfaces, port 8000.
+    import uvicorn
+    uvicorn.run(app, host="0.0.0.0", port=8000)

requirements.txt CHANGED Viewed

@@ -1,23 +1,18 @@
-streamlit
 langchain-community
 langchain-core
 langchain-huggingface
 langchain-groq
 python-dotenv
-beautifulsoup4
 faiss-cpu
 requests
-langgraph
-langchain-anthropic
 fastapi
 uvicorn[standard]
 pydantic
-python-multipart
 pandas
-langchain
-plotly

+# Core components for the LLM and the knowledge base
 langchain-community
 langchain-core
 langchain-huggingface
 langchain-groq
 python-dotenv
+# HTML parser needed by WebBaseLoader, which app.py still uses to scrape the knowledge-base pages
+beautifulsoup4
 faiss-cpu
 requests
+# For the API and logging
 fastapi
 uvicorn[standard]
 pydantic
 pandas
+# For LangChain logging
+langgraph
+langchain-core[tracing]

run.sh CHANGED Viewed

@@ -1,5 +1,2 @@
 #!/bin/bash
-# Запуск Streamlit и FastAPI параллельно
-streamlit run app.py &          # Запуск чат-бота
-uvicorn api.main:app --reload   # Запуск API для анализа логов


1	#!/bin/bash
2	+ uvicorn app:app --host 0.0.0.0 --port 8000 --reload