Rulga committed on
Commit
0e8391a
·
1 Parent(s): 437fe85

Refactor run script and update requirements for API integration

Browse files
Files changed (3) hide show
  1. app.py +155 -215
  2. requirements.txt +6 -11
  3. run.sh +1 -4
app.py CHANGED
@@ -1,7 +1,8 @@
1
  import os
2
  import time
3
- import streamlit as st
4
  from dotenv import load_dotenv
 
 
5
  from langchain_groq import ChatGroq
6
  from langchain_huggingface import HuggingFaceEmbeddings
7
  from langchain_community.vectorstores import FAISS
@@ -12,255 +13,194 @@ from langchain_core.output_parsers import StrOutputParser
12
  from datetime import datetime
13
  import json
14
  import traceback
 
 
 
15
 
16
  # Initialize environment variables
17
  load_dotenv()
18
 
19
- # --------------- Session State Initialization ---------------
20
- def init_session_state():
21
- """Initialize all required session state variables"""
22
- defaults = {
23
- 'kb_info': {
24
- 'build_time': None,
25
- 'size': None,
26
- 'version': '1.1'
27
- },
28
- 'messages': [],
29
- 'vector_store': None,
30
- 'models_initialized': False
31
- }
32
-
33
- for key, value in defaults.items():
34
- if key not in st.session_state:
35
- st.session_state[key] = value
36
 
37
- # --------------- Enhanced Logging ---------------
38
- def log_interaction(user_input: str, bot_response: str, context: str):
39
- """Log interactions with error handling"""
40
- try:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
41
  log_entry = {
42
  "timestamp": datetime.now().isoformat(),
43
- "user_input": user_input,
44
- "bot_response": bot_response,
45
- "context": context[:500], # Store first 500 chars of context
46
- "kb_version": st.session_state.kb_info['version']
 
47
  }
48
 
49
  os.makedirs("chat_history", exist_ok=True)
50
- log_path = os.path.join("chat_history", "chat_logs.json")
51
-
52
- with open(log_path, "a", encoding="utf-8") as f:
53
- f.write(json.dumps(log_entry, ensure_ascii=False) + "\n")
54
-
55
- except Exception as e:
56
- st.error(f"Logging error: {str(e)}")
57
- print(traceback.format_exc())
58
 
59
- # --------------- Model Initialization ---------------
60
- @st.cache_resource
61
  def init_models():
62
- """Initialize AI models with caching"""
63
  try:
 
 
 
64
  llm = ChatGroq(
65
  model_name="llama-3.3-70b-versatile",
66
  temperature=0.6,
67
- api_key=os.getenv("GROQ_API_KEY")
 
68
  )
69
  embeddings = HuggingFaceEmbeddings(
70
  model_name="intfloat/multilingual-e5-large-instruct"
71
  )
72
- st.session_state.models_initialized = True
73
  return llm, embeddings
74
  except Exception as e:
75
- st.error(f"Model initialization failed: {str(e)}")
76
- st.stop()
77
-
78
- # --------------- Knowledge Base Management ---------------
79
- VECTOR_STORE_PATH = "vector_store"
80
- URLS = [
81
- "https://status.law",
82
- "https://status.law/about",
83
- "https://status.law/careers",
84
- "https://status.law/tariffs-for-services-of-protection-against-extradition",
85
- "https://status.law/challenging-sanctions",
86
- "https://status.law/law-firm-contact-legal-protection"
87
- "https://status.law/cross-border-banking-legal-issues",
88
- "https://status.law/extradition-defense",
89
- "https://status.law/international-prosecution-protection",
90
- "https://status.law/interpol-red-notice-removal",
91
- "https://status.law/practice-areas",
92
- "https://status.law/reputation-protection",
93
- "https://status.law/faq"
94
- ]
95
 
96
- def build_knowledge_base(_embeddings):
97
- """Build or update the knowledge base"""
98
  try:
99
- start_time = time.time()
100
  documents = []
 
101
 
102
- with st.status("Building knowledge base..."):
103
- # Создаем папку заранее
104
- os.makedirs(VECTOR_STORE_PATH, exist_ok=True)
105
-
106
- # Загрузка документов
107
- for url in URLS:
108
- try:
109
- loader = WebBaseLoader(url)
110
- docs = loader.load()
111
- documents.extend(docs)
112
- st.write(f"✓ Loaded {url}")
113
- except Exception as e:
114
- st.error(f"Failed to load {url}: {str(e)}")
115
- continue # Продолжаем при ошибках загрузки
116
 
117
- if not documents:
118
- st.error("No documents loaded!")
119
- return None
120
 
121
- # Разделение на чанки
122
- text_splitter = RecursiveCharacterTextSplitter(
123
- chunk_size=500,
124
- chunk_overlap=100
125
- )
126
- chunks = text_splitter.split_documents(documents)
127
-
128
- # Явное сохранение
129
- vector_store = FAISS.from_documents(chunks, _embeddings)
130
- vector_store.save_local(
131
- folder_path=VECTOR_STORE_PATH,
132
- index_name="index"
133
- )
134
-
135
- # Проверка создания файлов
136
- if not os.path.exists(os.path.join(VECTOR_STORE_PATH, "index.faiss")):
137
- raise RuntimeError("FAISS index file not created!")
138
-
139
- # Обновление информации
140
- st.session_state.kb_info.update({
141
- 'build_time': time.time() - start_time,
142
- 'size': sum(
143
- os.path.getsize(os.path.join(VECTOR_STORE_PATH, f))
144
- for f in ["index.faiss", "index.pkl"]
145
- ) / (1024 ** 2),
146
- 'version': datetime.now().strftime("%Y%m%d-%H%M%S")
147
- })
148
-
149
- st.success("Knowledge base successfully created!")
150
- return vector_store
151
-
152
  except Exception as e:
153
- st.error(f"Knowledge base creation failed: {str(e)}")
154
- # Отладочная информация
155
- st.write("Debug info:")
156
- st.write(f"Documents loaded: {len(documents)}")
157
- st.write(f"Chunks created: {len(chunks) if 'chunks' in locals() else 0}")
158
- st.write(f"Vector store path exists: {os.path.exists(VECTOR_STORE_PATH)}")
159
- st.stop()
160
- # --------------- Main Application ---------------
161
- def main():
162
- # Initialize session state first
163
- init_session_state()
164
-
165
- # Page configuration
166
- st.set_page_config(
167
- page_title="Status Law Assistant",
168
- page_icon="⚖️",
169
- layout="wide"
170
- )
171
-
172
- # Display header
173
- st.markdown('''
174
- <h1 style="border-bottom: 2px solid #444; padding-bottom: 10px;">
175
- ⚖️ <a href="https://status.law/" style="text-decoration: none; color: #2B5876;">Status.Law</a> Legal Assistant
176
- </h1>
177
- ''', unsafe_allow_html=True)
178
 
179
- # Initialize models
180
- llm, embeddings = init_models()
181
-
182
- # Knowledge base initialization
183
- if not os.path.exists(VECTOR_STORE_PATH):
184
- st.warning("Knowledge base not initialized")
185
- if st.button("Create Knowledge Base"):
186
- st.session_state.vector_store = build_knowledge_base(embeddings)
187
- st.rerun()
188
- return
189
-
190
- if not st.session_state.vector_store:
191
- try:
192
- st.session_state.vector_store = FAISS.load_local(
193
- VECTOR_STORE_PATH,
194
- embeddings,
195
- allow_dangerous_deserialization=True
196
- )
197
- except Exception as e:
198
- st.error(f"Failed to load knowledge base: {str(e)}")
199
- st.stop()
200
 
201
- # Chat interface
202
- for message in st.session_state.messages:
203
- with st.chat_message(message["role"]):
204
- st.markdown(message["content"])
 
 
 
 
 
205
 
206
- if prompt := st.chat_input("Ask your legal question"):
207
- # Add user message to chat history
208
- st.session_state.messages.append({"role": "user", "content": prompt})
209
- with st.chat_message("user"):
210
- st.markdown(prompt)
211
 
 
 
 
 
 
 
 
 
212
  # Generate response
213
- with st.chat_message("assistant"):
214
- try:
215
- # Retrieve context
216
- context_docs = st.session_state.vector_store.similarity_search(prompt)
217
- context_text = "\n".join([d.page_content for d in context_docs])
218
-
219
- # Generate response
220
- prompt_template = PromptTemplate.from_template('''
221
- You are a helpful and polite legal assistant at Status Law.
222
- You answer in the language in which the question was asked.
223
- Answer the question based on the context provided.
224
- If you cannot answer based on the context, say so politely and offer to contact Status Law directly via the following channels:
225
- - For all users: +32465594521 (landline phone).
226
- - For English and Swedish speakers only: +46728495129 (available on WhatsApp, Telegram, Signal, IMO).
227
- - Provide a link to the contact form: [Contact Form](https://status.law/law-firm-contact-legal-protection/).
228
- If the user has questions about specific services and their costs, suggest they visit the page https://status.law/tariffs-for-services-of-protection-against-extradition-and-international-prosecution/ for detailed information.
229
-
230
- Ask the user additional questions to understand which service to recommend and provide an estimated cost. For example, clarify their situation and needs to suggest the most appropriate options.
231
-
232
- Also, offer free consultations if they are available and suitable for the user's request.
233
- Answer professionally but in a friendly manner.
 
 
 
 
 
 
 
 
 
 
234
 
235
- Example:
236
- Q: How can I challenge the sanctions?
237
- A: To challenge the sanctions, you should consult with our legal team, who specialize in this area. Please contact us directly for detailed advice. You can fill out our contact form here: [Contact Form](https://status.law/law-firm-contact-legal-protection/).
 
 
 
 
 
238
 
239
- Context: {context}
240
- Question: {question}
241
-
242
- Response Guidelines:
243
- 1. Answer in the user's language
244
- 2. Cite sources when possible
245
- 3. Offer contact options if unsure
246
- ''')
247
-
248
- chain = prompt_template | llm | StrOutputParser()
249
- response = chain.invoke({
250
- "context": context_text,
251
- "question": prompt
252
- })
253
-
254
- # Display and log
255
- st.markdown(response)
256
- log_interaction(prompt, response, context_text)
257
- st.session_state.messages.append({"role": "assistant", "content": response})
258
-
259
- except Exception as e:
260
- error_msg = f"Error generating response: {str(e)}"
261
- st.error(error_msg)
262
- log_interaction(prompt, error_msg, "")
263
- print(traceback.format_exc())
264
 
265
  if __name__ == "__main__":
266
- main()
 
 
1
  import os
2
  import time
 
3
  from dotenv import load_dotenv
4
+ from fastapi import FastAPI, HTTPException
5
+ from pydantic import BaseModel
6
  from langchain_groq import ChatGroq
7
  from langchain_huggingface import HuggingFaceEmbeddings
8
  from langchain_community.vectorstores import FAISS
 
13
  from datetime import datetime
14
  import json
15
  import traceback
16
+ from typing import Optional, List, Dict
17
+ from langchain_core.tracers import ConsoleCallbackHandler
18
+ from langchain_core.callbacks import CallbackManager
19
 
20
  # Initialize environment variables
21
  load_dotenv()
22
 
23
+ # Initialize FastAPI app
24
+ app = FastAPI(title="Status Law Assistant API")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
25
 
26
+ # Models for request/response
27
# Request body accepted by POST /chat.
# (Plain comments are used instead of a class docstring so the generated
# API schema is not altered.)
class ChatRequest(BaseModel):
    # The user's question, forwarded verbatim to retrieval and to the LLM.
    message: str
29
+
30
# Response body returned by POST /chat.
# (Plain comments are used instead of a class docstring so the generated
# API schema is not altered.)
class ChatResponse(BaseModel):
    # Text produced by the LLM chain.
    response: str
    # Retrieved knowledge-base text the answer was grounded on; optional.
    context: Optional[str] = None
33
+
34
+ # Global variables
35
# Folder where the FAISS index is persisted between runs.
VECTOR_STORE_PATH = "vector_store"

# Pages that are scraped to build the knowledge base.
# Every entry needs its own trailing comma: adjacent string literals are
# silently concatenated by Python, which previously merged the contact page
# and the cross-border banking page into a single invalid URL.
URLS = [
    "https://status.law",
    "https://status.law/about",
    "https://status.law/careers",
    "https://status.law/tariffs-for-services-of-protection-against-extradition",
    "https://status.law/challenging-sanctions",
    "https://status.law/law-firm-contact-legal-protection",
    "https://status.law/cross-border-banking-legal-issues",
    "https://status.law/extradition-defense",
    "https://status.law/international-prosecution-protection",
    "https://status.law/interpol-red-notice-removal",
    "https://status.law/practice-areas",
    "https://status.law/reputation-protection",
    "https://status.law/faq",
]
51
+
52
+ # Enhanced logging
53
class CustomCallbackHandler(ConsoleCallbackHandler):
    """Console tracer that also appends finished chain runs to a JSON-lines file.

    Each call to ``on_chain_end`` writes one JSON object (timestamp, run id,
    inputs, outputs, execution time in seconds, metadata) as a single line to
    ``chat_history/detailed_logs.json``.
    """

    def on_chain_end(self, run):
        """Append a log record for ``run`` to ``chat_history/detailed_logs.json``.

        Args:
            run: The finished run; its ``id``, ``inputs``, ``outputs``,
                ``start_time``, ``end_time`` and ``metadata`` are read.
        """
        # NOTE(review): the tracer base class appears to dispatch
        # on_chain_end(outputs, *, run_id, ...) and to expose a per-run hook
        # named _on_chain_end(run); confirm this override is actually invoked
        # with a run object before relying on these logs.
        execution_time = None
        if run.end_time:
            # Subtracting the two timestamps gives a timedelta, which json
            # cannot encode; store plain seconds instead.
            execution_time = (run.end_time - run.start_time).total_seconds()

        log_entry = {
            "timestamp": datetime.now().isoformat(),
            "run_id": str(run.id),
            "inputs": run.inputs,
            "outputs": run.outputs,
            "execution_time": execution_time,
            "metadata": run.metadata
        }

        os.makedirs("chat_history", exist_ok=True)
        with open("chat_history/detailed_logs.json", "a", encoding="utf-8") as f:
            # default=str is deliberate: inputs/outputs may carry objects that
            # are not JSON-native, and a lossy string beats a lost log line.
            json.dump(log_entry, f, ensure_ascii=False, default=str)
            f.write("\n")
 
 
 
 
 
68
 
69
# Initialize models
def init_models():
    """Create the chat LLM and the embedding model used by the service.

    Returns:
        tuple: ``(llm, embeddings)`` - a ``ChatGroq`` instance wired to a
        ``CustomCallbackHandler`` and a ``HuggingFaceEmbeddings`` instance.

    Raises:
        RuntimeError: If either model cannot be constructed. The original
            error is attached as ``__cause__``.
    """
    try:
        callback_handler = CustomCallbackHandler()

        llm = ChatGroq(
            model_name="llama-3.3-70b-versatile",
            temperature=0.6,
            api_key=os.getenv("GROQ_API_KEY"),
            # `callbacks` is the supported way to attach handlers; the
            # `callback_manager` constructor argument is deprecated.
            callbacks=[callback_handler],
        )
        embeddings = HuggingFaceEmbeddings(
            model_name="intfloat/multilingual-e5-large-instruct"
        )
        return llm, embeddings
    except Exception as e:
        # Chain explicitly so the underlying failure stays in the traceback.
        raise RuntimeError(f"Model initialization failed: {str(e)}") from e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
87
 
88
+ # Knowledge base management
89
def build_knowledge_base(embeddings):
    """Scrape ``URLS``, embed the text and persist a FAISS index.

    Pages that fail to download are reported on stdout and skipped; the build
    only fails when no page at all could be loaded.

    Args:
        embeddings: Embedding model passed to ``FAISS.from_documents``.

    Returns:
        The freshly built FAISS vector store, which has also been saved under
        ``VECTOR_STORE_PATH`` with index name ``"index"``.

    Raises:
        RuntimeError: If no documents were loaded or any later step fails.
            The original error is attached as ``__cause__``.
    """
    try:
        documents = []

        for url in URLS:
            try:
                documents.extend(WebBaseLoader(url).load())
            except Exception as e:
                # Best effort: one unreachable page must not abort the build.
                print(f"Failed to load {url}: {str(e)}")

        if not documents:
            raise Exception("No documents loaded!")

        text_splitter = RecursiveCharacterTextSplitter(
            chunk_size=500,
            chunk_overlap=100
        )
        chunks = text_splitter.split_documents(documents)

        vector_store = FAISS.from_documents(chunks, embeddings)

        # Create the folder only once there is something to save, so a failed
        # build does not leave an empty directory that the startup code would
        # take for an existing index.
        os.makedirs(VECTOR_STORE_PATH, exist_ok=True)
        vector_store.save_local(folder_path=VECTOR_STORE_PATH, index_name="index")

        return vector_store
    except Exception as e:
        # Chain explicitly so the underlying failure stays in the traceback.
        raise RuntimeError(f"Knowledge base creation failed: {str(e)}") from e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
118
 
119
+ # Initialize models and knowledge base on startup
120
# Models are created once at import time and shared by every request.
llm, embeddings = init_models()

# Stays None until an index has been loaded from disk or built from scratch.
vector_store = None

# Prefer a previously persisted index when its folder is present.
if os.path.exists(VECTOR_STORE_PATH):
    try:
        # NOTE(review): allow_dangerous_deserialization opts into loading
        # serialized index metadata; presumably acceptable only while this
        # folder is written solely by this application - confirm.
        vector_store = FAISS.load_local(
            VECTOR_STORE_PATH,
            embeddings,
            allow_dangerous_deserialization=True,
        )
    except Exception as load_error:
        # A broken index is not fatal; fall through to a rebuild below.
        print(f"Failed to load existing knowledge base: {str(load_error)}")

# Nothing usable on disk: build the knowledge base now.
if vector_store is None:
    vector_store = build_knowledge_base(embeddings)
 
 
 
135
 
136
+ # API endpoints
137
@app.post("/chat", response_model=ChatResponse)
async def chat_endpoint(request: ChatRequest):
    """Answer a user question using retrieved knowledge-base context.

    The message is used to run a similarity search on the module-level
    ``vector_store``; the retrieved text and the question are fed through the
    prompt, ``llm`` and a string output parser, and the exchange is logged via
    ``log_interaction``.

    Args:
        request: Body carrying the user's ``message``.

    Returns:
        ChatResponse: The generated answer plus the context text used.

    Raises:
        HTTPException: Status 500 with the error text if any step fails.
    """
    try:
        # Retrieve context. The async variants are awaited so that slow
        # embedding / LLM calls do not block the event loop for other clients.
        context_docs = await vector_store.asimilarity_search(request.message)
        context_text = "\n".join(d.page_content for d in context_docs)

        # Generate response
        prompt_template = PromptTemplate.from_template('''
            You are a helpful and polite legal assistant at Status Law.
            You answer in the language in which the question was asked.
            Answer the question based on the context provided.
            If you cannot answer based on the context, say so politely and offer to contact Status Law directly via the following channels:
            - For all users: +32465594521 (landline phone).
            - For English and Swedish speakers only: +46728495129 (available on WhatsApp, Telegram, Signal, IMO).
            - Provide a link to the contact form: [Contact Form](https://status.law/law-firm-contact-legal-protection/).

            Context: {context}
            Question: {question}

            Response Guidelines:
            1. Answer in the user's language
            2. Cite sources when possible
            3. Offer contact options if unsure
            ''')

        chain = prompt_template | llm | StrOutputParser()
        response = await chain.ainvoke({
            "context": context_text,
            "question": request.message
        })

        # Log interaction
        log_interaction(request.message, response, context_text)

        return ChatResponse(response=response, context=context_text)

    except Exception as e:
        # Chain explicitly so the server-side traceback keeps the root cause.
        raise HTTPException(status_code=500, detail=str(e)) from e
176
 
177
@app.post("/rebuild-kb")
async def rebuild_knowledge_base():
    """Rebuild the knowledge base and swap it in as the active vector store.

    The module-level ``vector_store`` is replaced only after
    ``build_knowledge_base`` has returned successfully.

    Returns:
        dict: ``{"status": "success", "message": ...}`` on success.

    Raises:
        HTTPException: Status 500 with the error text if the rebuild fails.
    """
    global vector_store
    # Local import keeps this block self-contained.
    import asyncio

    try:
        # The rebuild downloads pages and computes embeddings synchronously;
        # run it in a worker thread so the event loop keeps serving requests.
        loop = asyncio.get_running_loop()
        vector_store = await loop.run_in_executor(
            None, build_knowledge_base, embeddings
        )
        return {"status": "success", "message": "Knowledge base rebuilt successfully"}
    except Exception as e:
        # Chain explicitly so the server-side traceback keeps the root cause.
        raise HTTPException(status_code=500, detail=str(e)) from e
185
 
186
def log_interaction(user_input: str, bot_response: str, context: str):
    """Append one chat exchange as a JSON line to chat_history/chat_logs.json.

    Any failure while building or writing the record is printed to stdout
    together with the traceback and is not propagated to the caller.

    Args:
        user_input: The question received from the user.
        bot_response: The answer that was returned.
        context: Retrieved context text; only its first 500 characters are kept.
    """
    try:
        record = dict(
            timestamp=datetime.now().isoformat(),
            user_input=user_input,
            bot_response=bot_response,
            context=context[:500],
            kb_version="1.1",  # fixed placeholder; no version tracking yet
        )

        os.makedirs("chat_history", exist_ok=True)
        with open("chat_history/chat_logs.json", "a", encoding="utf-8") as log_file:
            log_file.write(json.dumps(record, ensure_ascii=False) + "\n")

    except Exception as log_error:
        print(f"Logging error: {str(log_error)}")
        print(traceback.format_exc())
 
 
 
 
 
 
 
 
203
 
204
  if __name__ == "__main__":
205
+ import uvicorn
206
+ uvicorn.run(app, host="0.0.0.0", port=8000)
requirements.txt CHANGED
@@ -1,23 +1,18 @@
1
- streamlit
2
  langchain-community
3
  langchain-core
4
  langchain-huggingface
5
  langchain-groq
6
  python-dotenv
7
- beautifulsoup4
8
  faiss-cpu
9
  requests
10
- langgraph
11
- langchain-anthropic
12
  fastapi
13
  uvicorn[standard]
14
  pydantic
15
- python-multipart
16
  pandas
17
- langchain
18
- plotly
19
-
20
-
21
-
22
-
23
 
 
 
 
 
1
+ # Основные компоненты для работы с LLM и базой знаний
2
  langchain-community
3
  langchain-core
4
  langchain-huggingface
5
  langchain-groq
6
  python-dotenv
 
7
  faiss-cpu
8
  requests
9
+
10
+ # Для API и логирования
11
  fastapi
12
  uvicorn[standard]
13
  pydantic
 
14
  pandas
 
 
 
 
 
 
15
 
16
+ # Для LangChain логирования
17
+ langgraph
18
+ langchain-core[tracing]
run.sh CHANGED
@@ -1,5 +1,2 @@
1
  #!/bin/bash
2
-
3
- # Запуск Streamlit и FastAPI параллельно
4
- streamlit run app.py & # Запуск чат-бота
5
- uvicorn api.main:app --reload # Запуск API для анализа логов
 
1
  #!/bin/bash
2
+ uvicorn app:app --host 0.0.0.0 --port 8000 --reload