reichaves committed
Commit 81f9a82 · unverified · 1 Parent(s): 94a00f4

Update app.py

Files changed (1)
  1. app.py +26 -23
app.py CHANGED
@@ -4,7 +4,6 @@
 # using Streamlit, LangChain, and large language models - to interview the content of URLs
 # Response generation using Meta's llama-3.2-90b-text-preview model
 # Text embeddings using Hugging Face's all-MiniLM-L6-v2 model
-##
 
 import streamlit as st
 from langchain.chains import create_history_aware_retriever, create_retrieval_chain
@@ -17,11 +16,11 @@ from langchain_core.runnables.history import RunnableWithMessageHistory
 from langchain_community.embeddings import HuggingFaceEmbeddings
 from langchain_text_splitters import RecursiveCharacterTextSplitter
 from langchain_community.vectorstores import FAISS
+from langchain_core.language_models.chat_models import BaseChatModel
 import os
 import requests
 from bs4 import BeautifulSoup
 from langchain_core.documents import Document
-import time
 from tenacity import retry, wait_exponential, stop_after_attempt, retry_if_exception_type
 
 # Set the theme to dark
@@ -166,31 +165,36 @@ st.write("Insira uma URL e converse com o conteúdo dela - aqui é usado o model
 groq_api_key = st.text_input("Insira sua chave de API Groq:", type="password")
 huggingface_api_token = st.text_input("Insira seu token de API Hugging Face:", type="password")
 
-@retry(
-    retry=retry_if_exception_type(Exception),
-    wait=wait_exponential(multiplier=1, min=4, max=60),
-    stop=stop_after_attempt(5)
-)
-def rate_limited_llm_call(llm, **kwargs):
-    try:
-        return llm(**kwargs)
-    except Exception as e:
-        if "rate limit" in str(e).lower():
-            st.error(f"Rate limit reached. Please try again in a few moments. Error: {str(e)}")
-            raise e
-        else:
-            st.error(f"An error occurred while processing your request: {str(e)}")
+# Custom wrapper for ChatGroq with rate limiting
+class RateLimitedChatGroq(BaseChatModel):
+    def __init__(self, groq_api_key, model_name, temperature=0):
+        self.llm = ChatGroq(groq_api_key=groq_api_key, model_name=model_name, temperature=temperature)
+
+    @retry(
+        retry=retry_if_exception_type(Exception),
+        wait=wait_exponential(multiplier=1, min=4, max=60),
+        stop=stop_after_attempt(5)
+    )
+    def _call(self, messages, stop=None, run_manager=None, **kwargs):
+        try:
+            return self.llm._call(messages, stop=stop, run_manager=run_manager, **kwargs)
+        except Exception as e:
+            if "rate limit" in str(e).lower():
+                st.error(f"Rate limit reached. Please try again in a few moments. Error: {str(e)}")
+            else:
+                st.error(f"An error occurred while processing your request: {str(e)}")
             raise e
 
+    @property
+    def _llm_type(self):
+        return "rate_limited_chat_groq"
+
 if groq_api_key and huggingface_api_token:
     # Set the Hugging Face API token
     os.environ["HUGGINGFACEHUB_API_TOKEN"] = huggingface_api_token
 
     # Initialize the language model and embeddings
-    # Initialize the LLM with rate limiting
-    llm = ChatGroq(groq_api_key=groq_api_key, model_name="llama-3.2-90b-text-preview", temperature=0)
-    def rate_limited_llm(**kwargs):
-        return rate_limited_llm_call(llm, **kwargs)
+    rate_limited_llm = RateLimitedChatGroq(groq_api_key=groq_api_key, model_name="llama-3.2-90b-text-preview", temperature=0)
     embeddings = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")
 
     session_id = st.text_input("Session ID", value="default_session")
@@ -218,7 +222,7 @@ if groq_api_key and huggingface_api_token:
         # Create a Document object
         document = Document(page_content=text, metadata={"source": url})
 
-        text_splitter = RecursiveCharacterTextSplitter(chunk_size=5000, chunk_overlap=500)  # Reduced chunk size
+        text_splitter = RecursiveCharacterTextSplitter(chunk_size=5000, chunk_overlap=500)
         splits = text_splitter.split_documents([document])
 
         # Create FAISS vector store
@@ -241,7 +245,7 @@ if groq_api_key and huggingface_api_token:
            ("human", "{input}"),
        ])
 
-        history_aware_retriever = create_history_aware_retriever(llm, retriever, contextualize_q_prompt)
+        history_aware_retriever = create_history_aware_retriever(rate_limited_llm, retriever, contextualize_q_prompt)
 
        system_prompt = (
            "Você é um assistente especializado em analisar conteúdo de páginas web. "
@@ -272,7 +276,6 @@ if groq_api_key and huggingface_api_token:
            ("human", "{input}"),
        ])
 
-        # Modify the conversational_rag_chain to use the rate_limited_llm
        question_answer_chain = create_stuff_documents_chain(rate_limited_llm, qa_prompt)
        rag_chain = create_retrieval_chain(history_aware_retriever, question_answer_chain)
 
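A note on the new wrapper: in current langchain_core, BaseChatModel is a pydantic model, so a subclass whose bare __init__ assigns self.llm without calling super().__init__() may fail validation, and chat models are expected to implement _generate (returning a ChatResult) rather than _call, which ChatGroq itself does not define. A minimal sketch of the same idea in the shape BaseChatModel expects, keeping the commit's tenacity policy (an illustration, not code from this commit):

from typing import Any, List, Optional

import streamlit as st
from langchain_core.language_models.chat_models import BaseChatModel
from langchain_core.messages import BaseMessage
from langchain_core.outputs import ChatResult
from langchain_groq import ChatGroq
from tenacity import retry, retry_if_exception_type, stop_after_attempt, wait_exponential

class RateLimitedChatGroq(BaseChatModel):
    # Declared as a pydantic field so BaseChatModel's own __init__ accepts it;
    # the wrapped model is passed in as RateLimitedChatGroq(llm=ChatGroq(...)).
    llm: ChatGroq

    @retry(
        retry=retry_if_exception_type(Exception),
        wait=wait_exponential(multiplier=1, min=4, max=60),  # roughly doubling waits, 4s to 60s
        stop=stop_after_attempt(5),
    )
    def _generate(
        self,
        messages: List[BaseMessage],
        stop: Optional[List[str]] = None,
        run_manager: Optional[Any] = None,
        **kwargs: Any,
    ) -> ChatResult:
        # Chat models implement _generate rather than _call; delegate to the
        # wrapped ChatGroq and let tenacity retry failed attempts.
        try:
            return self.llm._generate(messages, stop=stop, run_manager=run_manager, **kwargs)
        except Exception as e:
            if "rate limit" in str(e).lower():
                st.error(f"Rate limit reached. Please try again in a few moments. Error: {e}")
            else:
                st.error(f"An error occurred while processing your request: {e}")
            raise

    @property
    def _llm_type(self) -> str:
        return "rate_limited_chat_groq"

With this shape, initialization becomes rate_limited_llm = RateLimitedChatGroq(llm=ChatGroq(groq_api_key=groq_api_key, model_name="llama-3.2-90b-text-preview", temperature=0)).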
 
 
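For comparison, langchain_core Runnables also ship a built-in retry wrapper, Runnable.with_retry(), which avoids subclassing entirely. A sketch using the same model and variable names as above (again an illustration, not part of this commit):

from langchain_groq import ChatGroq

llm = ChatGroq(
    groq_api_key=groq_api_key,  # the key collected via st.text_input above
    model_name="llama-3.2-90b-text-preview",
    temperature=0,
)

# Jittered exponential backoff on any Exception, giving up after 5 attempts.
rate_limited_llm = llm.with_retry(
    retry_if_exception_type=(Exception,),
    wait_exponential_jitter=True,
    stop_after_attempt=5,
)

The result is a Runnable, which create_history_aware_retriever and create_stuff_documents_chain both accept in place of a chat model, so the rest of the chain wiring stays unchanged; the trade-off is losing the custom st.error reporting inside the retry loop.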