Fecalisboa committed
Commit 660ad36 · verified · 1 Parent(s): 09c43ee

Update app.py

Files changed (1)
  1. app.py +53 -42
app.py CHANGED
@@ -45,15 +45,26 @@ from llama_index.core.node_parser.relational.base_element import (
 from llama_index.core.schema import BaseNode, TextNode


+# Get the API token from the environment variable
 api_token = os.getenv("HF_TOKEN")
-# Implementations
+
+# Check that the token was retrieved correctly
+if api_token is None:
+    raise ValueError("The API token was not found. Make sure the HF_TOKEN environment variable is set correctly.")
+
+# Function to obfuscate the token
+def obscure_token(token, num_visible=4):
+    return '*' * (len(token) - num_visible) + token[-num_visible:]
+
+# Display the obfuscated API token (for debugging only; remove in production)
+print(f"API token: {obscure_token(api_token)}")

 # default_persist_directory = './chroma_HF/'
-list_llm = ["mistralai/Mistral-7B-Instruct-v0.3", "mistralai/Mixtral-8x7B-Instruct-v0.1", "mistralai/Mistral-7B-Instruct-v0.1", \
-    "google/gemma-7b-it","google/gemma-2b-it", \
-    "HuggingFaceH4/zephyr-7b-beta", "HuggingFaceH4/zephyr-7b-gemma-v0.1", \
-    "meta-llama/Llama-2-7b-chat-hf", "microsoft/phi-2", \
-    "TinyLlama/TinyLlama-1.1B-Chat-v1.0", "mosaicml/mpt-7b-instruct", "tiiuae/falcon-7b-instruct", \
+list_llm = ["mistralai/Mistral-7B-Instruct-v0.3", "mistralai/Mixtral-8x7B-Instruct-v0.1", "mistralai/Mistral-7B-Instruct-v0.1",
+    "google/gemma-7b-it","google/gemma-2b-it",
+    "HuggingFaceH4/zephyr-7b-beta", "HuggingFaceH4/zephyr-7b-gemma-v0.1",
+    "meta-llama/Llama-2-7b-chat-hf", "microsoft/phi-2",
+    "TinyLlama/TinyLlama-1.1B-Chat-v1.0", "mosaicml/mpt-7b-instruct", "tiiuae/falcon-7b-instruct",
     "google/flan-t5-xxl"
 ]
 list_llm_simple = [os.path.basename(llm) for llm in list_llm]
@@ -96,7 +107,7 @@ def initialize_llmchain(llm_model, temperature, max_tokens, top_k, vector_db, pr
     if llm_model == "mistralai/Mixtral-8x7B-Instruct-v0.3":
         llm = HuggingFaceEndpoint(
             repo_id=llm_model,
-            huggingfacehub_api_token = api_token,
+            api_key=api_token,
             temperature = temperature,
             max_new_tokens = max_tokens,
             top_k = top_k,
@@ -106,7 +117,7 @@ def initialize_llmchain(llm_model, temperature, max_tokens, top_k, vector_db, pr
         raise gr.Error("LLM model is too large to be loaded automatically on free inference endpoint")
         llm = HuggingFaceEndpoint(
             repo_id=llm_model,
-            huggingfacehub_api_token = api_token,
+            api_key=api_token,
             temperature = temperature,
             max_new_tokens = max_tokens,
             top_k = top_k,
@@ -114,7 +125,7 @@ def initialize_llmchain(llm_model, temperature, max_tokens, top_k, vector_db, pr
     elif llm_model == "microsoft/phi-2":
         llm = HuggingFaceEndpoint(
             repo_id=llm_model,
-            huggingfacehub_api_token = api_token,
+            api_key=api_token,
             temperature = temperature,
             max_new_tokens = max_tokens,
             top_k = top_k,
@@ -124,7 +135,7 @@ def initialize_llmchain(llm_model, temperature, max_tokens, top_k, vector_db, pr
     elif llm_model == "TinyLlama/TinyLlama-1.1B-Chat-v1.0":
         llm = HuggingFaceEndpoint(
             repo_id=llm_model,
-            huggingfacehub_api_token = api_token,
+            api_key=api_token,
             temperature = temperature,
             max_new_tokens = 250,
             top_k = top_k,
@@ -133,7 +144,7 @@ def initialize_llmchain(llm_model, temperature, max_tokens, top_k, vector_db, pr
         raise gr.Error("Llama-2-7b-chat-hf model requires a Pro subscription...")
         llm = HuggingFaceEndpoint(
             repo_id=llm_model,
-            huggingfacehub_api_token = api_token,
+            api_key=api_token,
             temperature = temperature,
             max_new_tokens = max_tokens,
             top_k = top_k,
@@ -141,7 +152,7 @@ def initialize_llmchain(llm_model, temperature, max_tokens, top_k, vector_db, pr
     else:
         llm = HuggingFaceEndpoint(
             repo_id=llm_model,
-            huggingfacehub_api_token = api_token,
+            api_key=api_token,
             temperature = temperature,
             max_new_tokens = max_tokens,
             top_k = top_k,
@@ -235,11 +246,11 @@ def upload_file(file_obj):

 # Initialize LlamaIndex parsing
 def initialize_llama_index(file_obj):
-    documents = LlamaParse(result_type="markdown",api_key=secret_value_0).load_data(file_obj.name)
-    node_parser = MarkdownElementNodeParser(llm = None, num_workers=8)
+    documents = LlamaParse(result_type="markdown", api_key=api_token).load_data(file_obj[0].name)
+    node_parser = MarkdownElementNodeParser(llm=None, num_workers=8)
     nodes = node_parser.get_nodes_from_documents(documents)
     base_nodes, objects = node_parser.get_nodes_and_objects(nodes)
-    index_with_obj = VectorStoreIndex(nodes=base_nodes+objects)
+    index_with_obj = VectorStoreIndex(nodes=base_nodes + objects)
     index_ret = index_with_obj.as_retriever(top_k=15)
     recursive_query_engine = RetrieverQueryEngine.from_args(index_ret, node_postprocessors=[FlagEmbeddingReranker(
         top_n=5,
@@ -268,12 +279,12 @@ def demo():

         with gr.Tab("Step 2 - Process document"):
             with gr.Row():
-                db_btn = gr.Radio(["ChromaDB"], label="Vector database type", value = "ChromaDB", type="index", info="Choose your vector database")
+                db_btn = gr.Radio(["ChromaDB"], label="Vector database type", value="ChromaDB", type="index", info="Choose your vector database")
             with gr.Accordion("Advanced options - Document text splitter", open=False):
                 with gr.Row():
-                    slider_chunk_size = gr.Slider(minimum = 100, maximum = 1000, value=600, step=20, label="Chunk size", info="Chunk size", interactive=True)
+                    slider_chunk_size = gr.Slider(minimum=100, maximum=1000, value=600, step=20, label="Chunk size", info="Chunk size", interactive=True)
                 with gr.Row():
-                    slider_chunk_overlap = gr.Slider(minimum = 10, maximum = 200, value=40, step=10, label="Chunk overlap", info="Chunk overlap", interactive=True)
+                    slider_chunk_overlap = gr.Slider(minimum=10, maximum=200, value=40, step=10, label="Chunk overlap", info="Chunk overlap", interactive=True)
                 with gr.Row():
                     db_progress = gr.Textbox(label="Vector database initialization", value="None")
                 with gr.Row():
@@ -281,17 +292,17 @@ def demo():

         with gr.Tab("Step 3 - Initialize QA chain"):
             with gr.Row():
-                llm_btn = gr.Radio(list_llm_simple, \
-                    label="LLM models", value = list_llm_simple[0], type="index", info="Choose your LLM model")
+                llm_btn = gr.Radio(list_llm_simple,
+                    label="LLM models", value=list_llm_simple[0], type="index", info="Choose your LLM model")
             with gr.Accordion("Advanced options - LLM model", open=False):
                 with gr.Row():
-                    slider_temperature = gr.Slider(minimum = 0.01, maximum = 1.0, value=0.7, step=0.1, label="Temperature", info="Model temperature", interactive=True)
+                    slider_temperature = gr.Slider(minimum=0.01, maximum=1.0, value=0.7, step=0.1, label="Temperature", info="Model temperature", interactive=True)
                 with gr.Row():
-                    slider_maxtokens = gr.Slider(minimum = 224, maximum = 4096, value=1024, step=32, label="Max Tokens", info="Model max tokens", interactive=True)
+                    slider_maxtokens = gr.Slider(minimum=224, maximum=4096, value=1024, step=32, label="Max Tokens", info="Model max tokens", interactive=True)
                 with gr.Row():
-                    slider_topk = gr.Slider(minimum = 1, maximum = 10, value=3, step=1, label="top-k samples", info="Model top-k samples", interactive=True)
+                    slider_topk = gr.Slider(minimum=1, maximum=10, value=3, step=1, label="top-k samples", info="Model top-k samples", interactive=True)
                 with gr.Row():
-                    llm_progress = gr.Textbox(value="None",label="QA chain initialization")
+                    llm_progress = gr.Textbox(value="None", label="QA chain initialization")
             with gr.Row():
                 qachain_btn = gr.Button("Initialize Question Answering chain")

@@ -320,31 +331,31 @@ def demo():
                 clear_btn = gr.ClearButton([msg, chatbot], value="Clear conversation")

         # Preprocessing events
-        db_btn.click(initialize_database, \
-            inputs=[document, slider_chunk_size, slider_chunk_overlap], \
+        db_btn.click(initialize_database,
+            inputs=[document, slider_chunk_size, slider_chunk_overlap],
             outputs=[vector_db, collection_name, db_progress])
-        qachain_btn.click(initialize_LLM, \
-            inputs=[llm_btn, slider_temperature, slider_maxtokens, slider_topk, vector_db], \
-            outputs=[qa_chain, llm_progress]).then(lambda:[None,"",0,"",0,"",0], \
-            inputs=None, \
-            outputs=[chatbot, doc_source1, source1_page, doc_source2, source2_page, doc_source3, source3_page], \
+        qachain_btn.click(initialize_LLM,
+            inputs=[llm_btn, slider_temperature, slider_maxtokens, slider_topk, vector_db],
+            outputs=[qa_chain, llm_progress]).then(lambda:[None,"",0,"",0,"",0],
+            inputs=None,
+            outputs=[chatbot, doc_source1, source1_page, doc_source2, source2_page, doc_source3, source3_page],
             queue=False)
-        llama_index_btn.click(initialize_llama_index, \
-            inputs=[document], \
+        llama_index_btn.click(initialize_llama_index,
+            inputs=[document],
             outputs=[llama_index_engine, llama_index_progress])

         # Chatbot events
-        msg.submit(conversation, \
-            inputs=[qa_chain, msg, chatbot], \
-            outputs=[qa_chain, msg, chatbot, doc_source1, source1_page, doc_source2, source2_page, doc_source3, source3_page], \
+        msg.submit(conversation,
+            inputs=[qa_chain, msg, chatbot],
+            outputs=[qa_chain, msg, chatbot, doc_source1, source1_page, doc_source2, source2_page, doc_source3, source3_page],
             queue=False)
-        submit_btn.click(conversation, \
-            inputs=[qa_chain, msg, chatbot], \
-            outputs=[qa_chain, msg, chatbot, doc_source1, source1_page, doc_source2, source2_page, doc_source3, source3_page], \
+        submit_btn.click(conversation,
+            inputs=[qa_chain, msg, chatbot],
+            outputs=[qa_chain, msg, chatbot, doc_source1, source1_page, doc_source2, source2_page, doc_source3, source3_page],
             queue=False)
-        clear_btn.click(lambda:[None,"",0,"",0,"",0], \
-            inputs=None, \
-            outputs=[chatbot, doc_source1, source1_page, doc_source2, source2_page, doc_source3, source3_page], \
+        clear_btn.click(lambda:[None,"",0,"",0,"",0],
+            inputs=None,
+            outputs=[chatbot, doc_source1, source1_page, doc_source2, source2_page, doc_source3, source3_page],
             queue=False)
     demo.queue().launch(debug=True)

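For reference, a minimal standalone sketch of how the obscure_token helper added in this commit behaves; the token value below is a made-up placeholder, not a real credential:

    def obscure_token(token, num_visible=4):
        # Mask everything except the last `num_visible` characters
        return '*' * (len(token) - num_visible) + token[-num_visible:]

    print(obscure_token("hf_abc123456"))  # prints "********3456" (8 asterisks, then the last 4 characters)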