Clement Vachet committed · Commit 577e81d
Parent(s): 799497c
style: clean code

app.py CHANGED
@@ -36,14 +36,10 @@ list_llm_simple = [os.path.basename(llm) for llm in list_llm]
 
 # Load PDF document and create doc splits
 def load_doc(list_file_path, chunk_size, chunk_overlap):
-    # Processing for one document only
-    # loader = PyPDFLoader(file_path)
-    # pages = loader.load()
     loaders = [PyPDFLoader(x) for x in list_file_path]
     pages = []
     for loader in loaders:
         pages.extend(loader.load())
-    # text_splitter = RecursiveCharacterTextSplitter(chunk_size = 600, chunk_overlap = 50)
     text_splitter = RecursiveCharacterTextSplitter(
         chunk_size = chunk_size,
         chunk_overlap = chunk_overlap)
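For reference, the cleaned-up load_doc is a short multi-document loader. A minimal sketch of the full function under this commit, assuming the PyPDFLoader and RecursiveCharacterTextSplitter imports app.py already uses (import paths vary across langchain versions), with a split/return step that falls outside the hunk and is therefore an assumption:

# Sketch of load_doc after this commit; the final split/return lines are
# outside the hunk, so their exact form here is an assumption.
from langchain_community.document_loaders import PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter

def load_doc(list_file_path, chunk_size, chunk_overlap):
    # Load every uploaded PDF; each loader yields one Document per page
    loaders = [PyPDFLoader(x) for x in list_file_path]
    pages = []
    for loader in loaders:
        pages.extend(loader.load())
    # Split pages into overlapping chunks sized by the UI sliders
    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=chunk_size,
        chunk_overlap=chunk_overlap)
    return text_splitter.split_documents(pages)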
@@ -77,26 +73,6 @@ def load_db():
 # Initialize langchain LLM chain
 def initialize_llmchain(llm_model, temperature, max_tokens, top_k, vector_db, progress=gr.Progress()):
     progress(0.1, desc="Initializing HF tokenizer...")
-    # HuggingFacePipeline uses local model
-    # Note: it will download model locally...
-    # tokenizer=AutoTokenizer.from_pretrained(llm_model)
-    # progress(0.5, desc="Initializing HF pipeline...")
-    # pipeline=transformers.pipeline(
-    #     "text-generation",
-    #     model=llm_model,
-    #     tokenizer=tokenizer,
-    #     torch_dtype=torch.bfloat16,
-    #     trust_remote_code=True,
-    #     device_map="auto",
-    #     # max_length=1024,
-    #     max_new_tokens=max_tokens,
-    #     do_sample=True,
-    #     top_k=top_k,
-    #     num_return_sequences=1,
-    #     eos_token_id=tokenizer.eos_token_id
-    # )
-    # llm = HuggingFacePipeline(pipeline=pipeline, model_kwargs={'temperature': temperature})
-
     # HuggingFaceHub uses HF inference endpoints
     progress(0.5, desc="Initializing HF Hub...")
     # Use of trust_remote_code as model_kwargs
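The deleted block was a dead local-inference path (a transformers pipeline wrapped in HuggingFacePipeline); the live path calls HF inference endpoints instead. A hedged sketch of what that endpoint branch typically looks like with langchain's HuggingFaceHub wrapper, using only the parameter names visible in the diff; the actual continuation of initialize_llmchain sits outside the hunk, so this is not the commit's code:

# Hypothetical endpoint-based branch; the real code past this hunk is not
# shown, so the repo_id/model_kwargs usage here is an assumption.
from langchain_community.llms import HuggingFaceHub

llm = HuggingFaceHub(
    repo_id=llm_model,  # HF model id selected in the UI
    model_kwargs={
        "temperature": temperature,
        "max_new_tokens": max_tokens,
        "top_k": top_k,
    },
)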
@@ -268,16 +244,6 @@ def conversation(qa_chain, message, history):
     return qa_chain, gr.update(value=""), new_history, response_source1, response_source1_page, response_source2, response_source2_page, response_source3, response_source3_page
 
 
-def upload_file(file_obj):
-    list_file_path = []
-    for idx, file in enumerate(file_obj):
-        file_path = file_obj.name
-        list_file_path.append(file_path)
-        # print(file_path)
-    # initialize_database(file_path, progress)
-    return list_file_path
-
-
 def demo():
     with gr.Blocks(theme="base") as demo:
         vector_db = gr.State()
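Deleting upload_file is safe because gr.File(file_count="multiple") already hands its callbacks a list of temp-file wrappers whose .name attribute is the on-disk path; the removed helper also had a latent bug, reading file_obj.name instead of file.name on every iteration, so it returned the same path repeatedly. A minimal sketch of reading the paths directly, should one be needed again (assumed, not part of this commit):

# Assumed helper, not in the commit: gr.File passes tempfile wrappers,
# each exposing its on-disk path as .name.
def list_file_paths(file_objs):
    return [f.name for f in file_objs]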
@@ -297,7 +263,6 @@ def demo():
         with gr.Tab("Step 1 - Upload PDF"):
             with gr.Row():
                 document = gr.File(height=200, file_count="multiple", file_types=[".pdf"], interactive=True, label="Upload your PDF documents (single or multiple)")
-                # upload_btn = gr.UploadButton("Loading document...", height=100, file_count="multiple", file_types=["pdf"], scale=1)
 
         with gr.Tab("Step 2 - Process document"):
             with gr.Row():
@@ -347,7 +312,6 @@ def demo():
         clear_btn = gr.ClearButton(components=[msg, chatbot], value="Clear conversation")
 
         # Preprocessing events
-        #upload_btn.upload(upload_file, inputs=[upload_btn], outputs=[document])
         db_btn.click(initialize_database, \
             inputs=[document, slider_chunk_size, slider_chunk_overlap], \
             outputs=[vector_db, collection_name, db_progress])
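With the upload button gone, db_btn.click is the only preprocessing event left. The trailing backslashes are harmless but redundant inside the open call parenthesis; an equivalent form relying on implicit continuation, using only names from the diff:

# Equivalent wiring without explicit line continuations
db_btn.click(
    initialize_database,
    inputs=[document, slider_chunk_size, slider_chunk_overlap],
    outputs=[vector_db, collection_name, db_progress],
)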