Clement Vachet committed on
Commit
577e81d
·
1 Parent(s): 799497c

style: clean code

Browse files
Files changed (1) hide show
  1. app.py +0 -36
app.py CHANGED
@@ -36,14 +36,10 @@ list_llm_simple = [os.path.basename(llm) for llm in list_llm]
36
 
37
  # Load PDF document and create doc splits
38
  def load_doc(list_file_path, chunk_size, chunk_overlap):
39
- # Processing for one document only
40
- # loader = PyPDFLoader(file_path)
41
- # pages = loader.load()
42
  loaders = [PyPDFLoader(x) for x in list_file_path]
43
  pages = []
44
  for loader in loaders:
45
  pages.extend(loader.load())
46
- # text_splitter = RecursiveCharacterTextSplitter(chunk_size = 600, chunk_overlap = 50)
47
  text_splitter = RecursiveCharacterTextSplitter(
48
  chunk_size = chunk_size,
49
  chunk_overlap = chunk_overlap)
@@ -77,26 +73,6 @@ def load_db():
77
  # Initialize langchain LLM chain
78
  def initialize_llmchain(llm_model, temperature, max_tokens, top_k, vector_db, progress=gr.Progress()):
79
  progress(0.1, desc="Initializing HF tokenizer...")
80
- # HuggingFacePipeline uses local model
81
- # Note: it will download model locally...
82
- # tokenizer=AutoTokenizer.from_pretrained(llm_model)
83
- # progress(0.5, desc="Initializing HF pipeline...")
84
- # pipeline=transformers.pipeline(
85
- # "text-generation",
86
- # model=llm_model,
87
- # tokenizer=tokenizer,
88
- # torch_dtype=torch.bfloat16,
89
- # trust_remote_code=True,
90
- # device_map="auto",
91
- # # max_length=1024,
92
- # max_new_tokens=max_tokens,
93
- # do_sample=True,
94
- # top_k=top_k,
95
- # num_return_sequences=1,
96
- # eos_token_id=tokenizer.eos_token_id
97
- # )
98
- # llm = HuggingFacePipeline(pipeline=pipeline, model_kwargs={'temperature': temperature})
99
-
100
  # HuggingFaceHub uses HF inference endpoints
101
  progress(0.5, desc="Initializing HF Hub...")
102
  # Use of trust_remote_code as model_kwargs
@@ -268,16 +244,6 @@ def conversation(qa_chain, message, history):
268
  return qa_chain, gr.update(value=""), new_history, response_source1, response_source1_page, response_source2, response_source2_page, response_source3, response_source3_page
269
 
270
 
271
- def upload_file(file_obj):
272
- list_file_path = []
273
- for idx, file in enumerate(file_obj):
274
- file_path = file_obj.name
275
- list_file_path.append(file_path)
276
- # print(file_path)
277
- # initialize_database(file_path, progress)
278
- return list_file_path
279
-
280
-
281
  def demo():
282
  with gr.Blocks(theme="base") as demo:
283
  vector_db = gr.State()
@@ -297,7 +263,6 @@ def demo():
297
  with gr.Tab("Step 1 - Upload PDF"):
298
  with gr.Row():
299
  document = gr.File(height=200, file_count="multiple", file_types=[".pdf"], interactive=True, label="Upload your PDF documents (single or multiple)")
300
- # upload_btn = gr.UploadButton("Loading document...", height=100, file_count="multiple", file_types=["pdf"], scale=1)
301
 
302
  with gr.Tab("Step 2 - Process document"):
303
  with gr.Row():
@@ -347,7 +312,6 @@ def demo():
347
  clear_btn = gr.ClearButton(components=[msg, chatbot], value="Clear conversation")
348
 
349
  # Preprocessing events
350
- #upload_btn.upload(upload_file, inputs=[upload_btn], outputs=[document])
351
  db_btn.click(initialize_database, \
352
  inputs=[document, slider_chunk_size, slider_chunk_overlap], \
353
  outputs=[vector_db, collection_name, db_progress])
 
36
 
37
  # Load PDF document and create doc splits
38
  def load_doc(list_file_path, chunk_size, chunk_overlap):
 
 
 
39
  loaders = [PyPDFLoader(x) for x in list_file_path]
40
  pages = []
41
  for loader in loaders:
42
  pages.extend(loader.load())
 
43
  text_splitter = RecursiveCharacterTextSplitter(
44
  chunk_size = chunk_size,
45
  chunk_overlap = chunk_overlap)
 
73
  # Initialize langchain LLM chain
74
  def initialize_llmchain(llm_model, temperature, max_tokens, top_k, vector_db, progress=gr.Progress()):
75
  progress(0.1, desc="Initializing HF tokenizer...")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
76
  # HuggingFaceHub uses HF inference endpoints
77
  progress(0.5, desc="Initializing HF Hub...")
78
  # Use of trust_remote_code as model_kwargs
 
244
  return qa_chain, gr.update(value=""), new_history, response_source1, response_source1_page, response_source2, response_source2_page, response_source3, response_source3_page
245
 
246
 
 
 
 
 
 
 
 
 
 
 
247
  def demo():
248
  with gr.Blocks(theme="base") as demo:
249
  vector_db = gr.State()
 
263
  with gr.Tab("Step 1 - Upload PDF"):
264
  with gr.Row():
265
  document = gr.File(height=200, file_count="multiple", file_types=[".pdf"], interactive=True, label="Upload your PDF documents (single or multiple)")
 
266
 
267
  with gr.Tab("Step 2 - Process document"):
268
  with gr.Row():
 
312
  clear_btn = gr.ClearButton(components=[msg, chatbot], value="Clear conversation")
313
 
314
  # Preprocessing events
 
315
  db_btn.click(initialize_database, \
316
  inputs=[document, slider_chunk_size, slider_chunk_overlap], \
317
  outputs=[vector_db, collection_name, db_progress])