nightfury committed on
Commit
1415cc2
·
verified ·
1 Parent(s): af8e721

Update app1.py

Browse files
Files changed (1) hide show
  1. app1.py +35 -3
app1.py CHANGED
@@ -8,10 +8,42 @@ async()=>{
8
  }
9
  """
10
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
11
  def test(x, request: gr.Request, progress=gr.Progress()):
12
  progress(0, desc="Test", unit = "Files")
13
- print("request", request)
14
- a = "abcdefghijklmnopqrstuv"
15
  for letter in progress.tqdm(a, desc = "TEST", unit = "Files"):
16
  time.sleep(0.1)
17
  return a
@@ -31,6 +63,6 @@ with gr.Blocks() as demo:
31
 
32
  b.click(test, selected, prog)
33
 
34
- demo.load(test, inputs=None, outputs=prog) #, _js=on_load)
35
 
36
  demo.launch()
 
8
  }
9
  """
10
 
11
def get_documents():
    """Load every bundled PDF and return all of their pages as one flat list.

    A ``PyPDFLoader`` instance is built around a single file path, so each
    PDF gets its own loader; passing several paths to one constructor call
    does not load them all.

    Returns:
        The concatenated results of ``PyPDFLoader(path).load()`` for each
        path, in the order the paths are listed below.
    """
    pdf_paths = (
        "AI Guide for Government - AI CoE.pdf",
        "Ethics_of_Artificial_Intelligence-2.pdf",
        "IPOL_BRI(2016)571380_EN.pdf",
    )
    pages = []
    for pdf_path in pdf_paths:
        pages.extend(PyPDFLoader(pdf_path).load())
    return pages
13
+ #17357182991031590738file.pdf
14
+
15
def extract_pdfs(x, request: gr.Request, progress=gr.Progress()):
    """Reset the index directory, then load and whitespace-normalise the PDFs.

    Args:
        x: Not used by this function.
        request: Gradio request object; printed for debugging.
        progress: Gradio progress tracker used to report per-document
            progress while the pages are processed.

    Returns:
        A ``(documents, all_text)`` tuple: the list of documents whose
        ``page_content`` has been normalised, and the concatenation of all
        normalised page contents.
    """
    import shutil

    progress(0, desc="Test", unit = "Files")
    print("request", request)

    # Always end up with an empty index directory: remove whatever is there
    # (a missing directory is fine) and create it unconditionally, so a
    # first run without an existing directory still gets one.
    shutil.rmtree(DB_DIR, ignore_errors=True)
    os.makedirs(DB_DIR, exist_ok=True)

    documents = []
    text_parts = []
    for doc in progress.tqdm(get_documents()):
        doc.page_content = replace_newlines_and_spaces(doc.page_content)
        documents.append(doc)
        text_parts.append(doc.page_content)
        # NOTE(review): fixed per-document delay kept from the original;
        # presumably only there to make the progress bar visible - confirm.
        time.sleep(0.1)

    # Join once at the end instead of growing a string inside the loop.
    return documents, "".join(text_parts)
34
+
35
def replace_newlines_and_spaces(text):
    """Collapse every run of whitespace in *text* into a single space.

    ``\\s+`` already matches newlines, so one regex pass covers both the
    newline replacement and the squeezing of repeated spaces.

    Args:
        text: The string to normalise.

    Returns:
        *text* with each run of whitespace characters (spaces, tabs,
        newlines, ...) replaced by exactly one space. Leading and trailing
        whitespace is reduced to a single space, not stripped.
    """
    return re.sub(r"\s+", " ", text)
41
+
42
+
43
def test(x, request: gr.Request, progress=gr.Progress()):
    """Demo callback: step a progress bar through a fixed string.

    Args:
        x: Not used by this function.
        request: Gradio request object; printed for debugging.
        progress: Gradio progress tracker driven once per character.

    Returns:
        The fixed string that was iterated over.
    """
    progress(0, desc="Test", unit = "Files")
    print("request", request)
    alphabet = "abcdefghijklmnopqrstuv"
    # The characters themselves are irrelevant; each one is just a tick.
    for _ in progress.tqdm(alphabet, desc = "TEST", unit = "Files"):
        time.sleep(0.1)
    return alphabet
 
63
 
64
  b.click(test, selected, prog)
65
 
66
+ demo.load(extract_pdfs, inputs=None, outputs=[prog, selected]) #, _js=on_load)
67
 
68
  demo.launch()