Update app1.py
Browse files
app1.py
CHANGED
@@ -8,10 +8,42 @@ async()=>{
|
|
8 |
}
|
9 |
"""
|
10 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
11 |
def test(x, request: gr.Request, progress=gr.Progress()):
|
12 |
progress(0, desc="Test", unit = "Files")
|
13 |
-
print("request", request)
|
14 |
-
a = "abcdefghijklmnopqrstuv"
|
15 |
for letter in progress.tqdm(a, desc = "TEST", unit = "Files"):
|
16 |
time.sleep(0.1)
|
17 |
return a
|
@@ -31,6 +63,6 @@ with gr.Blocks() as demo:
|
|
31 |
|
32 |
b.click(test, selected, prog)
|
33 |
|
34 |
-
demo.load(
|
35 |
|
36 |
demo.launch()
|
|
|
8 |
}
|
9 |
"""
|
10 |
|
11 |
+
def get_documents():
    """Load every bundled PDF and return all loaded documents as one list.

    Returns:
        A list with the documents produced by loading each PDF in turn,
        in the order the files are listed below.
    """
    pdf_paths = (
        "AI Guide for Government - AI CoE.pdf",
        "Ethics_of_Artificial_Intelligence-2.pdf",
        "IPOL_BRI(2016)571380_EN.pdf",
    )
    documents = []
    for path in pdf_paths:
        # NOTE(review): assuming this is LangChain's PyPDFLoader, it accepts a
        # single file path per instance; additional positional arguments are
        # not treated as more files, so each PDF gets its own loader.
        documents.extend(PyPDFLoader(path).load())
    return documents
|
13 |
+
#17357182991031590738file.pdf
|
14 |
+
|
15 |
+
def extract_pdfs(x=None, request: gr.Request = None, progress=gr.Progress()):
    """Reset the index directory and load the PDFs with whitespace normalized.

    Args:
        x: Unused. Defaults to None because the `demo.load` wiring in this
            file registers this function with `inputs=None`, so no value is
            supplied for it.
        request: Value annotated as `gr.Request`; it is only printed.
        progress: `gr.Progress` instance used to report progress while
            iterating over the loaded documents.

    Returns:
        A `(documents, all_text)` tuple: the list of documents returned by
        `get_documents()` with `page_content` normalized in place, and the
        concatenation of every normalized `page_content`.
    """
    import shutil

    progress(0, desc="Test", unit="Files")
    print("request", request)

    # Start from an empty index directory. The removal is best-effort
    # (ignore_errors=True), so the directory may survive it; makedirs with
    # exist_ok=True covers both that case and a directory that never existed.
    if os.path.exists(DB_DIR):
        shutil.rmtree(DB_DIR, ignore_errors=True)
    os.makedirs(DB_DIR, exist_ok=True)

    documents = []
    for doc in progress.tqdm(get_documents()):
        doc.page_content = replace_newlines_and_spaces(doc.page_content)
        documents.append(doc)
        # NOTE(review): presumably a deliberate pause so the progress bar is
        # visible; kept from the original — confirm it is still wanted.
        time.sleep(0.1)

    # Join once instead of growing a string with += on every iteration.
    all_text = "".join(doc.page_content for doc in documents)
    return documents, all_text
|
34 |
+
|
35 |
+
def replace_newlines_and_spaces(text):
    """Collapse every run of whitespace in *text* into a single space.

    Newlines, tabs and repeated spaces are all matched by ``\\s+``, so a
    separate newline-replacement pass is unnecessary. Leading and trailing
    whitespace is collapsed to one space, not stripped.

    Args:
        text: The string to normalize.

    Returns:
        The normalized string.
    """
    return re.sub(r'\s+', ' ', text)
|
41 |
+
|
42 |
+
|
43 |
def test(x, request: gr.Request, progress=gr.Progress()):
    """Demo callback: step a progress bar over a fixed string and return it.

    Args:
        x: Unused input value.
        request: Value annotated as `gr.Request`; it is only printed.
        progress: `gr.Progress` instance driving the progress display.

    Returns:
        The fixed string that was iterated over.
    """
    progress(0, desc="Test", unit="Files")
    print("request", request)
    letters = "abcdefghijklmnopqrstuv"
    # One short pause per character so each progress step is observable.
    for _ in progress.tqdm(letters, desc="TEST", unit="Files"):
        time.sleep(0.1)
    return letters
|
|
|
63 |
|
64 |
b.click(test, selected, prog)
|
65 |
|
66 |
+
demo.load(extract_pdfs, inputs=None, outputs=[prog, selected]) #, _js=on_load)
|
67 |
|
68 |
demo.launch()
|