dammy committed on
Commit
bb53d04
·
1 Parent(s): 065d9fd

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +27 -19
app.py CHANGED
@@ -44,29 +44,37 @@ def local_query(query, context):
44
 
45
  def run_query(btn, history, query):
46
 
47
- file_name = btn.name
48
-
49
- loader = PDFMinerLoader(file_name)
50
- doc = loader.load()
51
 
52
- text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)
53
- texts = text_splitter.split_documents(doc)
54
 
55
- texts = [i.page_content for i in texts]
56
 
57
- doc_emb = st_model.encode(texts)
58
- doc_emb = doc_emb.tolist()
59
-
60
- ids = [str(uuid.uuid1()) for _ in doc_emb]
61
-
62
- client = chromadb.Client()
63
- collection = client.create_collection("test_db")
 
 
 
64
 
65
- collection.add(
66
- embeddings=doc_emb,
67
- documents=texts,
68
- ids=ids
69
- )
 
 
 
 
 
 
 
 
 
 
70
 
71
 
72
 
 
44
 
45
  def run_query(btn, history, query):
46
 
 
 
 
 
47
 
48
+ global count
 
49
 
50
+ count = 1
51
 
52
+ if count ==1:
53
+ file_name = btn.name
54
+
55
+ loader = PDFMinerLoader(file_name)
56
+ doc = loader.load()
57
+
58
+ text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)
59
+ texts = text_splitter.split_documents(doc)
60
+
61
+ texts = [i.page_content for i in texts]
62
 
63
+ doc_emb = st_model.encode(texts)
64
+ doc_emb = doc_emb.tolist()
65
+
66
+ ids = [str(uuid.uuid1()) for _ in doc_emb]
67
+
68
+ client = chromadb.Client()
69
+ collection = client.create_collection("test_db")
70
+
71
+ collection.add(
72
+ embeddings=doc_emb,
73
+ documents=texts,
74
+ ids=ids
75
+ )
76
+
77
+ count+=1
78
 
79
 
80