Spaces:
Build error
Build error
FauziIsyrinApridal
committed on
Commit
·
82031c8
1
Parent(s):
498342f
perbaiki placeholder input dan hapus chunking
Browse files- app/chat.py +1 -1
- app/document_processor.py +18 -10
app/chat.py
CHANGED
@@ -50,7 +50,7 @@ def display_chat_history(chain):
|
|
50 |
|
51 |
# Input teks biasa
|
52 |
user_input_obj = st.chat_input(
|
53 |
-
"Masukkan pertanyaan
|
54 |
key="chat_input_field"
|
55 |
)
|
56 |
|
|
|
50 |
|
51 |
# Input teks biasa
|
52 |
user_input_obj = st.chat_input(
|
53 |
+
"Masukkan pertanyaan",
|
54 |
key="chat_input_field"
|
55 |
)
|
56 |
|
app/document_processor.py
CHANGED
@@ -3,8 +3,9 @@ from langchain_huggingface import HuggingFaceEmbeddings
|
|
3 |
from langchain_community.vectorstores import FAISS
|
4 |
import os
|
5 |
import tempfile
|
6 |
-
import zipfile
|
7 |
import streamlit as st
|
|
|
|
|
8 |
|
9 |
def save_vector_store_to_supabase(vector_store, supabase, bucket_name, file_prefix="vector_store"):
|
10 |
"""Save vector store to Supabase storage as separate files."""
|
@@ -94,18 +95,25 @@ def load_vector_store_from_supabase(supabase, bucket_name, file_prefix="vector_s
|
|
94 |
st.error(f"Error loading from Supabase: {e}")
|
95 |
return None
|
96 |
|
|
|
97 |
def process_documents(docs):
|
98 |
embeddings = HuggingFaceEmbeddings(
|
99 |
model_name="LazarusNLP/all-indo-e5-small-v4",
|
100 |
model_kwargs={"device": "cpu"},
|
101 |
encode_kwargs={"normalize_embeddings": True}
|
102 |
)
|
103 |
-
|
104 |
-
|
105 |
-
|
106 |
-
|
107 |
-
|
108 |
-
|
109 |
-
|
110 |
-
|
111 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
3 |
from langchain_community.vectorstores import FAISS
|
4 |
import os
|
5 |
import tempfile
|
|
|
6 |
import streamlit as st
|
7 |
+
from langchain.schema import Document
|
8 |
+
|
9 |
|
10 |
def save_vector_store_to_supabase(vector_store, supabase, bucket_name, file_prefix="vector_store"):
|
11 |
"""Save vector store to Supabase storage as separate files."""
|
|
|
95 |
st.error(f"Error loading from Supabase: {e}")
|
96 |
return None
|
97 |
|
98 |
+
|
99 |
def process_documents(docs):
|
100 |
embeddings = HuggingFaceEmbeddings(
|
101 |
model_name="LazarusNLP/all-indo-e5-small-v4",
|
102 |
model_kwargs={"device": "cpu"},
|
103 |
encode_kwargs={"normalize_embeddings": True}
|
104 |
)
|
105 |
+
|
106 |
+
combined_docs = []
|
107 |
+
for doc in docs:
|
108 |
+
# Gunakan page_content langsung tanpa split
|
109 |
+
text = doc.page_content if hasattr(doc, "page_content") else doc
|
110 |
+
metadata = doc.metadata if hasattr(doc, "metadata") else {}
|
111 |
+
|
112 |
+
combined_doc = Document(
|
113 |
+
page_content=text,
|
114 |
+
metadata=metadata
|
115 |
+
)
|
116 |
+
combined_docs.append(combined_doc)
|
117 |
+
|
118 |
+
vector_store = FAISS.from_documents(combined_docs, embeddings)
|
119 |
+
return vector_store
|