Update app.py
app.py CHANGED
@@ -3,7 +3,8 @@ import streamlit as st
|
|
3 |
import PyPDF2
|
4 |
import matplotlib.pyplot as plt
|
5 |
from io import BytesIO
|
6 |
-
from
|
|
|
7 |
from sklearn.metrics.pairwise import cosine_similarity
|
8 |
import numpy as np
|
9 |
import dotenv
|
@@ -17,8 +18,8 @@ dotenv.load_dotenv()
|
|
17 |
API_URL = "https://api-inference.huggingface.co/models/sarvamai/sarvam-2b-v0.5"
|
18 |
headers = {"Authorization": f"Bearer {os.getenv('HUGGINGFACE_API_KEY')}"}
|
19 |
|
20 |
-
#
|
21 |
-
embed_model =
|
22 |
|
23 |
def query_huggingface_api(payload):
|
24 |
response = requests.post(API_URL, headers=headers, json=payload)
|
@@ -102,9 +103,12 @@ def search_similar_sections(document_text, query, top_k=3):
|
|
102 |
# Split the document into sections (you may need to adjust this based on your document structure)
|
103 |
sections = document_text.split('\n\n')
|
104 |
|
|
|
|
|
|
|
105 |
# Compute embeddings for the query and all sections
|
106 |
-
query_embedding = embed_model.
|
107 |
-
section_embeddings = embed_model.
|
108 |
|
109 |
# Compute cosine similarities
|
110 |
similarities = cosine_similarity([query_embedding], section_embeddings)[0]
|
|
|
3 |
import PyPDF2
|
4 |
import matplotlib.pyplot as plt
|
5 |
from io import BytesIO
|
6 |
+
from llama_index.embeddings.huggingface import HuggingFaceEmbedding
|
7 |
+
from llama_index import Document
|
8 |
from sklearn.metrics.pairwise import cosine_similarity
|
9 |
import numpy as np
|
10 |
import dotenv
|
|
|
18 |
API_URL = "https://api-inference.huggingface.co/models/sarvamai/sarvam-2b-v0.5"
|
19 |
headers = {"Authorization": f"Bearer {os.getenv('HUGGINGFACE_API_KEY')}"}
|
20 |
|
21 |
+
# Configure embedding model
|
22 |
+
embed_model = HuggingFaceEmbedding(model_name="BAAI/bge-small-en-v1.5")
|
23 |
|
24 |
def query_huggingface_api(payload):
|
25 |
response = requests.post(API_URL, headers=headers, json=payload)
|
|
|
103 |
# Split the document into sections (you may need to adjust this based on your document structure)
|
104 |
sections = document_text.split('\n\n')
|
105 |
|
106 |
+
# Create Document objects for each section
|
107 |
+
documents = [Document(text=section) for section in sections]
|
108 |
+
|
109 |
# Compute embeddings for the query and all sections
|
110 |
+
query_embedding = embed_model.get_text_embedding(query)
|
111 |
+
section_embeddings = [embed_model.get_text_embedding(doc.text) for doc in documents]
|
112 |
|
113 |
# Compute cosine similarities
|
114 |
similarities = cosine_similarity([query_embedding], section_embeddings)[0]
|
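For context, the hunks above only show the embedding setup and stop at the cosine_similarity call; the rest of search_similar_sections is not visible in this diff. Below is a minimal, self-contained sketch of how the retrieval step introduced by this commit fits together. The top_k ranking and return value are assumptions, not code from the Space.

# Minimal sketch of the retrieval flow this commit wires up. Everything up to
# the cosine_similarity call mirrors the diff; the ranking and return value
# below are assumed, since the hunk ends before that point.
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity
from llama_index.embeddings.huggingface import HuggingFaceEmbedding
from llama_index import Document

embed_model = HuggingFaceEmbedding(model_name="BAAI/bge-small-en-v1.5")

def search_similar_sections(document_text, query, top_k=3):
    # Split the document into blank-line-separated sections, as in the Space.
    sections = document_text.split('\n\n')

    # Wrap each section in a llama_index Document (as added by this commit).
    documents = [Document(text=section) for section in sections]

    # Embed the query and every section with the BGE model.
    query_embedding = embed_model.get_text_embedding(query)
    section_embeddings = [embed_model.get_text_embedding(doc.text) for doc in documents]

    # Cosine similarity between the query and each section embedding.
    similarities = cosine_similarity([query_embedding], section_embeddings)[0]

    # Assumed completion: return the top_k sections, most similar first.
    top_indices = np.argsort(similarities)[::-1][:top_k]
    return [(sections[i], float(similarities[i])) for i in top_indices]

Called as search_similar_sections(pdf_text, user_question), this would return the sections most relevant to the question, which the app presumably passes to the sarvam-2b model through query_huggingface_api.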