ryota39 committed on
Commit
972c3ed
·
verified ·
1 Parent(s): 15a6e60

Delete app.py

Browse files
Files changed (1) hide show
  1. app.py +0 -73
app.py DELETED
@@ -1,73 +0,0 @@
1
- import os
2
- import pandas as pd
3
- import streamlit as st
4
- from langchain_community.vectorstores.faiss import FAISS
5
- from langchain_huggingface import HuggingFaceEmbeddings
6
-
7
-
# NOTE(review): presumably a workaround for the "duplicate OpenMP runtime"
# abort that can occur when several native libraries are loaded together;
# confirm it is still required.
os.environ["KMP_DUPLICATE_LIB_OK"] = "True"
# Browser tab title and full-width layout for the Streamlit page.
st.set_page_config(page_title="ICLR2025 Paper Search", layout="wide")
10
-
11
-
@st.cache_resource
def create_vector_store(
    vector_store_path: str,
    embedding_model_name: str,
) -> FAISS:
    """Load the FAISS index saved under ``vector_store_path``.

    The index is read with ``FAISS.load_local`` and paired with a
    ``HuggingFaceEmbeddings`` instance built from ``embedding_model_name``.
    The function is wrapped in ``st.cache_resource``.

    Args:
        vector_store_path: Folder handed to ``FAISS.load_local``.
        embedding_model_name: Model name handed to ``HuggingFaceEmbeddings``.

    Returns:
        The loaded ``FAISS`` vector store.
    """
    # NOTE(review): allow_dangerous_deserialization=True opts in to
    # pickle-based loading of the index folder; only point this at index
    # folders that come from a trusted source.
    return FAISS.load_local(
        folder_path=vector_store_path,
        embeddings=HuggingFaceEmbeddings(model_name=embedding_model_name),
        allow_dangerous_deserialization=True,
    )
24
-
25
-
def _parse_paper(content: str) -> tuple:
    """Split one stored document string into ``(title, abstract, url)``."""
    # The URL is whatever sits between the <BEGIN_URL> and <END_URL> markers.
    url = content.split("<BEGIN_URL>")[-1].split("<END_URL>")[0]
    # NOTE: the separator below is the two-character sequence backslash + "n",
    # not a newline character. NOTE(review): presumably the indexed text
    # stores line breaks in that escaped form; confirm against the index
    # builder.
    segments = content.split("\\n")
    title = segments[0]
    abstract = segments[-1].split("<BEGIN_URL>")[0]
    return title, abstract, url


def grab_topk(
    input_text: str,
    vector_store: "FAISS",
    top_k: int,
) -> pd.DataFrame:
    """Retrieve the documents closest to ``input_text`` and tabulate them.

    Args:
        input_text: Query string passed to the retriever.
        vector_store: Vector store that provides ``as_retriever``.
        top_k: Number of hits requested by the caller.

    Returns:
        A DataFrame with the columns ``title``, ``abstract`` and ``url``,
        one row per retrieved document. Every abstract has ``"..."``
        appended.
    """
    # NOTE(review): the retriever is asked for top_k + 1 documents and all of
    # them are returned, so the table can hold one more row than requested.
    # Confirm whether the extra hit is intentional.
    retriever = vector_store.as_retriever(search_kwargs={"k": top_k + 1})
    # `invoke` replaces the deprecated `get_relevant_documents`.
    relevant_docs = retriever.invoke(input_text)

    parsed = [_parse_paper(doc.page_content) for doc in relevant_docs]
    titles = [title for title, _, _ in parsed]
    abstracts = [abstract + "..." for _, abstract, _ in parsed]
    urls = [url for _, _, url in parsed]
    return pd.DataFrame({"title": titles, "abstract": abstracts, "url": urls})
47
-
48
-
if __name__ == "__main__":
    # Location of the saved FAISS index and the embedding model used with it.
    vector_store_path = "db"
    embedding_model_name = "intfloat/multilingual-e5-large-instruct"
    vector_store = create_vector_store(
        vector_store_path,
        embedding_model_name,
    )

    # Page header and the two input widgets.
    st.markdown("## ICLR2025")
    st.markdown("- list of papers (https://iclr.cc/Downloads/2025)")
    input_text = st.text_input(
        "query",
        "",
        placeholder="Enter the keywords you are interested in...",
    )
    top_k = st.number_input("top_k", min_value=1, value=10, step=1)

    # The button label is Japanese for "search".
    if st.button("検索"):
        stripped_input_text = input_text.strip()
        if stripped_input_text:
            df = grab_topk(
                stripped_input_text,
                vector_store,
                top_k,
            )
            st.table(df)
        else:
            # A blank query would only return arbitrary neighbours of the
            # empty string, so ask for input instead of searching.
            st.warning("Please enter a query before searching.")