Weedoo committed on
Commit
d4177a4
·
verified ·
1 Parent(s): dacd607

add async and add default values

Browse files
Files changed (1) hide show
  1. app.py +125 -55
app.py CHANGED
@@ -3,75 +3,118 @@ import os
3
  import gradio as gr
4
  import pandas as pd
5
  from pinecone import Pinecone
6
- from utils import get_zotero_ids, get_arxiv_papers, get_hf_embeddings, upload_to_pinecone, get_new_papers, recommend_papers
7
-
8
- HF_API_KEY = os.getenv('HF_API_KEY')
9
- PINECONE_API_KEY = os.getenv('PINECONE_API_KEY')
10
- INDEX_NAME = os.getenv('INDEX_NAME')
11
- NAMESPACE_NAME = os.getenv('NAMESPACE_NAME')
 
 
 
 
 
 
 
 
 
12
 
13
  script_dir = os.path.dirname(os.path.abspath(__file__))
14
- os.chdir(script_dir)
 
15
 
16
  def category_radio(cat):
17
- if cat == 'Computer Vision and Pattern Recognition':
18
- return 'cs.CV'
19
- elif cat == 'Computation and Language':
20
- return 'cs.CL'
21
- elif cat == 'Artificial Intelligence':
22
- return 'cs.AI'
23
- elif cat == 'Robotics':
24
- return 'cs.RO'
 
25
 
26
  def comment_radio(com):
27
- if com == 'None':
28
  return None
29
  else:
30
  return com
31
-
 
32
  def reset_project():
33
- file_path = 'arxiv-scrape.csv'
34
  if os.path.exists(file_path):
35
  os.remove(file_path)
36
- logging.info(f"{file_path} has been deleted. Delete reset_project() if you want to persist recommended papers.")
 
 
37
 
38
- api_key = os.getenv('PINECONE_API_KEY')
39
- index = os.getenv('INDEX_NAME')
40
- pc = Pinecone(api_key = api_key)
41
  if index in pc.list_indexes().names():
42
  pc.delete_index(index)
43
- logging.info(f"{index} index has been deleted from the vectordb. Delete reset_project() if you want to persist recommended papers.")
 
 
44
  return f"{file_path} has been deleted.<br />{index} index has been deleted from the vectordb.<br />"
45
 
 
46
  def reset_csv():
47
- file_path = 'arxiv-scrape.csv'
48
  if os.path.exists(file_path):
49
  os.remove(file_path)
50
- logging.info(f"{file_path} has been deleted. Delete reset_project() if you want to persist recommended papers.")
 
 
 
51
 
52
  with gr.Blocks() as demo:
53
 
54
- zotero_api_key = gr.Textbox(label="Zotero API Key")
 
 
55
 
56
- zotero_library_id = gr.Textbox(label="Zotero Library ID")
 
 
57
 
58
- zotero_tag = gr.Textbox(label="Zotero Tag")
59
 
60
  arxiv_category_name = gr.State([])
61
- radio_arxiv_category_name = gr.Radio(['Computer Vision and Pattern Recognition', 'Computation and Language', 'Artificial Intelligence', 'Robotics'], value= ['Computer Vision and Pattern Recognition'], label="ArXiv Category Query")
62
- radio_arxiv_category_name.change(fn = category_radio, inputs= radio_arxiv_category_name, outputs= arxiv_category_name)
 
 
 
 
 
 
 
 
 
 
 
63
 
64
  arxiv_comment_query = gr.State([])
65
- radio_arxiv_comment_query = gr.Radio(['CVPR', 'ACL', 'TACL', 'JAIR', 'IJRR', 'None'], value=['CVPR'], label="ArXiv Comment Query")
66
- radio_arxiv_comment_query.change(fn = comment_radio, inputs= radio_arxiv_comment_query, outputs= arxiv_comment_query)
67
-
68
- threshold = gr.Slider(minimum= 0.70, maximum= 0.99, value = 0.80, label="Similarity Score Threshold")
 
 
 
 
 
 
 
 
69
 
70
  init_output = gr.Textbox(label="Project Initialization Result")
71
 
72
- rec_output = gr.Markdown(label = "Recommended Papers")
73
 
74
- reset_output = gr.Markdown(label = "Reset Declaration")
75
 
76
  init_btn = gr.Button("Initialize")
77
 
@@ -79,46 +122,73 @@ with gr.Blocks() as demo:
79
 
80
  reset_btn = gr.Button("Reset")
81
 
82
- timer = gr.Timer(value=600)
83
- timer.tick(reset_project)
84
-
85
- reset_btn.click(fn = reset_project, inputs= [], outputs= [reset_output])
86
-
87
- @init_btn.click(inputs= [zotero_api_key, zotero_library_id, zotero_tag], outputs= [init_output])
88
- def init(zotero_api_key, zotero_library_id, zotero_tag, hf_api_key = HF_API_KEY, pinecone_api_key = PINECONE_API_KEY, index_name = INDEX_NAME, namespace_name = NAMESPACE_NAME):
89
-
90
- logging.basicConfig(filename= 'logfile.log', level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
 
 
 
 
 
 
 
 
 
 
 
91
  logging.info("Project Initialization Script Started (Serverless)")
92
-
93
  ids = get_zotero_ids(zotero_api_key, zotero_library_id, zotero_tag)
94
 
95
  df = get_arxiv_papers(ids)
96
 
97
  embeddings, dim = get_hf_embeddings(hf_api_key, df)
98
 
99
- feedback = upload_to_pinecone(pinecone_api_key, index_name, namespace_name, embeddings, dim, df)
 
 
100
 
101
  logging.info(feedback)
102
  if isinstance(feedback, dict):
103
  return f"Retrieved {len(ids)} papers from Zotero. Successfully upserted {feedback['upserted_count']} embeddings in {namespace_name} namespace."
104
- else :
105
  return feedback
106
-
107
- @rec_btn.click(inputs= [arxiv_category_name, arxiv_comment_query, threshold], outputs= [rec_output])
108
- def recs(arxiv_category_name, arxiv_comment_query, threshold, hf_api_key = HF_API_KEY, pinecone_api_key = PINECONE_API_KEY, index_name = INDEX_NAME, namespace_name = NAMESPACE_NAME):
 
 
 
 
 
 
 
 
 
 
 
109
  logging.info("Weekly Script Started (Serverless)")
110
 
111
- df = get_arxiv_papers(category= arxiv_category_name, comment= arxiv_comment_query)
112
 
113
  df = get_new_papers(df)
114
 
115
  if not isinstance(df, pd.DataFrame):
116
  return df
117
-
118
  embeddings, _ = get_hf_embeddings(hf_api_key, df)
119
 
120
- results = recommend_papers(pinecone_api_key, index_name, namespace_name, embeddings, df, threshold * 3)
 
 
121
 
122
  return results
123
 
124
- demo.launch(share = True)
 
 
3
  import gradio as gr
4
  import pandas as pd
5
  from pinecone import Pinecone
6
+ from utils import (
7
+ get_zotero_ids,
8
+ get_arxiv_papers,
9
+ get_hf_embeddings,
10
+ upload_to_pinecone,
11
+ get_new_papers,
12
+ recommend_papers,
13
+ )
14
+ from dotenv import load_dotenv
15
+
16
+ load_dotenv(".env")
17
+ HF_API_KEY = os.getenv("HF_API_KEY")
18
+ PINECONE_API_KEY = os.getenv("PINECONE_API_KEY")
19
+ INDEX_NAME = os.getenv("INDEX_NAME")
20
+ NAMESPACE_NAME = os.getenv("NAMESPACE_NAME")
21
 
22
  script_dir = os.path.dirname(os.path.abspath(__file__))
23
+ os.chdir(script_dir)
24
+
25
 
26
  def category_radio(cat):
27
+ if cat == "Computer Vision and Pattern Recognition":
28
+ return "cs.CV"
29
+ elif cat == "Computation and Language":
30
+ return "cs.CL"
31
+ elif cat == "Artificial Intelligence":
32
+ return "cs.AI"
33
+ elif cat == "Robotics":
34
+ return "cs.RO"
35
+
36
 
37
  def comment_radio(com):
38
+ if com == "None":
39
  return None
40
  else:
41
  return com
42
+
43
+
44
  def reset_project():
45
+ file_path = "arxiv-scrape.csv"
46
  if os.path.exists(file_path):
47
  os.remove(file_path)
48
+ logging.info(
49
+ f"{file_path} has been deleted. Delete reset_project() if you want to persist recommended papers."
50
+ )
51
 
52
+ api_key = os.getenv("PINECONE_API_KEY")
53
+ index = os.getenv("INDEX_NAME")
54
+ pc = Pinecone(api_key=api_key)
55
  if index in pc.list_indexes().names():
56
  pc.delete_index(index)
57
+ logging.info(
58
+ f"{index} index has been deleted from the vectordb. Delete reset_project() if you want to persist recommended papers."
59
+ )
60
  return f"{file_path} has been deleted.<br />{index} index has been deleted from the vectordb.<br />"
61
 
62
+
63
  def reset_csv():
64
+ file_path = "arxiv-scrape.csv"
65
  if os.path.exists(file_path):
66
  os.remove(file_path)
67
+ logging.info(
68
+ f"{file_path} has been deleted. Delete reset_project() if you want to persist recommended papers."
69
+ )
70
+
71
 
72
  with gr.Blocks() as demo:
73
 
74
+ zotero_api_key = gr.Textbox(
75
+ label="Zotero API Key", type="password", value=os.getenv("ZOTERO_API_KEY")
76
+ )
77
 
78
+ zotero_library_id = gr.Textbox(
79
+ label="Zotero Library ID", value=os.getenv("ZOTERO_LIBRARY_ID")
80
+ )
81
 
82
+ zotero_tag = gr.Textbox(label="Zotero Tag", value=os.getenv("ZOTERO_TAG"))
83
 
84
  arxiv_category_name = gr.State([])
85
+ radio_arxiv_category_name = gr.Radio(
86
+ [
87
+ "Computer Vision and Pattern Recognition",
88
+ "Computation and Language",
89
+ "Artificial Intelligence",
90
+ "Robotics",
91
+ ],
92
+ value=["Computer Vision and Pattern Recognition"],
93
+ label="ArXiv Category Query",
94
+ )
95
+ radio_arxiv_category_name.change(
96
+ fn=category_radio, inputs=radio_arxiv_category_name, outputs=arxiv_category_name
97
+ )
98
 
99
  arxiv_comment_query = gr.State([])
100
+ radio_arxiv_comment_query = gr.Radio(
101
+ ["CVPR", "ACL", "TACL", "JAIR", "IJRR", "None"],
102
+ value=["CVPR"],
103
+ label="ArXiv Comment Query",
104
+ )
105
+ radio_arxiv_comment_query.change(
106
+ fn=comment_radio, inputs=radio_arxiv_comment_query, outputs=arxiv_comment_query
107
+ )
108
+
109
+ threshold = gr.Slider(
110
+ minimum=0.70, maximum=0.99, value=0.80, label="Similarity Score Threshold"
111
+ )
112
 
113
  init_output = gr.Textbox(label="Project Initialization Result")
114
 
115
+ rec_output = gr.Markdown(label="Recommended Papers")
116
 
117
+ reset_output = gr.Markdown(label="Reset Declaration")
118
 
119
  init_btn = gr.Button("Initialize")
120
 
 
122
 
123
  reset_btn = gr.Button("Reset")
124
 
125
+ reset_btn.click(fn=reset_project, inputs=[], outputs=[reset_output])
126
+
127
+ @init_btn.click(
128
+ inputs=[zotero_api_key, zotero_library_id, zotero_tag], outputs=[init_output]
129
+ )
130
+ def init(
131
+ zotero_api_key,
132
+ zotero_library_id,
133
+ zotero_tag,
134
+ hf_api_key=HF_API_KEY,
135
+ pinecone_api_key=PINECONE_API_KEY,
136
+ index_name=INDEX_NAME,
137
+ namespace_name=NAMESPACE_NAME,
138
+ ):
139
+
140
+ logging.basicConfig(
141
+ filename="logfile.log",
142
+ level=logging.INFO,
143
+ format="%(asctime)s - %(levelname)s - %(message)s",
144
+ )
145
  logging.info("Project Initialization Script Started (Serverless)")
146
+
147
  ids = get_zotero_ids(zotero_api_key, zotero_library_id, zotero_tag)
148
 
149
  df = get_arxiv_papers(ids)
150
 
151
  embeddings, dim = get_hf_embeddings(hf_api_key, df)
152
 
153
+ feedback = upload_to_pinecone(
154
+ pinecone_api_key, index_name, namespace_name, embeddings, dim, df
155
+ )
156
 
157
  logging.info(feedback)
158
  if isinstance(feedback, dict):
159
  return f"Retrieved {len(ids)} papers from Zotero. Successfully upserted {feedback['upserted_count']} embeddings in {namespace_name} namespace."
160
+ else:
161
  return feedback
162
+
163
+ @rec_btn.click(
164
+ inputs=[arxiv_category_name, arxiv_comment_query, threshold],
165
+ outputs=[rec_output],
166
+ )
167
+ def recs(
168
+ arxiv_category_name,
169
+ arxiv_comment_query,
170
+ threshold,
171
+ hf_api_key=HF_API_KEY,
172
+ pinecone_api_key=PINECONE_API_KEY,
173
+ index_name=INDEX_NAME,
174
+ namespace_name=NAMESPACE_NAME,
175
+ ):
176
  logging.info("Weekly Script Started (Serverless)")
177
 
178
+ df = get_arxiv_papers(category=arxiv_category_name, comment=arxiv_comment_query)
179
 
180
  df = get_new_papers(df)
181
 
182
  if not isinstance(df, pd.DataFrame):
183
  return df
184
+
185
  embeddings, _ = get_hf_embeddings(hf_api_key, df)
186
 
187
+ results = recommend_papers(
188
+ pinecone_api_key, index_name, namespace_name, embeddings, df, threshold * 3
189
+ )
190
 
191
  return results
192
 
193
+
194
+ demo.launch(share=True)