wakeupmh committed on
Commit
f944585
·
1 Parent(s): d3e32db

fix: dataset

Browse files
Files changed (1) hide show
  1. app.py +10 -0
app.py CHANGED
@@ -21,6 +21,16 @@ def load_models():
21
  model = AutoModelForSeq2SeqLM.from_pretrained(model_name)
22
  return tokenizer, model
23
 
 
 
 
 
 
 
 
 
 
 
24
  def generate_answer(question, context, max_length=200):
25
  tokenizer, model = load_models()
26
 
 
21
  model = AutoModelForSeq2SeqLM.from_pretrained(model_name)
22
  return tokenizer, model
23
 
24
+ @st.cache_data
25
+ def load_dataset():
26
+ # Create initial dataset if it doesn't exist
27
+ if not os.path.exists(DATASET_PATH):
28
+ with st.spinner("Building initial dataset from autism research papers..."):
29
+ import faiss_index.index as idx
30
+ papers = idx.fetch_arxiv_papers("autism research", max_results=100)
31
+ idx.build_faiss_index(papers, dataset_dir=DATASET_DIR)
32
+ return load_from_disk(DATASET_PATH)
33
+
34
  def generate_answer(question, context, max_length=200):
35
  tokenizer, model = load_models()
36