wakeupmh committed
Commit 42d1dd5 · 1 Parent(s): 8bb473c

fix: improve mem usage

Files changed (3):
  1. app.py (+20 −19)
  2. faiss_index/index.py (+40 −30)
  3. requirements.txt (+10 −8)
app.py CHANGED

@@ -22,17 +22,19 @@ def load_models():
     model = AutoModelForSeq2SeqLM.from_pretrained(
         model_name,
         torch_dtype=torch.float16,
-        low_cpu_mem_usage=True
+        low_cpu_mem_usage=True,
+        device_map='auto',
+        max_memory={'cpu': '1GB'}
     )
     return tokenizer, model
 
-@st.cache_data
+@st.cache_data(ttl=3600)  # Cache for 1 hour
 def load_dataset(query):
     # Create initial dataset if it doesn't exist
     if not os.path.exists(DATASET_PATH):
         with st.spinner("Building initial dataset from autism research papers..."):
             import faiss_index.index as idx
-            papers = idx.fetch_arxiv_papers(f"{query} AND (cat:q-bio.NC OR cat:q-bio.QM OR cat:q-bio.GN OR cat:q-bio.CB OR cat:q-bio.MN)", max_results=50)  # More focused search
+            papers = idx.fetch_arxiv_papers(f"{query} AND (cat:q-bio.NC OR cat:q-bio.QM OR cat:q-bio.GN OR cat:q-bio.CB OR cat:q-bio.MN)", max_results=25)  # Reduced max results
             idx.build_faiss_index(papers, dataset_dir=DATASET_DIR)
 
     # Load and convert to pandas for easier handling
@@ -43,32 +45,31 @@ def load_dataset(query):
     })
     return df
 
-def generate_answer(question, context, max_length=200):
+def generate_answer(question, context, max_length=150):  # Reduced max length
     tokenizer, model = load_models()
 
     # Add context about medical information
     prompt = f"Based on scientific research about autism and health: question: {question} context: {context}"
 
-    inputs = tokenizer(
-        prompt,
-        add_special_tokens=True,
-        return_tensors="pt",
-        max_length=512,
-        truncation=True,
-        padding=True
-    )
+    # Optimize input processing
+    inputs = tokenizer(prompt, return_tensors="pt", truncation=True, max_length=512)
 
-    # Get model predictions
-    with torch.no_grad():
+    with torch.inference_mode():  # More efficient than no_grad
         outputs = model.generate(
-            inputs["input_ids"],
+            **inputs,
             max_length=max_length,
-            min_length=30,
-            num_beams=4,
-            length_penalty=2.0,
+            num_beams=2,  # Reduced beam search
+            temperature=0.7,
+            top_p=0.9,
+            repetition_penalty=1.2,
             early_stopping=True
         )
-    answer = tokenizer.decode(outputs[0], skip_special_tokens=True)
+
+    answer = tokenizer.decode(outputs[0], skip_special_tokens=True)
+
+    # Clear GPU memory if possible
+    if torch.cuda.is_available():
+        torch.cuda.empty_cache()
 
     return answer if answer and not answer.isspace() else "I cannot find a specific answer to this question in the provided context."
 
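Note (not part of this commit): in transformers, temperature and top_p only take effect when sampling is enabled; with num_beams=2 and the default do_sample=False, generate() ignores them (newer versions emit a warning about unused sampling flags). A minimal sketch of the same call with sampling explicitly switched on:

    # Hedged sketch: do_sample=True is required for temperature/top_p to apply
    outputs = model.generate(
        **inputs,
        max_length=max_length,
        do_sample=True,
        num_beams=2,
        temperature=0.7,
        top_p=0.9,
        repetition_penalty=1.2,
        early_stopping=True
    )

Alternatively, dropping num_beams entirely (greedy sampling) would save further memory, since beam search keeps multiple candidate sequences alive.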
faiss_index/index.py CHANGED

@@ -30,7 +30,11 @@ def fetch_arxiv_papers(query, max_results=10):
 def build_faiss_index(papers, dataset_dir=DATASET_DIR):
     """Build and save dataset with FAISS index for RAG"""
     # Initialize smaller DPR encoder
-    ctx_encoder = DPRContextEncoder.from_pretrained("facebook/dpr-ctx_encoder-single-nq-base", device_map="auto", load_in_8bit=True)
+    ctx_encoder = DPRContextEncoder.from_pretrained(
+        "facebook/dpr-ctx_encoder-single-nq-base",
+        torch_dtype=torch.float16,
+        low_cpu_mem_usage=True
+    )
     ctx_tokenizer = DPRContextEncoderTokenizer.from_pretrained("facebook/dpr-ctx_encoder-single-nq-base")
 
     # Create embeddings with smaller batches and memory optimization
@@ -38,40 +42,46 @@ def build_faiss_index(papers, dataset_dir=DATASET_DIR):
     embeddings = []
     batch_size = 4  # Smaller batch size
 
-    for i in range(0, len(texts), batch_size):
-        batch = texts[i:i + batch_size]
-        inputs = ctx_tokenizer(batch, max_length=256, padding=True, truncation=True, return_tensors="pt")  # Reduced max_length
-        with torch.no_grad():
+    with torch.inference_mode():
+        for i in range(0, len(texts), batch_size):
+            batch_texts = texts[i:i + batch_size]
+            inputs = ctx_tokenizer(
+                batch_texts,
+                max_length=256,  # Reduced from default
+                padding=True,
+                truncation=True,
+                return_tensors="pt"
+            )
             outputs = ctx_encoder(**inputs)
-        batch_embeddings = outputs.pooler_output.cpu().numpy()
-        embeddings.append(batch_embeddings)
-        del outputs  # Explicit cleanup
-        torch.cuda.empty_cache()  # Clear GPU memory
+            embeddings.extend(outputs.pooler_output.cpu().numpy())
+
+            # Clear memory
+            del outputs
+            if torch.cuda.is_available():
+                torch.cuda.empty_cache()
 
-    embeddings = np.vstack(embeddings)
-    logging.info(f"Created embeddings with shape {embeddings.shape}")
+    # Convert to numpy array and build FAISS index
+    embeddings = np.array(embeddings)
+    dimension = embeddings.shape[1]
+
+    # Use more efficient index type
+    index = faiss.IndexFlatIP(dimension)  # Simple but efficient dot-product index
 
-    # Create dataset
+    # Normalize vectors to use dot product as similarity
+    faiss.normalize_L2(embeddings)
+    index.add(embeddings)
+
+    # Create and save the dataset
     dataset = Dataset.from_dict({
-        "id": [p["id"] for p in papers],
-        "text": [p["text"] for p in papers],
-        "title": [p["title"] for p in papers],
-        "embeddings": [emb.tolist() for emb in embeddings],
+        "text": texts,
+        "embeddings": embeddings,
+        "title": [p["title"] for p in papers]
     })
 
-    # Create FAISS index
-    dimension = embeddings.shape[1]
-    quantizer = faiss.IndexFlatL2(dimension)
-    index = faiss.IndexQuantizer(dimension, quantizer, 8)
-    index.train(embeddings.astype(np.float32))
-    index.add(embeddings.astype(np.float32))
-
-    # Save dataset and index
+    # Create directory if it doesn't exist
     os.makedirs(dataset_dir, exist_ok=True)
-    dataset_path = os.path.join(dataset_dir, "dataset")
-    index_path = os.path.join(dataset_dir, "embeddings.faiss")
-    dataset.save_to_disk(dataset_path)
-    faiss.write_index(index, index_path)
-    logging.info(f"Saved dataset to {dataset_path}")
-    logging.info(f"Saved index to {index_path}")
+
+    # Save dataset
+    dataset.save_to_disk(os.path.join(dataset_dir, "dataset"))
+    logging.info(f"Dataset saved to {dataset_dir}")
     return dataset_dir
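
Note (not part of this commit): faiss only accepts float32 arrays, and with the encoder loaded in torch.float16 the pooled embeddings come back as float16, so np.array(embeddings) needs an explicit cast before normalize_L2/add. The rewrite also drops the old faiss.write_index call, so the index now exists only in memory and is lost when the process exits. A minimal sketch of both fixes, reusing index and dataset_dir from the hunk above:

    # Hedged sketch: cast to float32 for faiss, and persist the index again
    embeddings = np.array(embeddings, dtype=np.float32)
    faiss.normalize_L2(embeddings)  # after this, inner product == cosine similarity
    index.add(embeddings)
    faiss.write_index(index, os.path.join(dataset_dir, "embeddings.faiss"))

At query time the query vector must be normalized the same way, so that IndexFlatIP's inner product really behaves as cosine similarity:

    # q: float32 array of shape (1, dimension) from the DPR question encoder
    faiss.normalize_L2(q)
    scores, ids = index.search(q, 5)  # top-5 most similar papers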
requirements.txt CHANGED

@@ -1,10 +1,12 @@
-streamlit
-transformers
-datasets
-sentence-transformers
-faiss-cpu
-arxiv
+streamlit>=1.32.0
+transformers>=4.37.0
+datasets>=2.17.0
+sentence-transformers>=2.3.1
+faiss-cpu>=1.7.4
+arxiv>=2.1.0
 --extra-index-url https://download.pytorch.org/whl/cpu
-torch
+torch>=2.2.0
 accelerate>=0.26.0
 bitsandbytes>=0.41.1
+numpy>=1.24.0
+pandas>=2.2.0
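
Note: pip reads the --extra-index-url directive from the requirements file itself, so the CPU-only torch wheel is picked up by a plain install:

    pip install -r requirements.txt

bitsandbytes stays pinned even though the 8-bit DPR load was dropped in this commit, so it could likely be removed in a follow-up.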