Spaces:

VictorTomas09
/

my-rag-qa

Sleeping

App Files Files Community

VictorTomas09 committed 29 days ago

Commit

970694a

verified ·

1 Parent(s): ff2408d

Update app.py

Browse files

Files changed (1) hide show

app.py +47 -42

app.py CHANGED Viewed

@@ -28,20 +28,19 @@ DIST_THRESHOLD = float(os.getenv("DIST_THRESHOLD", 1.0))
 MAX_CTX_WORDS  = int(os.getenv("MAX_CTX_WORDS", 200))
 DEVICE = 0 if torch.cuda.is_available() else -1
 os.makedirs(DATA_DIR, exist_ok=True)
-print(f"Using MODEL_NAME={MODEL_NAME}, EMBEDDER_MODEL={EMBEDDER_MODEL}, device={'GPU' if DEVICE==0 else 'CPU'}")
 # ── 2. Helpers ──
 def make_context_snippets(contexts, max_words=MAX_CTX_WORDS):
-    out = []
     for c in contexts:
         words = c.split()
         if len(words) > max_words:
             c = " ".join(words[:max_words]) + " ... [truncated]"
-        out.append(c)
-    return out
 def chunk_text(text, max_tokens, stride=None):
     words = text.split()
@@ -57,20 +56,25 @@ def chunk_text(text, max_tokens, stride=None):
 # ── 3. Load & preprocess passages ──
 def load_passages():
     # 3.1 load raw corpora
-    wiki = load_dataset("rag-datasets/rag-mini-wikipedia", "text-corpus", split="passages")["passage"]
-    squad = load_dataset("rajpurkar/squad_v2", split="train[:100]")["context"]
     trivia_ds = load_dataset("mandarjoshi/trivia_qa", "rc", split="validation[:100]")
-    trivia = []
     for ex in trivia_ds:
         for fld in ("wiki_context", "search_context"):
             txt = ex.get(fld) or ""
-            if txt: trivia.append(txt)
-    all_passages = list(dict.fromkeys(wiki + squad + trivia))
-    # 3.2 chunk long passages
-    tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
-    max_tokens = tokenizer.model_max_length
     chunks = []
     for p in all_passages:
         toks = tokenizer.tokenize(p)
@@ -86,20 +90,24 @@ def load_passages():
 # ── 4. Build or load FAISS ──
 def load_faiss_index(passages):
-    # sentence‐transformers embedder + cross‐encoder
     embedder = SentenceTransformer(EMBEDDER_MODEL)
     reranker = CrossEncoder("cross-encoder/ms-marco-MiniLM-L-6-v2")
     if os.path.exists(INDEX_PATH) and os.path.exists(EMB_PATH):
-        print("Loading FAISS index & embeddings from disk …")
-        index = faiss.read_index(INDEX_PATH)
         embeddings = np.load(EMB_PATH)
     else:
-        print("Encoding passages & building FAISS index …")
-        embeddings = embedder.encode(passages, show_progress_bar=True, convert_to_numpy=True, batch_size=32)
         embeddings = embeddings / np.linalg.norm(embeddings, axis=1, keepdims=True)
-        dim = embeddings.shape[1]
         index = faiss.IndexFlatIP(dim)
         index.add(embeddings)
@@ -108,9 +116,8 @@ def load_faiss_index(passages):
     return embedder, reranker, index
-# ── 5. Set up RAG pipeline ──
 def setup_rag():
-    # 5.1 load or build index + embedder/reranker
     if os.path.exists(PCTX_PATH):
         with open(PCTX_PATH, "rb") as f:
             passages = pickle.load(f)
@@ -119,8 +126,7 @@ def setup_rag():
     embedder, reranker, index = load_faiss_index(passages)
-    # 5.2 load generator model & HF pipeline
-    tok = AutoTokenizer.from_pretrained(MODEL_NAME)
     model = AutoModelForSeq2SeqLM.from_pretrained(MODEL_NAME)
     qa_pipe = hf_pipeline(
         "text2text-generation",
@@ -129,28 +135,28 @@ def setup_rag():
         device=DEVICE,
         truncation=True,
         max_length=512,
-        num_beams=4,        # optional: enable beam search
         early_stopping=True
     )
     return passages, embedder, reranker, index, qa_pipe
-# ── 6. Retrieval + Generation ──
 def retrieve(question, passages, embedder, index, k=20, rerank_k=5):
-    q_emb = embedder.encode([question], convert_to_numpy=True)
     distances, idxs = index.search(q_emb, k)
-    cands = [passages[i] for i in idxs[0]]
     scores = reranker.predict([[question, c] for c in cands])
-    top = np.argsort(scores)[-rerank_k:][::-1]
-    final_ctxs = [cands[i] for i in top]
-    final_dists = [distances[0][i] for i in top]
-    return final_ctxs, final_dists
 def generate(question, contexts, qa_pipe):
-    lines = [ f"Context {i+1}: {s}"
-              for i,s in enumerate(make_context_snippets(contexts)) ]
     prompt = (
         "You are a helpful assistant. Use ONLY the following contexts to answer. "
         "If the answer is not contained, say 'Sorry, I don't know.'\n\n"
@@ -160,20 +166,18 @@ def generate(question, contexts, qa_pipe):
     return qa_pipe(prompt)[0]["generated_text"].strip()
 def retrieve_and_answer(question, passages, embedder, reranker, index, qa_pipe):
-    ctxs, dists = retrieve(question, passages, embedder, index)
-    if not ctxs or dists[0] > DIST_THRESHOLD:
         return "Sorry, I don't know.", []
-    ans = generate(question, ctxs, qa_pipe)
-    return ans, ctxs
-def answer_and_contexts(question,
-                        passages, embedder, reranker, index, qa_pipe):
     ans, ctxs = retrieve_and_answer(question, passages, embedder, reranker, index, qa_pipe)
     if not ctxs:
         return ans, ""
     snippets = [
-        f"Context {i+1}: {s}"
-        for i,s in enumerate(make_context_snippets(ctxs))
     ]
     return ans, "\n\n---\n\n".join(snippets)
@@ -191,7 +195,8 @@ def main():
             "When was Abraham Lincoln inaugurated?",
             "What is the capital of France?",
             "Who wrote '1984'?"
-        ]
     )
     demo.launch()

 MAX_CTX_WORDS  = int(os.getenv("MAX_CTX_WORDS", 200))
 DEVICE = 0 if torch.cuda.is_available() else -1
 os.makedirs(DATA_DIR, exist_ok=True)
+print(f"MODEL={MODEL_NAME}, EMBEDDER={EMBEDDER_MODEL}, DEVICE={'GPU' if DEVICE==0 else 'CPU'}")
 # ── 2. Helpers ──
 def make_context_snippets(contexts, max_words=MAX_CTX_WORDS):
     # Return a new list with one entry per item of `contexts`. An entry with
     # more than `max_words` whitespace-separated words is cut to its first
     # `max_words` words and suffixed with " ... [truncated]"; shorter entries
     # are passed through unchanged.
     # `max_words` defaults to the value MAX_CTX_WORDS had when this def ran.
+    snippets = []
     for c in contexts:
         words = c.split()
         if len(words) > max_words:
             # split()/join() collapses every whitespace run (newlines included)
             # into a single space in the truncated text.
             c = " ".join(words[:max_words]) + " ... [truncated]"
+        snippets.append(c)
+    return snippets
 def chunk_text(text, max_tokens, stride=None):
     words = text.split()
 # ── 3. Load & preprocess passages ──
 def load_passages():
     # 3.1 load raw corpora
+    wiki_ds   = load_dataset("rag-datasets/rag-mini-wikipedia", "text-corpus", split="passages")
+    squad_ds  = load_dataset("rajpurkar/squad_v2", split="train[:100]")
     trivia_ds = load_dataset("mandarjoshi/trivia_qa", "rc", split="validation[:100]")
+    wiki_passages   = wiki_ds["passage"]
+    squad_passages  = [ex["context"] for ex in squad_ds]
+    trivia_passages = []
     for ex in trivia_ds:
         for fld in ("wiki_context", "search_context"):
             txt = ex.get(fld) or ""
+            if txt:
+                trivia_passages.append(txt)
+    # dedupe
+    all_passages = list(dict.fromkeys(wiki_passages + squad_passages + trivia_passages))
+    # chunk long passages
+    tokenizer  = AutoTokenizer.from_pretrained(MODEL_NAME)
+    max_tokens = tokenizer.model_max_length
     chunks = []
     for p in all_passages:
         toks = tokenizer.tokenize(p)
 # ── 4. Build or load FAISS ──
 def load_faiss_index(passages):
     embedder = SentenceTransformer(EMBEDDER_MODEL)
     reranker = CrossEncoder("cross-encoder/ms-marco-MiniLM-L-6-v2")
     if os.path.exists(INDEX_PATH) and os.path.exists(EMB_PATH):
+        print("Loading FAISS index & embeddings…")
+        index      = faiss.read_index(INDEX_PATH)
         embeddings = np.load(EMB_PATH)
     else:
+        print("Encoding passages & building FAISS index…")
+        embeddings = embedder.encode(
+            passages,
+            show_progress_bar=True,
+            convert_to_numpy=True,
+            batch_size=32
+        )
         embeddings = embeddings / np.linalg.norm(embeddings, axis=1, keepdims=True)
+        dim   = embeddings.shape[1]
         index = faiss.IndexFlatIP(dim)
         index.add(embeddings)
     return embedder, reranker, index
+# ── 5. Initialize RAG components ──
 def setup_rag():
     if os.path.exists(PCTX_PATH):
         with open(PCTX_PATH, "rb") as f:
             passages = pickle.load(f)
     embedder, reranker, index = load_faiss_index(passages)
+    tok   = AutoTokenizer.from_pretrained(MODEL_NAME)
     model = AutoModelForSeq2SeqLM.from_pretrained(MODEL_NAME)
     qa_pipe = hf_pipeline(
         "text2text-generation",
         device=DEVICE,
         truncation=True,
         max_length=512,
+        num_beams=4,
         early_stopping=True
     )
     return passages, embedder, reranker, index, qa_pipe
+# ── 6. Retrieval & generation ──
 def retrieve(question, passages, embedder, index, k=20, rerank_k=5):
     # Embed `question`, fetch the `k` nearest entries from `index`, rescore
     # those candidates with `reranker`, and keep the `rerank_k` best.
     # Returns two parallel lists: the kept passages ordered by reranker score
     # (highest first) and, for each, the score `index.search` gave it.
     # NOTE(review): `reranker` is not a parameter of this function, so it is
     # resolved as a module-level name; confirm one exists, otherwise this
     # raises NameError.
     # NOTE(review): load_faiss_index L2-normalises the passage embeddings
     # before adding them to an IndexFlatIP, but the query vector is not
     # normalised here; confirm the embedder already returns unit vectors.
+    q_emb      = embedder.encode([question], convert_to_numpy=True)
     distances, idxs = index.search(q_emb, k)
     # NOTE(review): presumably the index can return -1 ids when it holds fewer
     # than `k` vectors, which would silently select passages[-1]; verify.
+    cands  = [passages[i] for i in idxs[0]]
     scores = reranker.predict([[question, c] for c in cands])
     # argsort is ascending: take the last `rerank_k` positions and reverse them
     # so the best-scored candidate comes first.
+    top    = np.argsort(scores)[-rerank_k:][::-1]
     # `top` indexes into `cands`, which is aligned with distances[0]. The
     # returned scores therefore follow reranker order, not index order.
+    return [cands[i] for i in top], [distances[0][i] for i in top]
 def generate(question, contexts, qa_pipe):
+    lines = [
+        f"Context {i+1}: {s}"
+        for i, s in enumerate(make_context_snippets(contexts))
+    ]
     prompt = (
         "You are a helpful assistant. Use ONLY the following contexts to answer. "
         "If the answer is not contained, say 'Sorry, I don't know.'\n\n"
     return qa_pipe(prompt)[0]["generated_text"].strip()
 def retrieve_and_answer(question, passages, embedder, reranker, index, qa_pipe):
     # Retrieve contexts for `question` and generate an answer from them.
     # Returns (answer, contexts); when nothing is retrieved or the gate below
     # trips, returns the fixed fallback answer with an empty context list.
     # NOTE(review): `reranker` is accepted here but is not forwarded to
     # retrieve(), which reads a name `reranker` from its own enclosing scope.
+    contexts, dists = retrieve(question, passages, embedder, index)
     # dists[0] is the index-search score of the top reranked context.
     # NOTE(review): the index built in this file is an IndexFlatIP, where a
     # larger score presumably means "more similar"; rejecting on
     # `> DIST_THRESHOLD` reads like a distance-style check. Confirm the
     # intended direction of the comparison.
+    if not contexts or dists[0] > DIST_THRESHOLD:
          return "Sorry, I don't know.", []
+    return generate(question, contexts, qa_pipe), contexts
+def answer_and_contexts(question, passages, embedder, reranker, index, qa_pipe):
      # Wrap retrieve_and_answer() and return (answer, contexts_text), where
      # contexts_text is a single string, or "" when no contexts came back.
      ans, ctxs = retrieve_and_answer(question, passages, embedder, reranker, index, qa_pipe)
      if not ctxs:
          return ans, ""
      # Number the contexts from 1 and cap each one's length via
      # make_context_snippets().
      snippets = [
+        f"Context {i+1}: {s}"
+        for i, s in enumerate(make_context_snippets(ctxs))
      ]
      # Entries are separated by a "---" line with a blank line on each side.
      return ans, "\n\n---\n\n".join(snippets)
             "When was Abraham Lincoln inaugurated?",
             "What is the capital of France?",
             "Who wrote '1984'?"
+        ],
+        allow_flagging="never",
     )
     demo.launch()