Spaces:
Runtime error
Runtime error
Rohit Rajpoot
committed on
Commit
·
7b06e2c
1
Parent(s):
cebfc21
Add RAG over training.txt for DeepSeek
Browse files
- app.py +52 -25
- requirements.txt +2 -1
app.py
CHANGED
|
@@ -7,6 +7,9 @@ from assist.transformer_demo import transformer_next
|
|
| 7 |
|
| 8 |
# DeepSeek imports
|
| 9 |
from transformers import AutoModelForCausalLM, AutoTokenizer, TextGenerationPipeline
|
|
|
|
|
|
|
|
|
|
| 10 |
|
| 11 |
st.set_page_config(page_title="RepoSage All-in-One Demo", layout="wide")
|
| 12 |
st.title("🤖 RepoSage Unified Demo")
|
|
@@ -21,46 +24,70 @@ def load_deepseek():
|
|
| 21 |
|
| 22 |
deepseek_gen = load_deepseek()
|
| 23 |
|
| 24 |
-
#
|
| 25 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 26 |
|
| 27 |
-
#
|
| 28 |
-
|
|
|
|
| 29 |
|
| 30 |
-
|
| 31 |
-
|
| 32 |
-
|
| 33 |
-
|
|
|
|
| 34 |
|
|
|
|
| 35 |
with col1:
|
| 36 |
if st.button("DeepSeek-R1 Math Demo"):
|
| 37 |
-
if not
|
| 38 |
st.warning("Please enter a prompt first.")
|
| 39 |
else:
|
| 40 |
-
|
| 41 |
-
prompt = f"{math_prefix}\n\nf(x) = {question}\n\nSolution:\n"
|
| 42 |
-
# 2) Call the model deterministically
|
| 43 |
with st.spinner("Working it out…"):
|
| 44 |
-
out = deepseek_gen(
|
| 45 |
-
prompt,
|
| 46 |
-
max_new_tokens=80,
|
| 47 |
-
do_sample=False, # no random sampling
|
| 48 |
-
temperature=0.0 # fully deterministic
|
| 49 |
-
)
|
| 50 |
-
# 3) Display the clean, step-by-step answer
|
| 51 |
st.code(out[0]["generated_text"], language="text")
|
| 52 |
|
|
|
|
| 53 |
with col2:
|
| 54 |
-
if st.button("
|
| 55 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 56 |
|
|
|
|
| 57 |
with col3:
|
| 58 |
-
if st.button("
|
| 59 |
-
st.write(
|
| 60 |
|
|
|
|
| 61 |
with col4:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 62 |
if st.button("Transformer Demo"):
|
| 63 |
-
st.write(transformer_next(
|
| 64 |
|
| 65 |
st.markdown("---")
|
| 66 |
-
st.caption("DeepSeek-R1, Embedding, Bayesian & Transformer demos all in one place ✅")
|
|
|
|
| 7 |
|
| 8 |
# DeepSeek imports
|
| 9 |
from transformers import AutoModelForCausalLM, AutoTokenizer, TextGenerationPipeline
|
| 10 |
+
# Retrieval imports
|
| 11 |
+
from sentence_transformers import SentenceTransformer
|
| 12 |
+
import torch
|
| 13 |
|
| 14 |
st.set_page_config(page_title="RepoSage All-in-One Demo", layout="wide")
|
| 15 |
st.title("🤖 RepoSage Unified Demo")
|
|
|
|
| 24 |
|
| 25 |
deepseek_gen = load_deepseek()
|
| 26 |
|
| 27 |
+
# Cache and load training corpus passages
|
| 28 |
+
@st.cache_data
|
| 29 |
+
def load_passages(path="training.txt"):
|
| 30 |
+
text = open(path, encoding="utf8").read()
|
| 31 |
+
paras = [p.strip() for p in text.split("\n\n") if p.strip()]
|
| 32 |
+
return paras
|
| 33 |
+
|
| 34 |
+
# Cache and embed passages
|
| 35 |
+
@st.cache_resource
|
| 36 |
+
def embed_passages(passages):
|
| 37 |
+
encoder = SentenceTransformer("all-MiniLM-L6-v2")
|
| 38 |
+
embeddings = encoder.encode(passages, convert_to_tensor=True)
|
| 39 |
+
return encoder, passages, embeddings
|
| 40 |
|
| 41 |
+
# Prepare RAG resources
|
| 42 |
+
_passages = load_passages()
|
| 43 |
+
_encoder, passages, passage_embs = embed_passages(_passages)
|
| 44 |
|
| 45 |
+
# User input
|
| 46 |
+
title = st.text_input("Enter your question or prompt below:")
|
| 47 |
+
|
| 48 |
+
# Define columns for five demos
|
| 49 |
+
col1, col2, col3, col4, col5 = st.columns(5)
|
| 50 |
|
| 51 |
+
# Math demo in col1
|
| 52 |
with col1:
|
| 53 |
if st.button("DeepSeek-R1 Math Demo"):
|
| 54 |
+
if not title.strip():
|
| 55 |
st.warning("Please enter a prompt first.")
|
| 56 |
else:
|
| 57 |
+
prompt = f"You are an expert math tutor. Compute the derivative of f(x) = {title} step by step using the product rule. Solution:\n"
|
|
|
|
|
|
|
| 58 |
with st.spinner("Working it out…"):
|
| 59 |
+
out = deepseek_gen(prompt, max_new_tokens=80, do_sample=False, temperature=0.0)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 60 |
st.code(out[0]["generated_text"], language="text")
|
| 61 |
|
| 62 |
+
# RAG-augmented demo in col2
|
| 63 |
with col2:
|
| 64 |
+
if st.button("DeepSeek-R1 RAG Demo"):
|
| 65 |
+
if not title.strip():
|
| 66 |
+
st.warning("Please enter a question first.")
|
| 67 |
+
else:
|
| 68 |
+
q_emb = _encoder.encode(title, convert_to_tensor=True)
|
| 69 |
+
sims = torch.nn.functional.cosine_similarity(q_emb.unsqueeze(0), passage_embs)
|
| 70 |
+
topk = torch.topk(sims, k=min(3, len(passages))).indices.tolist()
|
| 71 |
+
context = "\n\n".join(passages[i] for i in topk)
|
| 72 |
+
prompt = f"Use these notes to answer the question:\n\n{context}\n\nQ: {title}\nA:"
|
| 73 |
+
with st.spinner("Retrieving & generating…"):
|
| 74 |
+
out = deepseek_gen(prompt, max_new_tokens=100, do_sample=False)
|
| 75 |
+
st.write(out[0]["generated_text"])
|
| 76 |
|
| 77 |
+
# Embedding Q&A in col3
|
| 78 |
with col3:
|
| 79 |
+
if st.button("Embedding Q&A"):
|
| 80 |
+
st.write(embed_chat(title))
|
| 81 |
|
| 82 |
+
# Bayesian Q&A in col4
|
| 83 |
with col4:
|
| 84 |
+
if st.button("Bayesian Q&A"):
|
| 85 |
+
st.write(bayes_chat(title))
|
| 86 |
+
|
| 87 |
+
# Transformer Demo in col5
|
| 88 |
+
with col5:
|
| 89 |
if st.button("Transformer Demo"):
|
| 90 |
+
st.write(transformer_next(title))
|
| 91 |
|
| 92 |
st.markdown("---")
|
| 93 |
+
st.caption("DeepSeek-R1 Math, RAG, Embedding, Bayesian & Transformer demos all in one place ✅")
|
requirements.txt
CHANGED
|
@@ -5,4 +5,5 @@ streamlit==1.46.0
|
|
| 5 |
typer==0.16.0
|
| 6 |
rich==14.0.0
|
| 7 |
torch==2.7.1
|
| 8 |
-
transformers
|
|
|
|
|
|
| 5 |
typer==0.16.0
|
| 6 |
rich==14.0.0
|
| 7 |
torch==2.7.1
|
| 8 |
+
transformers
|
| 9 |
+
sentence-transformers
|