File size: 4,739 Bytes
8e0d36a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
# app.py
import os
import streamlit as st
from dotenv import load_dotenv
from langchain.docstore.document import Document
from langchain_community.retrievers import BM25Retriever
from langchain.tools import Tool
from langgraph.graph.message import add_messages
from langgraph.graph import START, StateGraph
from langgraph.prebuilt import ToolNode, tools_condition
from langchain_core.messages import AnyMessage, HumanMessage
from langchain_groq import ChatGroq
from typing import TypedDict, Annotated
import fitz  # PyMuPDF

# Load .env vars
load_dotenv()

# os.getenv returns None when the variable is absent, and assigning None into
# os.environ raises TypeError — so validate first and fail fast with a clear
# message instead of crashing with an unrelated traceback.
groq_api_key = os.getenv("GROQ_API_KEY")
if not groq_api_key:
    st.error("GROQ_API_KEY is not set. Add it to your .env file before running the app.")
    st.stop()
os.environ["GROQ_API_KEY"] = groq_api_key

# --- PDF uploader and parser ---
def parse_pdfs(uploaded_files):
    """Read each uploaded PDF with PyMuPDF and wrap its full text in a Document.

    Every resulting Document carries the originating filename in its
    ``source`` metadata so retrieval hits can be traced back to a file.
    Returns a list of Documents, one per uploaded file (empty list for
    empty input).
    """
    documents = []
    for upload in uploaded_files:
        # fitz accepts the raw bytes of the upload as an in-memory stream.
        with fitz.open(stream=upload.read(), filetype="pdf") as pdf:
            full_text = "".join(page.get_text() for page in pdf)
        documents.append(
            Document(page_content=full_text, metadata={"source": upload.name})
        )
    return documents

# --- Guest info retrieval ---
def build_retriever(all_docs):
    """Build a BM25 (lexical, keyword-based) retriever over the given Documents."""
    return BM25Retriever.from_documents(all_docs)

def extract_text(query: str, retriever):
    """Run *query* against *retriever* and join the top three hits.

    Returns the matched page contents separated by blank lines, or an
    Arabic "no matching information found" message when the retriever
    returns nothing.
    """
    matches = retriever.invoke(query)
    if not matches:
        return "لم يتم العثور على معلومات مطابقة في الملفات."
    return "\n\n".join(doc.page_content for doc in matches[:3])


# --- Streamlit UI ---
# Streamlit re-runs this whole script top-to-bottom on every widget
# interaction; persistent data must live in st.session_state.
st.set_page_config(page_title="NINU Agent", page_icon="🏛️")
st.title("🏛️ NINU - Guest & PDF Assistant")

st.markdown("** Hint:** NINU can help summarize lectures and quiz you step-by-step in simple English.")

# Initialize session state to hold conversation history across reruns.
# The list stores LangChain message objects (HumanMessage / AI replies).
if "conversation_history" not in st.session_state:
    st.session_state.conversation_history = []



# User input area (label is Arabic: "Write your question or continue studying here")
query = st.text_area("📝 اكتب سؤالك أو كمل مذاكرتك هنا:")

# Label is Arabic: "Upload lecture PDF files". Multiple files allowed.
uploaded_files = st.file_uploader("📄 ارفع ملفات PDF للمحاضرات", type=["pdf"], accept_multiple_files=True)

# Everything below runs only on an explicit button click with a non-empty query.
# NOTE(review): the retriever, tool, LLM binding, and graph are rebuilt from
# scratch on every click — correct but wasteful; consider caching if it
# becomes slow.
if st.button("Ask NINU") and query:
    # 1. Parse PDF
    user_docs = parse_pdfs(uploaded_files) if uploaded_files else []
    # NOTE(review): when no files are uploaded this calls
    # BM25Retriever.from_documents([]) — likely to raise on an empty corpus;
    # confirm and guard upstream if so.
    bm25_retriever = build_retriever(user_docs)

    # 2. Create Tool — wraps lexical retrieval over the uploaded PDFs so the
    # LLM can call it by name during the agent loop.
    NINU_tool = Tool(
        name="NINU_Lec_retriever",
        func=lambda q: extract_text(q, bm25_retriever),
        description="Retrieves content from uploaded PDFs based on a query."
    )

    # 3. Create LLM with tools
    llm = ChatGroq(model="deepseek-r1-distill-llama-70b", groq_api_key=groq_api_key)
    tools = [NINU_tool]
    llm_with_tools = llm.bind_tools(tools)

    # Graph state: a message list; add_messages appends (rather than replaces)
    # on each node update.
    class AgentState(TypedDict):
        messages: Annotated[list[AnyMessage], add_messages]

    # Single LLM step: feed the accumulated messages to the tool-bound model
    # and append its reply to the state.
    def assistant(state: AgentState):
        return {
            "messages": [llm_with_tools.invoke(state["messages"])]
        }

    # 4. Build Agent Graph: assistant -> (tools if a tool call was emitted,
    # else END, via tools_condition) -> back to assistant after tool output.
    builder = StateGraph(AgentState)
    builder.add_node("assistant", assistant)
    builder.add_node("tools", ToolNode(tools))
    builder.add_edge(START, "assistant")
    builder.add_conditional_edges("assistant", tools_condition)
    builder.add_edge("tools", "assistant")
    NINU = builder.compile()

    # 5. Prepare full conversation messages
    if len(st.session_state.conversation_history) == 0:
        # Seed the very first turn with a study-coach system-style prompt
        # (sent as a HumanMessage) that sets the step-by-step quiz format.
        intro_prompt = """

I uploaded a lecture PDF. I want you to study it with me step by step.



- Summarize the lecture part by part.

- Explain each part in very simple English like you're teaching a friend.

- After each part, ask me 2-3 MCQ questions in English.

- Wait for my answer before moving to the next part.

- If I answer incorrectly, explain why.



Let's begin! 💪

"""
        st.session_state.conversation_history.append(HumanMessage(content=intro_prompt))

    # Add the new user message
    st.session_state.conversation_history.append(HumanMessage(content=query))

    # 6. Invoke agent with full conversation — the graph returns the state
    # with all messages (inputs, tool results, and the final AI reply).
    response = NINU.invoke({"messages": st.session_state.conversation_history})

    # 7. Add assistant response to history (last message is the final AI turn)
    assistant_reply = response["messages"][-1]
    st.session_state.conversation_history.append(assistant_reply)

    # 8. Show output
    st.markdown("###  NINU's Response:")
    st.write(assistant_reply.content)

    # 9. Show full conversation history (optional)
    with st.expander("🧾 Show full conversation history"):
        for msg in st.session_state.conversation_history:
            # msg.type is "human" for user turns; everything else is the agent.
            role = " You" if msg.type == "human" else " NINU"
            st.markdown(f"**{role}:** {msg.content}")