Spaces:

UKURIKIYEYEZU
/

RRA_Chatbot

Sleeping

App Files Files Community

UKURIKIYEYEZU committed on Jan 30

Commit

c353a73

verified ·

1 Parent(s): 2e609d0

Update app.py

Browse files

Files changed (1) hide show

app.py +255 -48

app.py CHANGED Viewed

@@ -1,64 +1,271 @@
 import gradio as gr
-from huggingface_hub import InferenceClient
-"""
-For more information on `huggingface_hub` Inference API support, please check the docs: https://huggingface.co/docs/huggingface_hub/v0.22.2/en/guides/inference
-"""
-client = InferenceClient("HuggingFaceH4/zephyr-7b-beta")
-def respond(
-    message,
-    history: list[tuple[str, str]],
-    system_message,
-    max_tokens,
-    temperature,
-    top_p,
-):
-    messages = [{"role": "system", "content": system_message}]
-    for val in history:
-        if val[0]:
-            messages.append({"role": "user", "content": val[0]})
-        if val[1]:
-            messages.append({"role": "assistant", "content": val[1]})
-    messages.append({"role": "user", "content": message})
-    response = ""
-    for message in client.chat_completion(
-        messages,
-        max_tokens=max_tokens,
-        stream=True,
-        temperature=temperature,
-        top_p=top_p,
-    ):
-        token = message.choices[0].delta.content
-        response += token
-        yield response
 """
-For information on how to customize the ChatInterface, peruse the gradio docs: https://www.gradio.app/docs/chatinterface
-"""
 demo = gr.ChatInterface(
-    respond,
-    additional_inputs=[
-        gr.Textbox(value="You are a friendly Chatbot.", label="System message"),
-        gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
-        gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
-        gr.Slider(
-            minimum=0.1,
-            maximum=1.0,
-            value=0.95,
-            step=0.05,
-            label="Top-p (nucleus sampling)",
-        ),
-    ],
 )
 if __name__ == "__main__":
-    demo.launch()

+import os
+from langchain_groq import ChatGroq
+from langchain.prompts import ChatPromptTemplate, PromptTemplate
+from langchain.output_parsers import ResponseSchema, StructuredOutputParser
+from urllib.parse import urljoin, urlparse
+import requests
+from io import BytesIO
+from langchain_chroma import Chroma
+import requests
+from bs4 import BeautifulSoup
+from langchain_core.prompts import ChatPromptTemplate
 import gradio as gr
+from PyPDF2 import PdfReader
+from langchain_huggingface import HuggingFaceEmbeddings
+# Groq API key, read from the ACCESS environment variable (None when it is unset).
+groq_api_key= os.environ.get('ACCESS')
+# Embedding model handed to the Chroma vector store as its embedding_function.
+embed_model = HuggingFaceEmbeddings(model_name="mixedbread-ai/mxbai-embed-large-v1")
+def scrape_websites(base_urls):
+    """Scrape each base URL and the internal links found on it (one level deep).
+
+    HTML pages are reduced to visible text with clean_body_content(); links
+    ending in ".pdf" are read with extract_pdf_text() instead of being fetched
+    as HTML, so binary PDF bytes are never stored as page text.
+
+    Args:
+        base_urls: Iterable of URL strings. Blank entries are skipped.
+
+    Returns:
+        dict mapping every URL that yielded content to its extracted text.
+        If an unexpected error interrupts the crawl, the error is printed and
+        whatever was collected up to that point is returned.
+    """
+    visited_links = set()  # URLs already attempted, so none is fetched twice
+    content_by_url = {}  # Extracted text keyed by URL
+    try:
+        for base_url in base_urls:
+            if not base_url.strip():
+                continue  # Skip empty or whitespace-only URLs
+            print(f"Scraping base URL: {base_url}")
+            html_content = fetch_page_content(base_url)
+            if not html_content:
+                continue
+            content_by_url[base_url] = clean_body_content(html_content)
+            visited_links.add(base_url)
+            soup = BeautifulSoup(html_content, "html.parser")
+            for link in extract_internal_links(base_url, soup):
+                if link in visited_links:
+                    continue
+                # Mark the link before fetching so a failing URL is not
+                # retried when it shows up again under another base URL.
+                visited_links.add(link)
+                if link.lower().endswith('.pdf'):
+                    print(f"Extracting PDF content from: {link}")
+                    pdf_content = extract_pdf_text(link)
+                    if pdf_content:
+                        content_by_url[link] = pdf_content
+                    continue
+                print(f"Scraping link: {link}")
+                page_content = fetch_page_content(link)
+                if page_content:
+                    content_by_url[link] = clean_body_content(page_content)
+    except Exception as e:
+        # Best-effort crawl: report the problem but keep what was gathered.
+        print(f"Error during scraping: {e}")
+    return content_by_url
+def fetch_page_content(url):
+    """Return the body of a GET request to ``url`` as text, or None on failure.
+
+    The request uses a 10 second timeout. Any requests exception, including
+    an HTTP error status, is printed and results in None.
+    """
+    try:
+        page = requests.get(url, timeout=10)
+        page.raise_for_status()
+        body = page.text
+    except requests.exceptions.RequestException as e:
+        print(f"Error fetching {url}: {e}")
+        body = None
+    return body
+def extract_internal_links(base_url, soup):
+    """Collect the absolute URLs of all same-host links in a parsed page.
+
+    Args:
+        base_url: URL of the page; relative hrefs are resolved against it.
+        soup: Parsed page; every ``<a>`` element with an href is considered.
+
+    Returns:
+        set of absolute URLs on the same network location as ``base_url``.
+        Fragments ("#section") are removed, so anchors that point into the
+        same page collapse to a single URL instead of being fetched repeatedly.
+    """
+    # Imported here because the file's top-level import does not include it.
+    from urllib.parse import urldefrag
+
+    links = set()
+    for anchor in soup.find_all("a", href=True):
+        full_url, _fragment = urldefrag(urljoin(base_url, anchor["href"]))
+        if is_internal_link(base_url, full_url):
+            links.add(full_url)
+    return links
+def is_internal_link(base_url, link_url):
+    """Return True when both URLs share the same network location (netloc)."""
+    base_host, link_host = (urlparse(u).netloc for u in (base_url, link_url))
+    return link_host == base_host
+def extract_pdf_text(pdf_url):
+    """Download a PDF and return the concatenated text of its pages.
+
+    Args:
+        pdf_url: URL of the PDF document.
+
+    Returns:
+        The extracted text, or None when the download fails, the file cannot
+        be read, or no text is found. Errors are printed, not raised.
+    """
+    try:
+        # Same timeout as fetch_page_content so a stalled download cannot
+        # block the crawl indefinitely.
+        response = requests.get(pdf_url, timeout=10)
+        response.raise_for_status()
+        with BytesIO(response.content) as file:
+            reader = PdfReader(file)
+            # "or ''" guards against a page yielding no text object, so one
+            # empty page does not discard the whole document.
+            pdf_text = "".join(page.extract_text() or "" for page in reader.pages)
+        return pdf_text if pdf_text else None
+    except requests.exceptions.RequestException as e:
+        print(f"Error fetching PDF {pdf_url}: {e}")
+        return None
+    except Exception as e:
+        print(f"Error reading PDF {pdf_url}: {e}")
+        return None
+def clean_body_content(html_content):
+    """Return the text of an HTML document, one non-empty line per row.
+
+    ``<script>`` and ``<style>`` elements are removed before the text is
+    extracted; every line is stripped and blank lines are dropped.
+    """
+    soup = BeautifulSoup(html_content, "html.parser")
+    for tag in soup(["script", "style"]):
+        tag.extract()
+    raw_text = soup.get_text(separator="\n")
+    stripped_lines = (line.strip() for line in raw_text.splitlines())
+    return "\n".join(line for line in stripped_lines if line)
+# Scraped (url, content) pairs. Bound before the guard so the name exists even
+# when the module is imported and the scraping below is skipped; previously
+# that case failed with a NameError at the first use of temp_list.
+temp_list = []
+if __name__ == "__main__":
+    website = ["https://www.rra.gov.rw/en/publications",
+               "https://www.rra.gov.rw/en/customs-services"
+               ]
+    all_content = scrape_websites(website)
+    temp_list = list(all_content.items())
+# One "url: ..., content: ..." string per scraped page. temp_list only ever
+# holds (url, content) tuples, so no other element types need handling.
+processed_texts = [f"url: {url}, content: {content}" for url, content in temp_list]
+def chunk_string(s, chunk_size=1000):
+    """Split ``s`` into consecutive pieces of at most ``chunk_size`` characters.
+
+    The final piece may be shorter; an empty string gives an empty list.
+    """
+    pieces = []
+    for start in range(0, len(s), chunk_size):
+        pieces.append(s[start:start + chunk_size])
+    return pieces
+import hashlib
+# Flatten every processed text into fixed-size chunks ready for embedding.
+chunked_texts = [chunk for text in processed_texts for chunk in chunk_string(text)]
+vectorstore = Chroma(
+    collection_name="RRA",
+    embedding_function=embed_model,
+    persist_directory="./",
+)
+# The collection is persisted on disk, so each chunk is keyed by a hash of its
+# text: the same chunk then maps to the same id on every start instead of
+# being appended again under a fresh random id. The dict also removes
+# identical chunks within this batch.
+# NOTE(review): relies on add_texts overwriting entries whose id already
+# exists — confirm against the installed langchain_chroma version.
+chunks_by_id = {
+    hashlib.sha256(chunk.encode("utf-8")).hexdigest(): chunk
+    for chunk in chunked_texts
+}
+# Nothing to add when scraping produced no text (e.g. all requests failed).
+if chunks_by_id:
+    vectorstore.add_texts(list(chunks_by_id.values()), ids=list(chunks_by_id.keys()))
+# Prompt text for the RAG chain; {context} and {question} are its two placeholders.
+# NOTE(review): guidelines 4 and 6 refer to the current date/time and to the
+# user's earlier interactions, but the template has no placeholder for either —
+# confirm whether they are meant to be supplied.
+template = ("""
+    You are a friendly and intelligent chatbot designed to assist users in a conversational and human-like manner. Your goal is to provide accurate, helpful, and engaging responses from the provided context: {context} while maintaining a natural tone. Follow these guidelines:
+    1. **Greetings:** If the user greets you (e.g., "Morning," "Hello," "Hi"), respond warmly and acknowledge the greeting. For example:
+       - "😊 Good morning! How can I assist you today?"
+       - "Hello! What can I do for you? 🚀"
+    2. **Extract Information:** If the user asks for specific information, extract only the relevant details from the provided context: {context}.
+    3. **Human-like Interaction:** Respond in a warm, conversational tone. Use emojis occasionally to make the interaction more engaging (e.g., 😊, 🚀).
+    4. **Stay Updated:** Acknowledge the current date and time to show you are aware of real-time updates.
+    5. **No Extra Content:** If no information matches the user's request, respond politely: "I don't have that information at the moment, but I'm happy to help with something else! 😊"
+    6. **Personalized Interaction:** Use the user's historical interactions (if available) to tailor your responses and make the conversation more personalized.
+    7. **Direct Data Only:** If the user requests specific data, provide only the requested information without additional explanations unless asked.
+    Context: {context}
+    User's Question: {question}
+    Your Response:
+""")
+rag_prompt = PromptTemplate.from_template(template)
+retriever = vectorstore.as_retriever()
+from langchain_core.output_parsers import StrOutputParser
+from langchain_core.runnables import RunnablePassthrough
+llm = ChatGroq(model="llama-3.3-70b-versatile", api_key=groq_api_key )
+
+
+def _format_docs(docs):
+    """Join the text of the retrieved documents, separated by blank lines."""
+    return "\n\n".join(doc.page_content for doc in docs)
+
+
+# RAG pipeline: the user's question is used both to retrieve context and as
+# the {question} value. Retrieved documents go through _format_docs so the
+# prompt receives their plain text rather than the repr of a list of
+# Document objects.
+rag_chain = (
+    {"context": retriever | _format_docs, "question": RunnablePassthrough()}
+    | rag_prompt
+    | llm
+    | StrOutputParser()
+)
+# Chat callback: streams the RAG chain's answer while it is being generated
+def rag_memory_stream(message, history):
+    """Yield the answer to ``message`` as it grows, one streamed piece at a time.
+
+    Each yielded value is the full text received so far. ``history`` is
+    accepted as part of the callback signature but is not used.
+    """
+    answer_so_far = ""
+    for piece in rag_chain.stream(message):
+        answer_so_far = answer_so_far + piece
+        yield answer_so_far
+# Title shown at the top of the chat interface
+title = "RRA Chatbot"
+# Example questions offered to the user
+examples = [
+    " What is TIN deregistration? What about Tax account deactivation?",
+    # Single-quoted so the inner double quotes do not end the string early
+    # (the previous form was a syntax error).
+    'What is "permanent establishment"?',
+    "when do I receive my registration certificate?",
+]
+# Custom CSS for styling the interface: fonts, button appearance, focus
+# outlines and the examples section.
+# NOTE(review): the .gr-* selectors are assumed to match the class names of the
+# installed Gradio version — confirm they still apply.
+custom_css = """
+body {
+    font-family: "Arial", serif;
+}
+.gradio-container {
+    font-family: "Times New Roman", serif;
+}
+.gr-button {
+    background-color: #007bff; /* Blue button */
+    color: white;
+    border: none;
+    border-radius: 5px;
+    font-size: 16px;
+    padding: 10px 20px;
+    cursor: pointer;
+}
+.gr-textbox:focus, .gr-button:focus {
+    outline: none; /* Remove outline focus for a cleaner look */
+}
+/* Custom CSS for the examples section */
+.gr-examples {
+    font-size: 30px; /* Increase font size of examples */
+    background-color: #f9f9f9; /* Light background color */
+    border-radius: 30px; /* Rounded corners */
+}
+.gr-examples .example {
+    background-color: white; /* White background for each example */
+    cursor: pointer; /* Change cursor to pointer on hover */
+    transition: background-color 0.3s ease; /* Smooth hover effect */
+}
+.gr-examples .example:hover {
+    background-color: #f1f1f1; /* Light gray background on hover */
+}
 """
+# Build the chat interface around the streaming callback
 demo = gr.ChatInterface(
+    fn=rag_memory_stream,
+    title=title,
+    examples=examples,  # Example questions shown to the user
+    fill_height=True,
+    theme="soft",
+    css=custom_css,  # Apply the custom CSS
 )
+# Launch the app only when this file is run as a script
+# NOTE(review): share=True and inbrowser=True look like local-development
+# settings — confirm they are wanted in the hosted deployment.
 if __name__ == "__main__":
+    demo.launch(share=True, inbrowser=True, height=800, debug=True, width="100%")