Spaces:

shukdevdatta123
/

Multi-modal-o1-Chatbot

Running

App Files Files Community

shukdevdatta123 committed on Mar 11

Commit

d39c096

verified ·

1 Parent(s): 511c89a

Update app.py

Browse files

Files changed (1) hide show

app.py +91 -20

app.py CHANGED Viewed

@@ -22,6 +22,41 @@ def extract_text_from_pdf(pdf_file):
     except Exception as e:
         return f"Error extracting text from PDF: {str(e)}"
 # Function to send the request to OpenAI API with an image, text or PDF input
 def generate_response(input_text, image, pdf_content, openai_api_key, reasoning_effort="medium", model_choice="o1"):
     if not openai_api_key:
@@ -102,7 +137,7 @@ def transcribe_audio(audio, openai_api_key):
         return f"Error transcribing audio: {str(e)}"
 # The function that will be used by Gradio interface
-def chatbot(input_text, image, audio, pdf_file, openai_api_key, reasoning_effort, model_choice, pdf_content, history=[]):
     # If there's audio, transcribe it to text
     if audio:
         input_text = transcribe_audio(audio, openai_api_key)
@@ -112,14 +147,20 @@ def chatbot(input_text, image, audio, pdf_file, openai_api_key, reasoning_effort
     if pdf_file is not None:
         new_pdf_content = extract_text_from_pdf(pdf_file)
-    # Generate the response
-    response = generate_response(input_text, image, new_pdf_content, openai_api_key, reasoning_effort, model_choice)
-    # Append the response to the history
-    if input_text:
-        history.append((f"User: {input_text}", f"Assistant: {response}"))
     else:
-        history.append((f"User: [Uploaded content]", f"Assistant: {response}"))
     return "", None, None, None, new_pdf_content, history
@@ -136,13 +177,15 @@ def process_pdf(pdf_file):
 # Function to update visible components based on input type selection
 def update_input_type(choice):
     if choice == "Text":
-        return gr.update(visible=True), gr.update(visible=False), gr.update(visible=False), gr.update(visible=False)
     elif choice == "Image":
-        return gr.update(visible=True), gr.update(visible=True), gr.update(visible=False), gr.update(visible=False)
     elif choice == "Voice":
-        return gr.update(visible=False), gr.update(visible=False), gr.update(visible=True), gr.update(visible=False)
     elif choice == "PDF":
-        return gr.update(visible=True), gr.update(visible=False), gr.update(visible=False), gr.update(visible=True)
 # Custom CSS styles with animations and button colors
 custom_css = """
@@ -180,7 +223,7 @@ custom_css = """
         animation: fadeIn 2s ease-out;
     }
     /* Input field styles */
-    .gradio-textbox, .gradio-dropdown, .gradio-image, .gradio-audio, .gradio-file {
         border-radius: 8px;
         border: 2px solid #ccc;
         padding: 10px;
@@ -189,7 +232,7 @@ custom_css = """
         font-size: 1rem;
         transition: all 0.3s ease;
     }
-    .gradio-textbox:focus, .gradio-dropdown:focus, .gradio-image:focus, .gradio-audio:focus, .gradio-file:focus {
         border-color: #007bff;
     }
     /* Button styles */
@@ -299,7 +342,7 @@ custom_css = """
         .gradio-chatbot {
             max-height: 400px;
         }
-        .gradio-textbox, .gradio-dropdown, .gradio-image, .gradio-audio, .gradio-file {
             width: 100%;
         }
         #submit-btn, #clear-history {
@@ -314,7 +357,7 @@ def create_interface():
     with gr.Blocks(css=custom_css) as demo:
         gr.Markdown("""
             <div class="gradio-header">
-                <h1>Multimodal Chatbot (Text + Image + Voice + PDF)</h1>
                 <h3>Interact with a chatbot using text, image, voice, or PDF inputs</h3>
             </div>
         """)
@@ -323,11 +366,12 @@ def create_interface():
         with gr.Accordion("Click to expand for details", open=False):
             gr.Markdown("""
             ### Description:
-            This is a multimodal chatbot that can handle text, image, voice, and PDF inputs.
             - You can ask questions or provide text, and the assistant will respond.
             - You can upload an image, and the assistant will process it and answer questions about the image.
             - Voice input is supported: You can upload or record an audio file, and it will be transcribed to text and sent to the assistant.
             - PDF support: Upload a PDF and ask questions about its content.
             - Enter your OpenAI API key to start interacting with the model.
             - You can use the 'Clear History' button to remove the conversation history.
             - "o1" is for image, voice, PDF and text chat and "o3-mini" is for text, PDF and voice chat only.
@@ -347,7 +391,7 @@ def create_interface():
         # Input type selector
         with gr.Row():
             input_type = gr.Radio(
-                ["Text", "Image", "Voice", "PDF"],
                 label="Choose Input Type",
                 value="Text"
             )
@@ -382,6 +426,23 @@ def create_interface():
                 file_types=[".pdf"],
                 visible=False
             )
         with gr.Row():
             reasoning_effort = gr.Dropdown(
@@ -403,7 +464,7 @@ def create_interface():
         input_type.change(
             fn=update_input_type,
             inputs=[input_type],
-            outputs=[input_text, image_input, audio_input, pdf_input]
         )
         # Process PDF when uploaded
@@ -413,10 +474,20 @@ def create_interface():
             outputs=[pdf_content]
         )
         # Button interactions
         submit_btn.click(
             fn=chatbot,
-            inputs=[input_text, image_input, audio_input, pdf_input, openai_api_key, reasoning_effort, model_choice, pdf_content],
             outputs=[input_text, image_input, audio_input, pdf_input, pdf_content, chat_history]
         )

     except Exception as e:
         return f"Error extracting text from PDF: {str(e)}"
+# Function to generate MCQ quiz from PDF content
+def generate_mcq_quiz(pdf_content, num_questions, openai_api_key, model_choice):
+    if not openai_api_key:
+        return "Error: No API key provided."
+    openai.api_key = openai_api_key
+    prompt = f"""Based on the following document content, generate {num_questions} multiple-choice quiz questions.
+For each question:
+1. Create a clear question based on key concepts in the document
+2. Provide 4 possible answers (A, B, C, D)
+3. Indicate the correct answer
+4. Briefly explain why the answer is correct
+Format the output clearly with each question numbered and separated.
+Document content:
+{pdf_content[:8000]}  # Limiting content to avoid token limits
+"""
+    try:
+        messages = [
+            {"role": "user", "content": [{"type": "text", "text": prompt}]}
+        ]
+        response = openai.ChatCompletion.create(
+            model=model_choice,
+            messages=messages,
+            max_completion_tokens=2000
+        )
+        return response["choices"][0]["message"]["content"]
+    except Exception as e:
+        return f"Error generating quiz: {str(e)}"
 # Function to send the request to OpenAI API with an image, text or PDF input
 def generate_response(input_text, image, pdf_content, openai_api_key, reasoning_effort="medium", model_choice="o1"):
     if not openai_api_key:
         return f"Error transcribing audio: {str(e)}"
 # The function that will be used by Gradio interface
+def chatbot(input_text, image, audio, pdf_file, openai_api_key, reasoning_effort, model_choice, pdf_content, num_quiz_questions, pdf_quiz_mode, history=[]):
     # If there's audio, transcribe it to text
     if audio:
         input_text = transcribe_audio(audio, openai_api_key)
     if pdf_file is not None:
         new_pdf_content = extract_text_from_pdf(pdf_file)
+    # Check if we're in PDF quiz mode
+    if pdf_quiz_mode and new_pdf_content:
+        # Generate MCQ quiz questions
+        response = generate_mcq_quiz(new_pdf_content, num_quiz_questions, openai_api_key, model_choice)
+        history.append((f"User: [Uploaded PDF for Quiz - {num_quiz_questions} questions]", f"Assistant: {response}"))
     else:
+        # Regular chat mode - generate the response
+        response = generate_response(input_text, image, new_pdf_content, openai_api_key, reasoning_effort, model_choice)
+        # Append the response to the history
+        if input_text:
+            history.append((f"User: {input_text}", f"Assistant: {response}"))
+        else:
+            history.append((f"User: [Uploaded content]", f"Assistant: {response}"))
     return "", None, None, None, new_pdf_content, history
 # Function to update visible components based on input type selection
 def update_input_type(choice):
     if choice == "Text":
+        return gr.update(visible=True), gr.update(visible=False), gr.update(visible=False), gr.update(visible=False), gr.update(visible=False), gr.update(visible=False)
     elif choice == "Image":
+        return gr.update(visible=True), gr.update(visible=True), gr.update(visible=False), gr.update(visible=False), gr.update(visible=False), gr.update(visible=False)
     elif choice == "Voice":
+        return gr.update(visible=False), gr.update(visible=False), gr.update(visible=True), gr.update(visible=False), gr.update(visible=False), gr.update(visible=False)
     elif choice == "PDF":
+        return gr.update(visible=True), gr.update(visible=False), gr.update(visible=False), gr.update(visible=True), gr.update(visible=False), gr.update(visible=False)
+    elif choice == "PDF(QUIZ)":
+        return gr.update(visible=False), gr.update(visible=False), gr.update(visible=False), gr.update(visible=True), gr.update(visible=True), gr.update(visible=True)
 # Custom CSS styles with animations and button colors
 custom_css = """
         animation: fadeIn 2s ease-out;
     }
     /* Input field styles */
+    .gradio-textbox, .gradio-dropdown, .gradio-image, .gradio-audio, .gradio-file, .gradio-slider {
         border-radius: 8px;
         border: 2px solid #ccc;
         padding: 10px;
         font-size: 1rem;
         transition: all 0.3s ease;
     }
+    .gradio-textbox:focus, .gradio-dropdown:focus, .gradio-image:focus, .gradio-audio:focus, .gradio-file:focus, .gradio-slider:focus {
         border-color: #007bff;
     }
     /* Button styles */
         .gradio-chatbot {
             max-height: 400px;
         }
+        .gradio-textbox, .gradio-dropdown, .gradio-image, .gradio-audio, .gradio-file, .gradio-slider {
             width: 100%;
         }
         #submit-btn, #clear-history {
     with gr.Blocks(css=custom_css) as demo:
         gr.Markdown("""
             <div class="gradio-header">
+                <h1>Multimodal Chatbot (Text + Image + Voice + PDF + Quiz)</h1>
                 <h3>Interact with a chatbot using text, image, voice, or PDF inputs</h3>
             </div>
         """)
         with gr.Accordion("Click to expand for details", open=False):
             gr.Markdown("""
             ### Description:
+            This is a multimodal chatbot that can handle text, image, voice, PDF inputs, and generate quizzes from PDFs.
             - You can ask questions or provide text, and the assistant will respond.
             - You can upload an image, and the assistant will process it and answer questions about the image.
             - Voice input is supported: You can upload or record an audio file, and it will be transcribed to text and sent to the assistant.
             - PDF support: Upload a PDF and ask questions about its content.
+            - PDF Quiz: Upload a PDF and specify how many MCQ questions you want generated based on the content.
             - Enter your OpenAI API key to start interacting with the model.
             - You can use the 'Clear History' button to remove the conversation history.
             - "o1" is for image, voice, PDF and text chat and "o3-mini" is for text, PDF and voice chat only.
         # Input type selector
         with gr.Row():
             input_type = gr.Radio(
+                ["Text", "Image", "Voice", "PDF", "PDF(QUIZ)"],
                 label="Choose Input Type",
                 value="Text"
             )
                 file_types=[".pdf"],
                 visible=False
             )
+            # Quiz specific components
+            quiz_questions_slider = gr.Slider(
+                minimum=1,
+                maximum=20,
+                value=5,
+                step=1,
+                label="Number of Quiz Questions",
+                visible=False
+            )
+            # Hidden state for quiz mode
+            quiz_mode = gr.Checkbox(
+                label="Quiz Mode",
+                visible=False,
+                value=False
+            )
         with gr.Row():
             reasoning_effort = gr.Dropdown(
         input_type.change(
             fn=update_input_type,
             inputs=[input_type],
+            outputs=[input_text, image_input, audio_input, pdf_input, quiz_questions_slider, quiz_mode]
         )
         # Process PDF when uploaded
             outputs=[pdf_content]
         )
+        # Update quiz mode when PDF(QUIZ) is selected
+        def update_quiz_mode(choice):
+            return True if choice == "PDF(QUIZ)" else False
+        input_type.change(
+            fn=update_quiz_mode,
+            inputs=[input_type],
+            outputs=[quiz_mode]
+        )
         # Button interactions
         submit_btn.click(
             fn=chatbot,
+            inputs=[input_text, image_input, audio_input, pdf_input, openai_api_key, reasoning_effort, model_choice, pdf_content, quiz_questions_slider, quiz_mode],
             outputs=[input_text, image_input, audio_input, pdf_input, pdf_content, chat_history]
         )