shukdevdatta123 committed on
Commit
bb3cead
·
verified ·
1 Parent(s): 380b344

Update abc3.txt

Browse files
Files changed (1) hide show
  1. abc3.txt +115 -26
abc3.txt CHANGED
@@ -22,6 +22,41 @@ def extract_text_from_pdf(pdf_file):
22
  except Exception as e:
23
  return f"Error extracting text from PDF: {str(e)}"
24
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
25
  # Function to send the request to OpenAI API with an image, text or PDF input
26
  def generate_response(input_text, image, pdf_content, openai_api_key, reasoning_effort="medium", model_choice="o1"):
27
  if not openai_api_key:
@@ -50,11 +85,11 @@ def generate_response(input_text, image, pdf_content, openai_api_key, reasoning_
50
  ]
51
  else:
52
  messages = [
53
- {"role": "user", "content": [{"type": "text", "text": input_content}]}
54
  ]
55
  elif model_choice == "o3-mini":
56
  messages = [
57
- {"role": "user", "content": [{"type": "text", "text": input_content}]}
58
  ]
59
 
60
  try:
@@ -62,11 +97,10 @@ def generate_response(input_text, image, pdf_content, openai_api_key, reasoning_
62
  response = openai.ChatCompletion.create(
63
  model=model_choice,
64
  messages=messages,
65
- reasoning_effort=reasoning_effort,
66
  max_completion_tokens=2000
67
  )
68
 
69
- return response["choices"][0]["message"]["content"]
70
  except Exception as e:
71
  return f"Error calling OpenAI API: {str(e)}"
72
 
@@ -97,12 +131,15 @@ def transcribe_audio(audio, openai_api_key):
97
 
98
  # Transcribe the audio to text using OpenAI's whisper model
99
  audio_file_transcription = openai.Audio.transcribe(file=audio_file_obj, model="whisper-1")
100
- return audio_file_transcription['text']
101
  except Exception as e:
102
  return f"Error transcribing audio: {str(e)}"
103
 
104
  # The function that will be used by Gradio interface
105
- def chatbot(input_text, image, audio, pdf_file, openai_api_key, reasoning_effort, model_choice, pdf_content, history=[]):
 
 
 
106
  # If there's audio, transcribe it to text
107
  if audio:
108
  input_text = transcribe_audio(audio, openai_api_key)
@@ -112,14 +149,27 @@ def chatbot(input_text, image, audio, pdf_file, openai_api_key, reasoning_effort
112
  if pdf_file is not None:
113
  new_pdf_content = extract_text_from_pdf(pdf_file)
114
 
115
- # Generate the response
116
- response = generate_response(input_text, image, new_pdf_content, openai_api_key, reasoning_effort, model_choice)
117
-
118
- # Append the response to the history
119
- if input_text:
120
- history.append((f"User: {input_text}", f"Assistant: {response}"))
 
 
121
  else:
122
- history.append((f"User: [Uploaded content]", f"Assistant: {response}"))
 
 
 
 
 
 
 
 
 
 
 
123
 
124
  return "", None, None, None, new_pdf_content, history
125
 
@@ -136,13 +186,15 @@ def process_pdf(pdf_file):
136
  # Function to update visible components based on input type selection
137
  def update_input_type(choice):
138
  if choice == "Text":
139
- return gr.update(visible=True), gr.update(visible=False), gr.update(visible=False), gr.update(visible=False)
140
  elif choice == "Image":
141
- return gr.update(visible=True), gr.update(visible=True), gr.update(visible=False), gr.update(visible=False)
142
  elif choice == "Voice":
143
- return gr.update(visible=False), gr.update(visible=False), gr.update(visible=True), gr.update(visible=False)
144
  elif choice == "PDF":
145
- return gr.update(visible=True), gr.update(visible=False), gr.update(visible=False), gr.update(visible=True)
 
 
146
 
147
  # Custom CSS styles with animations and button colors
148
  custom_css = """
@@ -180,7 +232,7 @@ custom_css = """
180
  animation: fadeIn 2s ease-out;
181
  }
182
  /* Input field styles */
183
- .gradio-textbox, .gradio-dropdown, .gradio-image, .gradio-audio, .gradio-file {
184
  border-radius: 8px;
185
  border: 2px solid #ccc;
186
  padding: 10px;
@@ -189,7 +241,7 @@ custom_css = """
189
  font-size: 1rem;
190
  transition: all 0.3s ease;
191
  }
192
- .gradio-textbox:focus, .gradio-dropdown:focus, .gradio-image:focus, .gradio-audio:focus, .gradio-file:focus {
193
  border-color: #007bff;
194
  }
195
  /* Button styles */
@@ -299,7 +351,7 @@ custom_css = """
299
  .gradio-chatbot {
300
  max-height: 400px;
301
  }
302
- .gradio-textbox, .gradio-dropdown, .gradio-image, .gradio-audio, .gradio-file {
303
  width: 100%;
304
  }
305
  #submit-btn, #clear-history {
@@ -314,7 +366,7 @@ def create_interface():
314
  with gr.Blocks(css=custom_css) as demo:
315
  gr.Markdown("""
316
  <div class="gradio-header">
317
- <h1>Multimodal Chatbot (Text + Image + Voice + PDF)</h1>
318
  <h3>Interact with a chatbot using text, image, voice, or PDF inputs</h3>
319
  </div>
320
  """)
@@ -323,11 +375,12 @@ def create_interface():
323
  with gr.Accordion("Click to expand for details", open=False):
324
  gr.Markdown("""
325
  ### Description:
326
- This is a multimodal chatbot that can handle text, image, voice, and PDF inputs.
327
  - You can ask questions or provide text, and the assistant will respond.
328
  - You can upload an image, and the assistant will process it and answer questions about the image.
329
  - Voice input is supported: You can upload or record an audio file, and it will be transcribed to text and sent to the assistant.
330
  - PDF support: Upload a PDF and ask questions about its content.
 
331
  - Enter your OpenAI API key to start interacting with the model.
332
  - You can use the 'Clear History' button to remove the conversation history.
333
  - "o1" is for image, voice, PDF and text chat and "o3-mini" is for text, PDF and voice chat only.
@@ -347,7 +400,7 @@ def create_interface():
347
  # Input type selector
348
  with gr.Row():
349
  input_type = gr.Radio(
350
- ["Text", "Image", "Voice", "PDF"],
351
  label="Choose Input Type",
352
  value="Text"
353
  )
@@ -382,6 +435,23 @@ def create_interface():
382
  file_types=[".pdf"],
383
  visible=False
384
  )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
385
 
386
  with gr.Row():
387
  reasoning_effort = gr.Dropdown(
@@ -403,7 +473,7 @@ def create_interface():
403
  input_type.change(
404
  fn=update_input_type,
405
  inputs=[input_type],
406
- outputs=[input_text, image_input, audio_input, pdf_input]
407
  )
408
 
409
  # Process PDF when uploaded
@@ -416,8 +486,27 @@ def create_interface():
416
  # Button interactions
417
  submit_btn.click(
418
  fn=chatbot,
419
- inputs=[input_text, image_input, audio_input, pdf_input, openai_api_key, reasoning_effort, model_choice, pdf_content],
420
- outputs=[input_text, image_input, audio_input, pdf_input, pdf_content, chat_history]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
421
  )
422
 
423
  clear_btn.click(
 
22
  except Exception as e:
23
  return f"Error extracting text from PDF: {str(e)}"
24
 
25
# Function to generate an MCQ quiz from PDF content via the OpenAI API
def generate_mcq_quiz(pdf_content, num_questions, openai_api_key, model_choice):
    """Generate multiple-choice quiz questions from extracted PDF text.

    Parameters:
        pdf_content (str): Text previously extracted from the uploaded PDF.
        num_questions (int): Number of MCQ questions to request from the model.
        openai_api_key (str): User-supplied OpenAI API key; an error string is
            returned immediately if it is empty/None.
        model_choice (str): Model name passed through to ChatCompletion.create.

    Returns:
        str: The model's formatted quiz text, or a human-readable error message
        (this function never raises — errors are surfaced to the Gradio UI).
    """
    if not openai_api_key:
        return "Error: No API key provided."

    openai.api_key = openai_api_key

    # Cap content length to avoid token limits; slicing already handles
    # strings shorter than 8000 chars, so no length check is needed.
    limited_content = pdf_content[:8000]

    prompt = f"""Based on the following document content, generate {num_questions} multiple-choice quiz questions.
For each question:
1. Create a clear question based on key concepts in the document
2. Provide 4 possible answers (A, B, C, D)
3. Indicate the correct answer
4. Briefly explain why the answer is correct
Format the output clearly with each question numbered and separated.
Document content:
{limited_content}
"""

    try:
        messages = [
            {"role": "user", "content": prompt}
        ]

        response = openai.ChatCompletion.create(
            model=model_choice,
            messages=messages
        )

        return response.choices[0].message.content
    except Exception as e:
        # Surface API failures as a message instead of crashing the UI.
        return f"Error generating quiz: {str(e)}"
59
+
60
  # Function to send the request to OpenAI API with an image, text or PDF input
61
  def generate_response(input_text, image, pdf_content, openai_api_key, reasoning_effort="medium", model_choice="o1"):
62
  if not openai_api_key:
 
85
  ]
86
  else:
87
  messages = [
88
+ {"role": "user", "content": input_content}
89
  ]
90
  elif model_choice == "o3-mini":
91
  messages = [
92
+ {"role": "user", "content": input_content}
93
  ]
94
 
95
  try:
 
97
  response = openai.ChatCompletion.create(
98
  model=model_choice,
99
  messages=messages,
 
100
  max_completion_tokens=2000
101
  )
102
 
103
+ return response.choices[0].message.content
104
  except Exception as e:
105
  return f"Error calling OpenAI API: {str(e)}"
106
 
 
131
 
132
  # Transcribe the audio to text using OpenAI's whisper model
133
  audio_file_transcription = openai.Audio.transcribe(file=audio_file_obj, model="whisper-1")
134
+ return audio_file_transcription.text
135
  except Exception as e:
136
  return f"Error transcribing audio: {str(e)}"
137
 
138
  # The function that will be used by Gradio interface
139
+ def chatbot(input_text, image, audio, pdf_file, openai_api_key, reasoning_effort, model_choice, pdf_content, num_quiz_questions, pdf_quiz_mode, history):
140
+ if history is None:
141
+ history = []
142
+
143
  # If there's audio, transcribe it to text
144
  if audio:
145
  input_text = transcribe_audio(audio, openai_api_key)
 
149
  if pdf_file is not None:
150
  new_pdf_content = extract_text_from_pdf(pdf_file)
151
 
152
+ # Check if we're in PDF quiz mode
153
+ if pdf_quiz_mode:
154
+ if new_pdf_content:
155
+ # Generate MCQ quiz questions
156
+ quiz_response = generate_mcq_quiz(new_pdf_content, int(num_quiz_questions), openai_api_key, model_choice)
157
+ history.append((f"User: [Uploaded PDF for Quiz - {int(num_quiz_questions)} questions]", f"Assistant: {quiz_response}"))
158
+ else:
159
+ history.append(("User: [Attempted to generate quiz without PDF]", "Assistant: Please upload a PDF file to generate quiz questions."))
160
  else:
161
+ # Regular chat mode - generate the response
162
+ response = generate_response(input_text, image, new_pdf_content, openai_api_key, reasoning_effort, model_choice)
163
+
164
+ # Append the response to the history
165
+ if input_text:
166
+ history.append((f"User: {input_text}", f"Assistant: {response}"))
167
+ elif image is not None:
168
+ history.append((f"User: [Uploaded image]", f"Assistant: {response}"))
169
+ elif pdf_file is not None:
170
+ history.append((f"User: [Uploaded PDF]", f"Assistant: {response}"))
171
+ else:
172
+ history.append((f"User: [No input provided]", f"Assistant: Please provide some input (text, image, or PDF) for me to respond to."))
173
 
174
  return "", None, None, None, new_pdf_content, history
175
 
 
186
# Function to update visible components based on input type selection
def update_input_type(choice):
    """Toggle component visibility (and the quiz-mode flag) for `choice`.

    Returns six gr.update objects for: text box, image input, audio input,
    PDF upload, quiz-question slider, and the hidden quiz-mode checkbox.
    """
    # (text, image, audio, pdf, slider) visibility flags + quiz-mode value.
    layout = {
        "Text":      (True,  False, False, False, False, False),
        "Image":     (True,  True,  False, False, False, False),
        "Voice":     (False, False, True,  False, False, False),
        "PDF":       (True,  False, False, True,  False, False),
        "PDF(QUIZ)": (False, False, False, True,  True,  True),
    }
    spec = layout.get(choice)
    if spec is None:
        # Unknown choice: fall through with no updates, like the original
        # if/elif chain did.
        return None
    *visibilities, quiz_on = spec
    updates = tuple(gr.update(visible=v) for v in visibilities)
    return updates + (gr.update(value=quiz_on),)
198
 
199
  # Custom CSS styles with animations and button colors
200
  custom_css = """
 
232
  animation: fadeIn 2s ease-out;
233
  }
234
  /* Input field styles */
235
+ .gradio-textbox, .gradio-dropdown, .gradio-image, .gradio-audio, .gradio-file, .gradio-slider {
236
  border-radius: 8px;
237
  border: 2px solid #ccc;
238
  padding: 10px;
 
241
  font-size: 1rem;
242
  transition: all 0.3s ease;
243
  }
244
+ .gradio-textbox:focus, .gradio-dropdown:focus, .gradio-image:focus, .gradio-audio:focus, .gradio-file:focus, .gradio-slider:focus {
245
  border-color: #007bff;
246
  }
247
  /* Button styles */
 
351
  .gradio-chatbot {
352
  max-height: 400px;
353
  }
354
+ .gradio-textbox, .gradio-dropdown, .gradio-image, .gradio-audio, .gradio-file, .gradio-slider {
355
  width: 100%;
356
  }
357
  #submit-btn, #clear-history {
 
366
  with gr.Blocks(css=custom_css) as demo:
367
  gr.Markdown("""
368
  <div class="gradio-header">
369
+ <h1>Multimodal Chatbot (Text + Image + Voice + PDF + Quiz)</h1>
370
  <h3>Interact with a chatbot using text, image, voice, or PDF inputs</h3>
371
  </div>
372
  """)
 
375
  with gr.Accordion("Click to expand for details", open=False):
376
  gr.Markdown("""
377
  ### Description:
378
+ This is a multimodal chatbot that can handle text, image, voice, PDF inputs, and generate quizzes from PDFs.
379
  - You can ask questions or provide text, and the assistant will respond.
380
  - You can upload an image, and the assistant will process it and answer questions about the image.
381
  - Voice input is supported: You can upload or record an audio file, and it will be transcribed to text and sent to the assistant.
382
  - PDF support: Upload a PDF and ask questions about its content.
383
+ - PDF Quiz: Upload a PDF and specify how many MCQ questions you want generated based on the content.
384
  - Enter your OpenAI API key to start interacting with the model.
385
  - You can use the 'Clear History' button to remove the conversation history.
386
  - "o1" is for image, voice, PDF and text chat and "o3-mini" is for text, PDF and voice chat only.
 
400
  # Input type selector
401
  with gr.Row():
402
  input_type = gr.Radio(
403
+ ["Text", "Image", "Voice", "PDF", "PDF(QUIZ)"],
404
  label="Choose Input Type",
405
  value="Text"
406
  )
 
435
  file_types=[".pdf"],
436
  visible=False
437
  )
438
+
439
+ # Quiz specific components
440
+ quiz_questions_slider = gr.Slider(
441
+ minimum=1,
442
+ maximum=20,
443
+ value=5,
444
+ step=1,
445
+ label="Number of Quiz Questions",
446
+ visible=False
447
+ )
448
+
449
+ # Hidden state for quiz mode
450
+ quiz_mode = gr.Checkbox(
451
+ label="Quiz Mode",
452
+ visible=False,
453
+ value=False
454
+ )
455
 
456
  with gr.Row():
457
  reasoning_effort = gr.Dropdown(
 
473
  input_type.change(
474
  fn=update_input_type,
475
  inputs=[input_type],
476
+ outputs=[input_text, image_input, audio_input, pdf_input, quiz_questions_slider, quiz_mode]
477
  )
478
 
479
  # Process PDF when uploaded
 
486
  # Button interactions
487
  submit_btn.click(
488
  fn=chatbot,
489
+ inputs=[
490
+ input_text,
491
+ image_input,
492
+ audio_input,
493
+ pdf_input,
494
+ openai_api_key,
495
+ reasoning_effort,
496
+ model_choice,
497
+ pdf_content,
498
+ quiz_questions_slider,
499
+ quiz_mode,
500
+ chat_history
501
+ ],
502
+ outputs=[
503
+ input_text,
504
+ image_input,
505
+ audio_input,
506
+ pdf_input,
507
+ pdf_content,
508
+ chat_history
509
+ ]
510
  )
511
 
512
  clear_btn.click(