import gradio as gr
import requests
from bs4 import BeautifulSoup
from transformers import pipeline
from PyPDF2 import PdfReader
import docx
from pptx import Presentation
import openpyxl

# Load an extractive question-answering model.
# Note: "facebook/llama-7b-hf" is not a question-answering checkpoint and
# will not load under this pipeline task; a SQuAD-tuned model is used instead.
model = pipeline("question-answering", model="deepset/roberta-base-squad2")

# Read text from an uploaded document. Depending on the Gradio version,
# gr.File passes either a filepath string or a temp-file object whose
# .name attribute holds the path on disk.
def read_text_from_document(file):
    path = file if isinstance(file, str) else file.name
    text = ''
    if path.endswith('.txt'):
        with open(path, 'r', encoding='utf-8', errors='ignore') as f:
            text = f.read()
    elif path.endswith('.pdf'):
        reader = PdfReader(path)
        for page in reader.pages:
            # extract_text() can return None for image-only pages
            text += (page.extract_text() or '') + '\n'
    elif path.endswith('.docx'):
        doc = docx.Document(path)
        for para in doc.paragraphs:
            text += para.text + '\n'
    elif path.endswith('.pptx'):
        presentation = Presentation(path)
        for slide in presentation.slides:
            for shape in slide.shapes:
                if hasattr(shape, "text"):
                    text += shape.text + '\n'
    elif path.endswith('.xlsx'):
        wb = openpyxl.load_workbook(path)
        sheet = wb.active
        for row in sheet.rows:
            for cell in row:
                # Skip empty cells instead of concatenating "None"
                if cell.value is not None:
                    text += str(cell.value) + ' '
    return text

# Fetch a URL and strip the HTML down to plain text
def scrape_url(url):
    try:
        response = requests.get(url, timeout=10)
        response.raise_for_status()
        soup = BeautifulSoup(response.text, 'html.parser')
        return soup.get_text(separator=' ', strip=True)
    except Exception as e:
        return str(e)

# Answer a question against the extracted text
def answer_questions(data, question):
    if data:
        try:
            result = model(question=question, context=data)
            return result['answer']
        except Exception as e:
            return str(e)
    return "No data provided"

# Gradio interface: an uploaded document takes precedence over the URL field
demo = gr.Interface(
    fn=lambda data, url, question: answer_questions(
        read_text_from_document(data) if data else scrape_url(url), question
    ),
    inputs=[
        gr.File(label="Upload Document (.txt, .pdf, .docx, .pptx, .xlsx)"),
        gr.Textbox(label="Enter URL"),
        gr.Textbox(label="Ask a question"),
    ],
    outputs=gr.Textbox(label="Answer"),
    title="Document Q&A Chatbot",
    description="Upload a document or enter a URL and ask a question",
)

# Launch the demo
demo.launch()
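
# --- Usage note (not part of the original script; package names inferred
# from the imports above, and torch is assumed as the pipeline backend) ---
# Install dependencies before running:
#   pip install gradio transformers torch beautifulsoup4 requests \
#       PyPDF2 python-docx python-pptx openpyxl
# Then run the script and open the local URL that demo.launch() prints.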