"""Gradio demo: answer a question about an uploaded document or a web page."""

import os

import docx
import gradio as gr
import openpyxl
import pandas as pd
import requests
from bs4 import BeautifulSoup
from pptx import Presentation
from PyPDF2 import PdfReader
from transformers import AutoModelForSeq2SeqLM, AutoTokenizer, pipeline

# Seconds to wait for a remote server before giving up on a URL.
REQUEST_TIMEOUT_SECONDS = 10

# Load the tokenizer and the generation model once, at import time.
# NOTE(review): LLaMA checkpoints are normally decoder-only, whereas
# AutoModelForSeq2SeqLM expects an encoder-decoder architecture -- confirm
# that this checkpoint id and this model class actually load together.
model_name = "facebook/llama-7b-hf"
rag_tokenizer = AutoTokenizer.from_pretrained(model_name)
rag_model = AutoModelForSeq2SeqLM.from_pretrained(model_name)


def read_text_from_document(file):
    """Extract plain text from an uploaded document.

    Args:
        file: Either a filesystem path (``str`` / ``os.PathLike``) or a
            file-like object exposing ``.name`` (plus ``.read()`` for
            ``.txt`` input). The extension of the name, compared
            case-insensitively, selects the parser.

    Returns:
        The extracted text. Pages, paragraphs and shapes are joined with
        newlines; spreadsheet cells of the active sheet are joined with
        spaces.

    Raises:
        ValueError: If the extension is not .txt, .pdf, .docx, .pptx or
            .xlsx.
    """
    is_path = isinstance(file, (str, os.PathLike))
    name = os.fspath(file) if is_path else file.name
    # Parsers below accept either a path string or a binary stream, so a
    # file object is handed through untouched and a path is passed as str.
    source = name if is_path else file
    extension = os.path.splitext(name)[1].lower()

    if extension == ".txt":
        if is_path:
            with open(source, "rb") as handle:
                raw = handle.read()
        else:
            raw = file.read()
        # A stream opened in text mode already yields str.
        if isinstance(raw, bytes):
            return raw.decode("utf-8", errors="replace")
        return raw

    if extension == ".pdf":
        reader = PdfReader(source)
        # extract_text() can come back empty/None for pages without text.
        return "\n".join(page.extract_text() or "" for page in reader.pages)

    if extension == ".docx":
        document = docx.Document(source)
        return "\n".join(para.text for para in document.paragraphs)

    if extension == ".pptx":
        presentation = Presentation(source)
        pieces = []
        for slide in presentation.slides:
            for shape in slide.shapes:
                if hasattr(shape, "text"):
                    pieces.append(shape.text)
        return "\n".join(pieces)

    if extension == ".xlsx":
        workbook = openpyxl.load_workbook(source)
        sheet = workbook.active
        values = []
        for row in sheet.rows:
            for cell in row:
                # Empty cells would otherwise show up as the text "None".
                if cell.value is not None:
                    values.append(str(cell.value))
        return " ".join(values)

    raise ValueError(f"Unsupported file type: {extension or name!r}")


def scrape_url(url):
    """Download *url* and return the visible text of the page.

    Args:
        url: Address of the page to fetch.

    Returns:
        The page text with markup stripped, or -- when the request fails
        (bad URL, timeout, connection error, HTTP error status) -- the
        error message as a string.
    """
    try:
        response = requests.get(url, timeout=REQUEST_TIMEOUT_SECONDS)
        response.raise_for_status()
    except requests.RequestException as exc:
        return str(exc)
    return BeautifulSoup(response.text, "html.parser").get_text()


def answer_questions(data, question):
    """Generate an answer to *question* using *data* as context.

    Args:
        data: Context text; when empty or ``None`` no generation is run.
        question: The question to put to the model.

    Returns:
        The decoded model output, or ``"No data provided"`` when *data*
        is empty.
    """
    if not data:
        return "No data provided"

    prompt = f"Question: {question} Context: {data}"
    # Truncate so a long document cannot exceed the tokenizer's configured
    # maximum length; the question comes first, so it is what survives.
    inputs = rag_tokenizer.encode(prompt, return_tensors="pt", truncation=True)
    outputs = rag_model.generate(inputs, max_length=100)
    # generate() returns one sequence per batch item; decode the only one.
    return rag_tokenizer.decode(outputs[0], skip_special_tokens=True)


def _answer_from_inputs(data, url, question):
    """Gradio callback: prefer the uploaded file, fall back to the URL."""
    if data:
        try:
            text = read_text_from_document(data)
        except ValueError as exc:
            # Show the unsupported-type message instead of a bare failure.
            return str(exc)
    elif url and url.strip():
        text = scrape_url(url.strip())
    else:
        text = ""
    return answer_questions(text, question)


# Gradio interface
demo = gr.Interface(
    fn=_answer_from_inputs,
    inputs=[
        gr.File(label="Upload Document (.txt, .pdf, .docx, .pptx, .xlsx)"),
        gr.Textbox(label="Enter URL"),
        gr.Textbox(label="Ask a question"),
    ],
    outputs=gr.Textbox(label="Answer"),
    title="RAG Chat",
    description="Upload a document or enter a URL and ask a question",
)

# Launch the demo
demo.launch()