# NOTE(review): removed non-Python page-scrape residue (file-size banner,
# commit hashes, line-number gutter) that made this file syntactically invalid.
import gradio as gr
import pandas as pd
from transformers import pipeline
from bs4 import BeautifulSoup
import requests
from PyPDF2 import PdfReader
import docx
from pptx import Presentation
import openpyxl

# Load the extractive question-answering model.
# NOTE: the previous checkpoint "facebook/llama-7b-hf" is not published under
# that hub id, and LLaMA is a causal LM — incompatible with the
# "question-answering" pipeline task. Use a standard SQuAD-finetuned
# extractive QA checkpoint instead.
model = pipeline("question-answering", model="deepset/roberta-base-squad2")

# Function to read text from uploaded documents
def read_text_from_document(file):
    """Extract plain text from an uploaded document.

    Dispatches on the file name's extension (case-insensitive). Supported
    types: .txt, .pdf, .docx, .pptx, .xlsx.

    Args:
        file: file-like object with a ``name`` attribute and a binary
            ``read()`` method (as supplied by the Gradio File component).

    Returns:
        str: the extracted text.

    Raises:
        ValueError: if the extension is not a supported type (previously
            this fell through and raised ``UnboundLocalError``).
    """
    name = file.name.lower()
    if name.endswith('.txt'):
        return file.read().decode('utf-8')
    if name.endswith('.pdf'):
        reader = PdfReader(file)
        # extract_text() can return None for pages without a text layer
        return ''.join(page.extract_text() or '' for page in reader.pages)
    if name.endswith('.docx'):
        doc = docx.Document(file)
        return ''.join(para.text for para in doc.paragraphs)
    if name.endswith('.pptx'):
        presentation = Presentation(file)
        return ''.join(
            shape.text
            for slide in presentation.slides
            for shape in slide.shapes
            if hasattr(shape, "text")
        )
    if name.endswith('.xlsx'):
        wb = openpyxl.load_workbook(file)
        sheet = wb.active
        # skip empty cells instead of emitting the literal string "None"
        return ' '.join(
            str(cell.value)
            for row in sheet.rows
            for cell in row
            if cell.value is not None
        )
    raise ValueError(f"Unsupported file type: {file.name}")

# Function to scrape URL
def scrape_url(url):
    """Fetch *url* and return the page's visible text.

    Best-effort: any failure (network error, timeout, HTTP error status)
    is returned as its message string so the UI displays it instead of
    crashing.

    Args:
        url: the URL to fetch.

    Returns:
        str: the page text, or the error message on failure.
    """
    try:
        # A timeout prevents the UI from hanging forever on a dead host.
        response = requests.get(url, timeout=30)
        # Surface HTTP errors (404, 500, ...) instead of scraping the
        # error page as if it were content.
        response.raise_for_status()
        soup = BeautifulSoup(response.text, 'html.parser')
        return soup.get_text()
    except Exception as e:
        return str(e)

# Function to answer questions based on input data
def answer_questions(data, question):
    """Run the QA model over *data* and return the extracted answer.

    Args:
        data: context text for the model; falsy values short-circuit.
        question: the question to answer.

    Returns:
        str: the model's answer, the exception message if inference
        fails, or "No data provided" when *data* is empty/None.
    """
    if not data:
        return "No data provided"
    try:
        prediction = model(question=question, context=data)
    except Exception as err:
        return str(err)
    return prediction['answer']

def _resolve_context(document, url):
    """Return the QA context: the uploaded document's text if a file was
    provided, otherwise the scraped text of *url*."""
    return read_text_from_document(document) if document else scrape_url(url)


def _chat(document, url, question):
    """Gradio handler: build the context, then answer *question* over it."""
    return answer_questions(_resolve_context(document, url), question)


# Gradio interface
demo = gr.Interface(
    fn=_chat,
    inputs=[
        gr.File(label="Upload Document (.txt, .pdf, .docx, .pptx, .xlsx)"),
        gr.Textbox(label="Enter URL"),
        gr.Textbox(label="Ask a question")
    ],
    outputs=gr.Textbox(label="Answer"),
    title="LLM Chatbot",
    description="Upload a document or enter a URL and ask a question"
)

# Launch the demo
demo.launch()