File size: 3,569 Bytes
8953fd3
 
 
 
 
 
ab9f628
c0666b1
1ea5637
8953fd3
ab9f628
 
 
 
 
 
 
 
 
 
 
 
f069ded
 
 
c0666b1
 
f069ded
 
 
8953fd3
 
 
 
 
 
 
 
 
 
 
 
3ce84fd
 
 
8953fd3
 
 
3ce84fd
 
 
8953fd3
3ce84fd
8953fd3
 
 
 
 
 
 
 
 
 
 
 
 
c0666b1
1ea5637
 
c0666b1
 
 
1ea5637
c0666b1
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8953fd3
 
 
 
1ea5637
659a935
f069ded
ab9f628
c0666b1
8953fd3
 
 
ab9f628
c0666b1
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
import os
import gradio as gr
from transformers import ViTFeatureExtractor, ViTModel
from PIL import Image
from transformers import AutoTokenizer, AutoModel
import torch
from pdf2image import convert_from_path
import io
from io import BytesIO

# Stylesheet passed to the Gradio interface through its `css=` argument.
# Assembled from individual lines so the trailing spaces that are part of the
# sheet stay visible; the leading and trailing empty entries produce the
# newline at the start and end of the string.
css = "\n".join(
    [
        "",
        ".button { ",
        "    padding: 10px 20px; ",
        "    background: #007BFF; ",
        "    color: white; ",
        "    border: none; ",
        "    cursor: pointer; ",
        "    font-size: 16px; ",
        "    margin: 10px;",
        "}",
        "",
    ]
)

# Three rows, appended top to bottom: PDF upload, trigger button, insights box.
# NOTE(review): `layout` is not referenced by any other code visible in this
# file, and its File component uses type="file" while the Interface further
# down uses type="binary" -- confirm whether this list is still needed.
layout = []
layout.append(gr.Row([gr.File(label="Upload PDF", type="file")]))
layout.append(gr.Row([gr.Button("Generate Insights")]))
layout.append(
    gr.Row([gr.Textbox("Placeholder for PDF insights", label="Insights", type="text")])
)

# Loaded (feature extractor, model) pairs keyed by checkpoint name, so repeated
# calls (one per PDF page) reuse the weights instead of loading them again.
_VIT_CACHE = {}


def _load_vit(model_name):
    """Return the cached ``(feature_extractor, model)`` pair for *model_name*."""
    if model_name not in _VIT_CACHE:
        _VIT_CACHE[model_name] = (
            ViTFeatureExtractor.from_pretrained(model_name),
            ViTModel.from_pretrained(model_name),
        )
    return _VIT_CACHE[model_name]


# Function to get image embeddings using ViT
def get_image_embeddings(image_path, model_name='google/vit-base-patch16-224'):
    """Embed one image with a ViT checkpoint.

    Args:
        image_path: Path of the image file to embed.
        model_name: Hugging Face checkpoint name passed to ``from_pretrained``.

    Returns:
        A tensor holding the mean of the model's ``last_hidden_state`` over
        dimension 1 (mean pooling). It is computed under ``torch.no_grad()``,
        so it carries no autograd graph.
    """
    feature_extractor, model = _load_vit(model_name)

    # Close the file handle promptly and normalise the mode: grayscale or
    # RGBA inputs are converted so the extractor always sees three channels.
    with Image.open(image_path) as handle:
        image = handle.convert("RGB")

    inputs = feature_extractor(images=image, return_tensors="pt")
    # Inference only: skip gradient bookkeeping to save memory and time.
    with torch.no_grad():
        outputs = model(**inputs)
    return outputs.last_hidden_state.mean(dim=1)  # Mean pooling

# Function to convert PDF to images
def pdf_to_images(pdf_file, img_dir):
    """Render every page of a PDF to ``page_<n>.png`` files inside *img_dir*.

    Args:
        pdf_file: Filesystem path of the PDF, the raw PDF bytes, or a binary
            file-like object exposing ``read()`` (for example ``io.BytesIO``).
        img_dir: Directory that receives the PNG files; created if missing.

    Returns:
        The list of written image paths, in page order.
    """
    # A file-like object is drained to bytes so one code path handles both.
    if hasattr(pdf_file, "read"):
        pdf_file = pdf_file.read()

    if isinstance(pdf_file, (bytes, bytearray)):
        # convert_from_path only understands paths; in-memory data needs the
        # bytes variant from the same package.
        from pdf2image import convert_from_bytes

        images = convert_from_bytes(bytes(pdf_file))
    else:
        images = convert_from_path(pdf_file)

    # Create the directory if it doesn't exist
    os.makedirs(img_dir, exist_ok=True)

    saved_paths = []
    for page_number, image in enumerate(images, start=1):
        image_path = f"{img_dir}/page_{page_number}.png"
        image.save(image_path, "PNG")
        saved_paths.append(image_path)

    print(f"Converted {len(images)} pages to images and saved in {img_dir}")
    return saved_paths

# Loaded (tokenizer, model) pairs keyed by checkpoint name, so repeated calls
# reuse the weights instead of loading them again.
_TEXT_MODEL_CACHE = {}


def _load_text_model(model_name):
    """Return the cached ``(tokenizer, model)`` pair for *model_name*."""
    if model_name not in _TEXT_MODEL_CACHE:
        _TEXT_MODEL_CACHE[model_name] = (
            AutoTokenizer.from_pretrained(model_name),
            AutoModel.from_pretrained(model_name),
        )
    return _TEXT_MODEL_CACHE[model_name]


# Function to get text embeddings using a transformer model
def get_text_embeddings(text, model_name='bert-base-uncased'):
    """Embed text with a transformer checkpoint.

    Args:
        text: The text handed to the tokenizer; tokenization truncates at
            512 tokens (``truncation=True, max_length=512``).
        model_name: Hugging Face checkpoint name passed to ``from_pretrained``.

    Returns:
        A tensor holding the mean of the model's ``last_hidden_state`` over
        dimension 1 (mean pooling). It is computed under ``torch.no_grad()``,
        so it carries no autograd graph.
    """
    tokenizer, model = _load_text_model(model_name)

    inputs = tokenizer(text, return_tensors='pt', padding=True, truncation=True, max_length=512)
    # Inference only: skip gradient bookkeeping to save memory and time.
    with torch.no_grad():
        outputs = model(**inputs)
    return outputs.last_hidden_state.mean(dim=1)  # Mean pooling

# Function to process PDF and generate a response
def process_pdf_and_generate_response(pdf_file):
    """Turn an uploaded PDF into page/text embeddings and return a status text.

    Args:
        pdf_file: The upload as raw bytes (what a ``type="binary"`` file input
            delivers) or a filesystem path to the PDF.

    Returns:
        The response string on success, or ``"An error occurred: <reason>"``
        when the upload is missing or any processing step fails. Exceptions
        are never propagated because the string is shown directly in the UI.
    """
    import tempfile  # stdlib; imported here so the block is self-contained

    # Nothing uploaded (None) or an empty payload: report it explicitly
    # instead of letting the PDF converter fail with an obscure message.
    if not pdf_file:
        return "An error occurred: no PDF file was provided"

    def _page_number(filename):
        """Extract N from 'page_N.png' so pages sort numerically (2 before 10)."""
        digits = filename[: -len(".png")].rpartition("_")[2]
        return int(digits) if digits.isdigit() else 0

    try:
        # A fresh scratch directory per request: page images from earlier
        # uploads can no longer leak into this one, and it is cleaned up
        # automatically when the block exits.
        with tempfile.TemporaryDirectory() as work_dir:
            if isinstance(pdf_file, (bytes, bytearray)):
                # The converter works on paths, so spill the bytes to disk.
                pdf_path = os.path.join(work_dir, "upload.pdf")
                with open(pdf_path, "wb") as handle:
                    handle.write(pdf_file)
            else:
                pdf_path = pdf_file  # treated as a filesystem path

            # Convert PDF to images
            img_dir = os.path.join(work_dir, "pdf_images")
            pdf_to_images(pdf_path, img_dir)

            # Generate embeddings for each image, in page order
            # (os.listdir returns names in arbitrary order).
            page_files = sorted(
                (name for name in os.listdir(img_dir) if name.endswith(".png")),
                key=_page_number,
            )
            image_embeddings = [
                get_image_embeddings(os.path.join(img_dir, name))
                for name in page_files
            ]

        # Perform some text analysis on the PDF content (replace with your logic)
        pdf_text = "PDF content analysis placeholder"
        text_embeddings = get_text_embeddings(pdf_text)

        # Combine image and text embeddings and generate a response (replace with your logic)
        # The stacked tensor is not consumed yet; it is kept as the hook for
        # the real response logic.
        combined_embeddings = torch.cat([*image_embeddings, text_embeddings], dim=0)
        response = "Response based on the processed PDF"
    except Exception as e:
        # UI boundary: surface the failure as text rather than crashing the app.
        response = f"An error occurred: {str(e)}"
    return response

# Input and output widgets are created up front (upload first, then the text
# box) so the Interface call below only wires things together.
_pdf_upload = gr.File(label="Upload PDF", type="binary")  # Changed 'file' to 'binary'
_insights_box = gr.Textbox("Placeholder for PDF insights", label="Insights", type="text")

# Single-function app: the uploaded PDF goes to the processing function and
# the string it returns is shown in the insights box.
iface = gr.Interface(
    fn=process_pdf_and_generate_response,
    inputs=_pdf_upload,
    outputs=_insights_box,
    title="pdf-chatbot",
    description="Upload a PDF and receive insights based on its content.",
    css=css,
)

# Start the web server only when executed as a script, not on import.
if __name__ == "__main__":
    iface.launch()