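"""Gradio demo: a live webcam sepia filter whose output image is passed to a
handwritten-text recognition model whenever it updates."""
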
import gradio as gr
import numpy as np
from transformers import TrOCRProcessor, VisionEncoderDecoderModel
# from transformers import AutoProcessor, AutoModelForDocumentQuestionAnswering
from PIL import Image

#client = InferenceClient("models/microsoft/trocr-base-handwritten")
# TrOCR matches the pixel_values -> generate() -> batch_decode() pipeline in
# process_image() below. The LayoutLMv2 document-QA checkpoint previously
# loaded here is encoder-only: it has no generate() method and its processor
# expects a question alongside the image, so recognition always failed.
# processor = AutoProcessor.from_pretrained("Sharka/CIVQA_LayoutLMv2_EasyOCR")
# model = AutoModelForDocumentQuestionAnswering.from_pretrained("Sharka/CIVQA_LayoutLMv2_EasyOCR")
processor = TrOCRProcessor.from_pretrained("microsoft/trocr-base-handwritten")
model = VisionEncoderDecoderModel.from_pretrained("microsoft/trocr-base-handwritten")

def sepia(input_img):
    # Classic sepia-tone matrix: each output channel is a weighted
    # combination of the input RGB channels.
    sepia_filter = np.array([
        [0.393, 0.769, 0.189],
        [0.349, 0.686, 0.168],
        [0.272, 0.534, 0.131]
    ])
    sepia_img = input_img.dot(sepia_filter.T)
    # Normalize to [0, 1] so Gradio renders the float array correctly;
    # guard against dividing by zero on an all-black frame.
    max_val = sepia_img.max()
    if max_val > 0:
        sepia_img /= max_val
    sepia_values = repr(sepia_img)
    return sepia_img, sepia_values


## Gradio Blocks reference: https://www.gradio.app/docs/gradio/blocks
## (gr.Interface takes 'inputs' and 'outputs' as required arguments.)
def process_image(image):
    # The .change() listener also fires when the image is cleared, which
    # passes None; skip instead of raising.
    if image is None:
        return ""
    try:
        # Gradio's Image component yields a uint8 numpy array; convert to
        # an RGB PIL image for the processor.
        pil_image = Image.fromarray(image).convert("RGB")
        pixel_values = processor(images=pil_image, return_tensors="pt").pixel_values
        generated_ids = model.generate(pixel_values)
        generated_text = processor.batch_decode(generated_ids, skip_special_tokens=True)[0]
        return generated_text
    except Exception as e:
        return f"Error: {e}"

# Note: currently unused anywhere in the UI.
def additional_input(text):
    return f"Additional input received: {text}"

# Standalone sepia interface (referenced only by the commented-out
# TabbedInterface below). sepia() returns two values, so it needs two outputs.
sepia_interface = gr.Interface(sepia, gr.Image(), ["image", "text"])

with gr.Blocks() as generated_output:
    with gr.Column():
        sepia_values_text = gr.Textbox(label="Sepia Values")
        output_img = gr.Image(label="Output Image")
        # An Interface embedded inside Blocks is rendered in place.
        gr.Interface(
            fn=sepia,
            inputs=gr.Image(
                # webcam source + streaming=True make the camera stream live
                sources=["webcam"],
                streaming=True,
            ),
            outputs=[output_img, sepia_values_text],
            live=True,
            show_progress="full",
        )
    with gr.Row():
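        # Re-run text recognition whenever the sepia output image changes.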
        output_img.change(
            fn=process_image,
            inputs=output_img,
            outputs=gr.Textbox(label="Recognized Text"),
            show_progress="full")
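
# Earlier experiments, kept for reference: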
#with gr.Blocks() as generated_output:
#    inp = gr.Interface(sepia, gr.Image(), "image")
#    out = gr.Textbox()


#demo = gr.TabbedInterface([sepia_interface, generated_output], ["RGB Sepia Filter", "Handwritten to Text"])

#with gr.Blocks() as demo:
#    with gr.Row():
#        input_img = gr.Image(label="Input Image")
#        submit_button = gr.Button("Submit")
#        output_img = gr.Image(label="Output Image")
#        sepia_values_text = gr.Textbox(label="Sepia Values")

#    submit_button.click(sepia, inputs=input_img, outputs=[output_img, sepia_values_text])
    
if __name__ == "__main__":
    generated_output.launch()