Shak33l-UiRev committed
Commit db7e40d · verified · 1 parent: 4045262

Update app.py


Streamlit app that compares different document understanding models
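The `analyze_document` helper in the diff below only implements the Donut and LayoutLMv3 paths; the BROS and LLaVA-1.5 branches are still a TODO. For reference, here is a minimal, untested sketch of what a standalone LLaVA-1.5 path could look like (the `analyze_with_llava` helper name, prompt wording, and dtype/device settings are assumptions, not part of this commit):

```python
# Rough sketch, not part of this commit: a possible LLaVA-1.5 document-analysis path.
from PIL import Image
import torch
from transformers import LlavaProcessor, LlavaForConditionalGeneration

def analyze_with_llava(image: Image.Image) -> str:
    """Ask LLaVA-1.5 to describe/extract the contents of a document image."""
    model_id = "llava-hf/llava-1.5-7b-hf"  # same checkpoint the app loads
    processor = LlavaProcessor.from_pretrained(model_id)
    model = LlavaForConditionalGeneration.from_pretrained(
        model_id, torch_dtype=torch.float16, device_map="auto"  # device_map needs `accelerate`
    )
    # LLaVA-1.5 chat-style prompt; the exact instruction text is an assumption
    prompt = "USER: <image>\nExtract the key fields from this document. ASSISTANT:"
    inputs = processor(images=image, text=prompt, return_tensors="pt").to(model.device)
    output_ids = model.generate(**inputs, max_new_tokens=256)
    return processor.decode(output_ids[0], skip_special_tokens=True)
```

A BROS branch would additionally need OCR-extracted words and bounding boxes as input, since BROS has no vision encoder of its own, which is why it is left out of the sketch.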

Files changed (1)
  1. app.py +138 -2
app.py CHANGED
@@ -1,4 +1,140 @@
  import streamlit as st

- x = st.slider('Select a value')
- st.write(x, 'squared is', x * x)
  import streamlit as st
+ from PIL import Image
+ import torch
+ from transformers import (
+     DonutProcessor,
+     VisionEncoderDecoderModel,
+     LayoutLMv3Processor,
+     LayoutLMv3ForSequenceClassification,
+     BrosProcessor,
+     BrosForTokenClassification,
+     LlavaProcessor,
+     LlavaForConditionalGeneration
+ )
 
+ def load_model(model_name):
+     """Load the selected model and processor"""
+     if model_name == "Donut":
+         processor = DonutProcessor.from_pretrained("naver-clova-ix/donut-base")
+         model = VisionEncoderDecoderModel.from_pretrained("naver-clova-ix/donut-base")
+     elif model_name == "LayoutLMv3":
+         processor = LayoutLMv3Processor.from_pretrained("microsoft/layoutlmv3-base")
+         model = LayoutLMv3ForSequenceClassification.from_pretrained("microsoft/layoutlmv3-base")
+     elif model_name == "BROS":
+         # BROS checkpoints are published by NAVER CLOVA; "microsoft/bros-base" is not a Hub repo
+         processor = BrosProcessor.from_pretrained("naver-clova-ocr/bros-base-uncased")
+         model = BrosForTokenClassification.from_pretrained("naver-clova-ocr/bros-base-uncased")
+     elif model_name == "LLaVA-1.5":
+         processor = LlavaProcessor.from_pretrained("llava-hf/llava-1.5-7b-hf")
+         model = LlavaForConditionalGeneration.from_pretrained("llava-hf/llava-1.5-7b-hf")
+
+     return model, processor
+
+ def analyze_document(image, model_name, model, processor):
+     """Analyze document using selected model"""
+     try:
+         result = None  # stays None for models without a processing branch yet
+         # Process image according to model requirements
+         if model_name == "Donut":
+             inputs = processor(image, return_tensors="pt")
+             outputs = model.generate(**inputs)
+             result = processor.decode(outputs[0], skip_special_tokens=True)
+         elif model_name == "LayoutLMv3":
+             inputs = processor(image, return_tensors="pt")
+             outputs = model(**inputs)
+             result = outputs.logits.tolist()  # convert tensor to a JSON-serializable list
+         # Add similar processing for other models (BROS, LLaVA-1.5)
+
+         return result
+     except Exception as e:
+         st.error(f"Error analyzing document: {str(e)}")
+         return None
+
+ # Set page config
+ st.set_page_config(page_title="Document Analysis Comparison", layout="wide")
+
+ # Title and description
+ st.title("Document Understanding Model Comparison")
+ st.markdown("""
+ Compare different models for document analysis and understanding.
+ Upload an image and select a model to analyze it.
+ """)
+
+ # Create two columns for layout
+ col1, col2 = st.columns([1, 1])
+
+ with col1:
+     # File uploader (PIL cannot open PDFs, so only image formats are accepted here)
+     uploaded_file = st.file_uploader("Choose a document image", type=['png', 'jpg', 'jpeg'])
+
+     if uploaded_file is not None:
+         # Display uploaded image
+         image = Image.open(uploaded_file)
+         st.image(image, caption='Uploaded Document', use_column_width=True)
+
+ with col2:
+     # Model selection
+     model_info = {
+         "Donut": {
+             "description": "Best for structured OCR and document format understanding",
+             "memory": "6-8GB",
+             "strengths": ["Structured OCR", "Memory efficient", "Good with fixed formats"]
+         },
+         "LayoutLMv3": {
+             "description": "Strong layout understanding with reasoning capabilities",
+             "memory": "12-15GB",
+             "strengths": ["Layout understanding", "Reasoning", "Pre-trained knowledge"]
+         },
+         "BROS": {
+             "description": "Memory efficient with fast inference",
+             "memory": "4-6GB",
+             "strengths": ["Fast inference", "Memory efficient", "Easy fine-tuning"]
+         },
+         "LLaVA-1.5": {
+             "description": "Comprehensive OCR with strong reasoning",
+             "memory": "25-40GB",
+             "strengths": ["Strong reasoning", "Zero-shot capable", "Visual understanding"]
+         }
+     }
+
+     selected_model = st.selectbox(
+         "Select Model",
+         list(model_info.keys())
+     )
+
+     # Display model information
+     st.write("### Model Details")
+     st.write(f"**Description:** {model_info[selected_model]['description']}")
+     st.write(f"**Memory Required:** {model_info[selected_model]['memory']}")
+     st.write("**Strengths:**")
+     for strength in model_info[selected_model]['strengths']:
+         st.write(f"- {strength}")
+
+ # Analysis section
+ if uploaded_file is not None and selected_model:
+     if st.button("Analyze Document"):
+         with st.spinner('Loading model and analyzing document...'):
+             try:
+                 # Load model and processor
+                 model, processor = load_model(selected_model)
+
+                 # Analyze document
+                 results = analyze_document(image, selected_model, model, processor)
+
+                 # Display results
+                 st.write("### Analysis Results")
+                 st.json(results)
+
+             except Exception as e:
+                 st.error(f"Error during analysis: {str(e)}")
+
+ # Add information about usage and limitations
+ st.markdown("""
+ ---
+ ### Notes:
+ - Different models may perform better for different types of documents
+ - Processing time and memory requirements vary by model
+ - Results may vary based on document quality and format
+ """)
+
+ # Add a footer with version information
+ st.markdown("---")
+ st.markdown("v1.0 - Created with Streamlit")