rajsecrets0 committed on
Commit
7b76e52
·
verified ·
1 Parent(s): ad4ced6

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +187 -0
app.py ADDED
@@ -0,0 +1,187 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import base64
3
+ import requests
4
+ from PIL import Image, ImageDraw
5
+ from io import BytesIO
6
+ import fitz # PyMuPDF
7
+ import time
8
+ import os
9
+
10
# Configuration
# API key is read from Streamlit's secrets store when the module is loaded.
GEMINI_API_KEY = st.secrets["GEMINI_API_KEY"]
# Model identifier used for Gemini requests. Verify with Gemini API documentation.
GEMINI_MODEL = "gemini-1.5-flash"
# Categories offered to the model in the classification prompt.
DOCUMENT_TYPES = [
    "Land Records",
    "Caste Certificates",
    "Property Registrations",
]
14
+
15
# Define a class to mimic Streamlit's UploadedFile object for the example image
class ExampleFile:
    """In-memory stand-in exposing ``name``, ``type`` and ``getvalue()``.

    Only the attributes this app reads from an uploaded file are provided.
    """

    def __init__(self, file_path):
        """Read *file_path* fully into memory.

        The MIME type is guessed from the file name so that a PNG or PDF
        example is not mislabelled; ``image/jpeg`` remains the fallback when
        the extension is unknown.

        Raises:
            OSError: if the file cannot be opened or read.
        """
        # Function-scope import keeps this class usable without touching the
        # file-level import block.
        import mimetypes

        self.name = os.path.basename(file_path)
        guessed_type, _ = mimetypes.guess_type(self.name)
        self.type = guessed_type or "image/jpeg"
        with open(file_path, "rb") as f:
            self._buffer = BytesIO(f.read())

    def getvalue(self):
        """Return the complete file content as bytes."""
        return self._buffer.getvalue()
25
+
26
# Initialize session state
def initialize_session_state():
    """Create the session-state entries used by the app if they are missing.

    Existing values are left untouched; missing entries start as ``None``.
    """
    for slot in ("processed_doc", "current_file"):
        if slot not in st.session_state:
            st.session_state[slot] = None
32
+
33
# Reset session state
def reset_session_state():
    """Clear the processed result and the currently selected file."""
    for slot in ("processed_doc", "current_file"):
        st.session_state[slot] = None
37
+
38
# Encode file to base64 for Gemini API
def encode_file(file):
    """Return the content of *file* as a base64 text string.

    *file* must provide ``getvalue()`` returning bytes. On any failure the
    error is shown through ``st.error`` and ``None`` is returned.
    """
    try:
        raw_bytes = file.getvalue()
        encoded = base64.b64encode(raw_bytes)
        return str(encoded, 'utf-8')
    except Exception as exc:
        st.error(f"Error encoding file: {str(exc)}")
        return None
46
+
47
# Query Gemini API
# REST endpoint template for the Gemini generateContent method.
_GEMINI_ENDPOINT = (
    "https://generativelanguage.googleapis.com/v1beta/models/{model}:generateContent"
)


def _sniff_mime_type(image_b64):
    """Guess a MIME type from the leading magic bytes of a base64 payload.

    Recognises PDF, PNG and JPEG; anything else (including undecodable
    input) is reported as ``text/plain``.
    """
    try:
        # 16 base64 characters decode to 12 bytes, enough for every signature
        # checked below.
        head = base64.b64decode(image_b64[:16])
    except ValueError:
        return "text/plain"
    if head.startswith(b"%PDF"):
        return "application/pdf"
    if head.startswith(b"\x89PNG"):
        return "image/png"
    if head.startswith(b"\xff\xd8\xff"):
        return "image/jpeg"
    return "text/plain"


def query_gemini(prompt, image_b64, mime_type=None, timeout=60):
    """Send *prompt* plus a base64-encoded document to Gemini and return its text.

    Args:
        prompt: Instruction text for the model.
        image_b64: Base64-encoded document content.
        mime_type: MIME type of the document. When omitted it is guessed from
            the payload's leading bytes.
        timeout: Seconds to wait for the HTTP request before giving up.

    Returns:
        The concatenated text of the first candidate, ``""`` when the response
        contains no candidate text, or ``None`` when the request fails (the
        error is shown through ``st.error``).
    """
    try:
        # The key travels in a header rather than the URL so it cannot end up
        # in an error message that quotes the request URL.
        headers = {
            "x-goog-api-key": GEMINI_API_KEY,
            "Content-Type": "application/json"
        }
        payload = {
            "contents": [
                {
                    "parts": [
                        {"text": prompt},
                        {
                            "inline_data": {
                                "mime_type": mime_type or _sniff_mime_type(image_b64),
                                "data": image_b64,
                            }
                        },
                    ]
                }
            ]
        }
        response = requests.post(
            _GEMINI_ENDPOINT.format(model=GEMINI_MODEL),
            headers=headers,
            json=payload,
            timeout=timeout,  # without a timeout a stalled connection hangs the app
        )
        response.raise_for_status()
        candidates = response.json().get("candidates") or []
        if not candidates:
            return ""
        parts = (candidates[0].get("content") or {}).get("parts") or []
        return "".join(part.get("text", "") for part in parts)
    except Exception as e:
        # UI boundary: report any failure to the user instead of crashing.
        st.error(f"Gemini API error: {str(e)}")
        return None
69
+
70
# Process the document
def _render_preview(file):
    """Build a PIL preview image for *file*, or return None if its type is unsupported."""
    if file.type == "application/pdf":
        # The context manager closes the PDF once the first page is rendered;
        # filetype is given explicitly because the data comes from memory.
        with fitz.open(stream=file.getvalue(), filetype="pdf") as pdf:
            pix = pdf[0].get_pixmap()
            return Image.frombytes("RGB", (pix.width, pix.height), pix.samples)
    if file.type.startswith('image/'):
        return Image.open(BytesIO(file.getvalue()))
    if file.type == "text/plain":
        text = file.getvalue().decode('utf-8')
        img = Image.new('RGB', (800, 600), color=(73, 109, 137))
        ImageDraw.Draw(img).text((10, 10), text, fill=(255, 255, 0))
        return img
    return None


def process_document(file):
    """Classify, extract details from and verify *file*, storing the result.

    On success ``st.session_state.processed_doc`` becomes a dict with the keys
    ``type``, ``details``, ``verification`` and ``preview``. Unsupported,
    empty or unreadable files produce an ``st.error`` message and leave the
    function early. Any unexpected exception is reported and clears
    ``processed_doc``.
    """
    try:
        with st.spinner("Analyzing document..."):
            # Encode file to base64
            image_b64 = encode_file(file)
            if image_b64 is None:
                # encode_file has already reported the problem.
                return
            if not image_b64:
                st.error("The selected document is empty.")
                return

            # Store preview image
            preview = _render_preview(file)
            if preview is None:
                st.error("Unsupported file format")
                return
            st.session_state.doc_preview = preview

            # Classify document
            classify_prompt = f"Classify this document into one of these categories: {', '.join(DOCUMENT_TYPES)}. Respond only with the category name."
            doc_type = query_gemini(classify_prompt, image_b64)
            if doc_type:
                # Drop surrounding whitespace so the label renders cleanly.
                doc_type = doc_type.strip()

            # Extract details
            extract_prompt = """Extract and organize all important details from this document including:
- Names
- Dates
- Identification numbers
- Locations
- Key terms
Format as a bullet-point list with clear headings."""
            details = query_gemini(extract_prompt, image_b64)

            # Verify authenticity
            verify_prompt = "Analyze this document for signs of tampering or forgery. Check for: inconsistent fonts, alignment issues, suspicious modifications. Provide verification conclusion."
            verification = query_gemini(verify_prompt, image_b64)

            st.session_state.processed_doc = {
                "type": doc_type or "Unclassified",
                "details": details or "No details extracted",
                "verification": verification or "Verification failed",
                "preview": preview
            }

            st.success("Document processing complete!")
            # Presumably a short pause so the success message stays visible.
            time.sleep(1)

    except Exception as e:
        st.error(f"Document processing failed: {str(e)}")
        st.session_state.processed_doc = None
128
+
129
# Main application
def main():
    """Render the sidebar controls and the main result view.

    The sidebar lets the user upload a document or pick the bundled example,
    start processing, and reset. The main area shows the preview, extracted
    details and verification analysis once a document has been processed.
    """
    st.set_page_config(page_title="DocVerify AI", layout="wide")
    initialize_session_state()

    # Sidebar Controls
    with st.sidebar:
        st.header("Document Controls")

        uploaded_file = st.file_uploader(
            "Upload Document",
            type=["pdf", "jpg", "jpeg", "png", "txt"],
            key="uploaded_file"
        )

        if st.button("Use Example Image"):
            example_file_path = "Caste-Certificate.jpg"
            try:
                st.session_state.current_file = ExampleFile(example_file_path)
                st.session_state["example_selected"] = True
            except OSError as e:
                # A missing or unreadable example must not crash the app.
                st.session_state.current_file = None
                st.session_state["example_selected"] = False
                st.error(f"Could not load example image: {str(e)}")
        elif uploaded_file is not None:
            st.session_state.current_file = uploaded_file
            st.session_state["example_selected"] = False
        elif not st.session_state.get("example_selected", False):
            # The button reads True only on the run triggered by its click.
            # Without this flag the example would be cleared on the next
            # rerun, before "Process Document" could use it. A flag is used
            # rather than an isinstance check because the script, and with it
            # the class definition, is re-executed on every rerun.
            st.session_state.current_file = None

        if st.button("Process Document"):
            if st.session_state.current_file:
                process_document(st.session_state.current_file)
            else:
                st.error("Please select a document to process.")

        if st.button("New Document"):
            reset_session_state()
            st.session_state["example_selected"] = False
            st.rerun()

        if st.session_state.processed_doc:
            st.divider()
            st.subheader("Document Summary")
            st.markdown(f"**Type:** {st.session_state.processed_doc['type']}")
            st.markdown(f"**Verification Status:**\n{st.session_state.processed_doc['verification']}")

    # Main Interface
    st.title("📄 Automated Document Verifier")

    if st.session_state.processed_doc and 'preview' in st.session_state.processed_doc:
        col1, col2 = st.columns([1, 2])
        with col1:
            st.subheader("Document Preview")
            st.image(st.session_state.processed_doc['preview'], use_column_width=True)

        with col2:
            st.subheader("Extracted Details")
            st.markdown(st.session_state.processed_doc['details'])

            st.subheader("Verification Analysis")
            st.markdown(st.session_state.processed_doc['verification'])
    else:
        st.info("Please select a document and click 'Process Document' to begin verification analysis.")


if __name__ == "__main__":
    main()