SURESHBEEKHANI committed on
Commit
a1f4811
·
verified ·
1 Parent(s): 47af600

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +58 -88
app.py CHANGED
@@ -1,130 +1,100 @@
1
- import streamlit as st # Streamlit for the web app interface
2
- from groq import Groq # Groq for handling the API requests
3
- from PIL import Image # PIL for image processing (opening, displaying images)
4
- import os # For file and environment variable handling
5
- from dotenv import load_dotenv # To load environment variables from .env file
6
- import base64 # To encode images into base64 format
7
- import io # To handle in-memory byte buffers
8
-
9
- # Load environment variables from .env file
10
  load_dotenv()
 
 
11
 
12
- # Retrieve the Groq API key from the environment variable
13
- api_key = os.getenv("GROQ_API_KEY") # Retrieves the API key stored in the .env file
14
-
15
- # Initialize the Groq client with the API key
16
- client = Groq(api_key=api_key) # Creates a Groq client object using the API key
17
-
18
- # Page configuration for Streamlit
19
  st.set_page_config(
20
- page_title="Llama OCR", # Set the title of the app
21
- page_icon="πŸ¦™", # Set the page icon (Llama emoji)
22
- layout="wide", # Use a wide layout for the app
23
- initial_sidebar_state="expanded" # Set the initial state of the sidebar to expanded
24
  )
25
 
26
- # Function to handle main content of the page
27
  def main_content():
28
- st.title("πŸ¦™ Llama OCR") # Display the main title
29
- st.markdown('<p style="margin-top: -20px;">Extract structured text from images using Llama 3.2 Vision!</p>', unsafe_allow_html=True) # Display a description below the title with custom styling
30
- st.markdown("---") # Horizontal line to separate sections
31
 
32
- col1, col2 = st.columns([6, 1]) # Create two columns: a large left column and a smaller right column for the clear button
33
  with col2:
34
- if st.button("Clear πŸ—‘οΈ"): # If the "Clear" button is clicked
35
- if 'ocr_result' in st.session_state: # Check if OCR result exists in session state
36
- del st.session_state['ocr_result'] # Delete the OCR result from session state
37
- st.rerun() # Rerun the app to reset everything
38
 
39
- # Display OCR result in the main content section (if it exists)
40
  if 'ocr_result' in st.session_state:
41
- st.markdown("### 🎯 **Extracted Text**") # Professional heading with a target emoji to make it stand out
42
- st.markdown(st.session_state['ocr_result'], unsafe_allow_html=True) # Display the OCR result stored in session state
43
 
44
- # Function to handle sidebar content
45
  def sidebar_content():
46
- with st.sidebar: # Everything inside this block will appear in the sidebar
47
- st.header("πŸ“₯ Upload Image") # Sidebar header for the image upload section
48
 
49
- # Display message if no image is uploaded
50
  if 'ocr_result' not in st.session_state:
51
- st.write("### Please upload an image to extract text.") # Instruction message to upload an image
52
 
53
- uploaded_file = st.file_uploader("Choose an image...", type=['png', 'jpg', 'jpeg']) # Upload an image file with supported types
54
 
55
- if uploaded_file: # If an image is uploaded
56
- display_uploaded_image(uploaded_file) # Call function to display the uploaded image
57
 
58
- # This button triggers the processing of the uploaded image to extract text
59
  if uploaded_file and st.button("Extract Text πŸ”") and 'ocr_result' not in st.session_state:
60
- with st.spinner("Processing image... Please wait."): # Show a spinner during image processing
61
- process_image(uploaded_file) # Call the function to process the image and extract text
62
 
63
- # If no image is uploaded or processed, clear the sidebar
64
  if not uploaded_file and 'ocr_result' not in st.session_state:
65
- st.sidebar.empty() # Ensures the sidebar is empty unless there is interaction
66
 
67
- # Function to display the uploaded image
68
  def display_uploaded_image(uploaded_file):
69
- image = Image.open(uploaded_file) # Open the uploaded image using PIL
70
- st.image(image, caption="Uploaded Image", use_container_width=True) # Display the image in the app with a caption and automatic width
71
 
72
- # Function to encode the image into base64 format
73
  def encode_image(uploaded_file):
74
- image = Image.open(uploaded_file) # Open the uploaded image
75
- buffered = io.BytesIO() # Create an in-memory byte buffer
76
- image.save(buffered, format=image.format) # Save the image into the buffer
77
- img_byte_array = buffered.getvalue() # Get the byte array of the image
78
- return base64.b64encode(img_byte_array).decode('utf-8'), image.format # Return the base64 encoded image and its format
79
 
80
- # Function to process the image and extract text using Groq API
81
  def process_image(uploaded_file):
82
- if uploaded_file: # Check if an image is uploaded
83
- # Encode the uploaded image to base64 and retrieve the image format
84
  base64_image, image_format = encode_image(uploaded_file)
85
-
86
- # Determine the MIME type for the base64 encoded image
87
  mime_type = f"image/{image_format.lower()}"
88
-
89
- # Create a base64 URL for the image
90
  base64_url = f"data:{mime_type};base64,{base64_image}"
91
 
92
- # Start spinner while waiting for the API response
93
  with st.spinner("Generating response... This may take a moment."):
94
  try:
95
- # Call the Groq API to extract text from the image
96
  response = client.chat.completions.create(
97
- model="llama-3.2-11b-vision-preview", # Specify the model to use (Llama 3.2 Vision)
98
  messages=[
99
  {
100
- "role": "user", # Role of the message sender
101
- "content": [ # The content of the message
102
- {"type": "text", "text": "Analyze the text in the provided image. Extract all readable content "
103
- "and present it in a structured Markdown format. Use headings, lists, "
104
- "or code blocks as appropriate for clarity and organization."},
105
- {
106
- "type": "image_url", # Type of content: image
107
- "image_url": {
108
- "url": base64_url, # The base64 URL of the uploaded image
109
- },
110
- },
111
  ]
112
  }
113
  ],
114
- temperature=0.2, # Set the temperature to 0.1 for less randomness and more focused results
115
- max_tokens=200, # Limit the maximum number of tokens (words) to 200 for shorter responses
116
- top_p=0.5, # Set top_p to 0.5 to control the diversity of generated text
117
- stream=False # Disable streaming of results
118
  )
119
-
120
- # Access the content of the response from the Groq API
121
  message_content = response.choices[0].message.content
122
- st.session_state['ocr_result'] = message_content # Store the extracted text in session state
123
-
124
- except Exception as e: # Catch any errors during the image processing
125
- st.error(f"Error during text extraction: {e}") # Display the error message in the app
126
 
127
- # Running the Streamlit app
128
  if __name__ == "__main__":
129
- main_content() # Display the main content (title, OCR result)
130
- sidebar_content() # Display the sidebar content (image upload and processing)
 
1
+ import streamlit as st
2
+ from groq import Groq
3
+ from PIL import Image
4
+ import os
5
+ from dotenv import load_dotenv
6
+ import base64
7
+ import io
8
+
# Load environment variables (e.g. GROQ_API_KEY) from a local .env file, if present.
load_dotenv()
api_key = os.getenv("GROQ_API_KEY")

# Streamlit page configuration. It is kept ahead of every other st.* call in
# this module because Streamlit documents set_page_config as needing to be the
# first Streamlit command executed on a page.
st.set_page_config(
    page_title="Llama OCR",
    page_icon="🦙",
    layout="wide",
    initial_sidebar_state="expanded",
)

# Fail early with a readable message rather than letting client construction
# (or the first API request) fail on a missing key.
if not api_key:
    st.error(
        "GROQ_API_KEY is not set. Add it to your environment or .env file "
        "and reload the app."
    )
    st.stop()

# Shared Groq client used by process_image().
client = Groq(api_key=api_key)
21
 
 
def main_content():
    """Render the main page area: title, tagline, Clear button and OCR result.

    Reads ``st.session_state['ocr_result']`` and, when present, shows it under
    an "Extracted Text" heading. Pressing "Clear" removes that entry and
    reruns the script so the page is redrawn without the result.
    """
    st.title("🦙 Llama OCR")
    # Static, app-authored HTML: raw HTML is enabled only for this tagline.
    st.markdown(
        '<p style="margin-top: -20px;">Extract structured text from images using Llama 3.2 Vision!</p>',
        unsafe_allow_html=True,
    )
    st.markdown("---")

    # Wide spacer column on the left pushes the Clear button to the right edge.
    _, clear_col = st.columns([6, 1])
    with clear_col:
        if st.button("Clear 🗑️"):
            if 'ocr_result' in st.session_state:
                del st.session_state['ocr_result']
            st.rerun()

    if 'ocr_result' in st.session_state:
        st.markdown("### 🎯 **Extracted Text**")
        # The result is model output derived from a user-supplied image, so it
        # is rendered as plain Markdown with raw HTML left disabled.
        st.markdown(st.session_state['ocr_result'])
37
 
 
def sidebar_content():
    """Render the sidebar: image uploader, preview and the Extract button.

    When an image has been uploaded it is previewed and an "Extract Text"
    button is shown. Clicking the button calls ``process_image`` unless an
    OCR result is already stored in ``st.session_state['ocr_result']``; in
    that case the user is told to clear the existing result first.
    """
    with st.sidebar:
        st.header("📥 Upload Image")

        if 'ocr_result' not in st.session_state:
            st.write("### Please upload an image to extract text.")

        uploaded_file = st.file_uploader("Choose an image...", type=['png', 'jpg', 'jpeg'])

        if uploaded_file:
            display_uploaded_image(uploaded_file)

            if st.button("Extract Text 🔍"):
                if 'ocr_result' in st.session_state:
                    # A result is already on screen; say so instead of
                    # silently ignoring the click.
                    st.info("Clear the current result before extracting text again.")
                else:
                    with st.spinner("Processing image... Please wait."):
                        process_image(uploaded_file)
56
 
 
def display_uploaded_image(uploaded_file):
    """Show a preview of the uploaded image, scaled to the container width.

    Args:
        uploaded_file: File-like object accepted by ``PIL.Image.open``.
    """
    st.image(
        Image.open(uploaded_file),
        caption="Uploaded Image",
        use_container_width=True,
    )
60
 
 
def encode_image(uploaded_file):
    """Base64-encode an uploaded image and report its format.

    The file's original bytes are encoded as-is. PIL is used only to
    identify the image format, so the image is not re-compressed (which
    would be lossy for JPEG) before being sent for OCR.

    Args:
        uploaded_file: Seekable binary file-like object holding the image.

    Returns:
        A ``(base64_string, image_format)`` tuple, where ``image_format`` is
        the format name reported by PIL for the file.
    """
    image_format = Image.open(uploaded_file).format

    # Format detection moved the read position; rewind so the complete
    # file is encoded.
    uploaded_file.seek(0)
    raw_bytes = uploaded_file.read()

    return base64.b64encode(raw_bytes).decode('utf-8'), image_format
67
 
 
def process_image(uploaded_file, max_tokens=200):
    """Send the uploaded image to the Groq vision model and store the OCR text.

    On success the model's reply is saved in
    ``st.session_state['ocr_result']``. Any failure (unreadable image or API
    error) is reported with ``st.error`` instead of raising.

    Args:
        uploaded_file: The uploaded image; nothing happens if it is falsy.
        max_tokens: Upper bound on the number of tokens the model may
            generate. A warning is shown if the reply was cut off at this
            limit.
    """
    if not uploaded_file:
        return

    with st.spinner("Generating response... This may take a moment."):
        # Broad catch on purpose: this is the UI boundary, and any failure
        # should surface as a message in the app rather than a traceback.
        try:
            base64_image, image_format = encode_image(uploaded_file)
            mime_type = f"image/{image_format.lower()}"
            base64_url = f"data:{mime_type};base64,{base64_image}"

            response = client.chat.completions.create(
                model="llama-3.2-11b-vision-preview",
                messages=[
                    {
                        "role": "user",
                        "content": [
                            {
                                "type": "text",
                                "text": (
                                    "Analyze the text in the provided image. Extract all readable content "
                                    "and present it in a structured Markdown format. Use headings, lists, "
                                    "or code blocks as appropriate for clarity and organization."
                                ),
                            },
                            {"type": "image_url", "image_url": {"url": base64_url}},
                        ],
                    }
                ],
                temperature=0.2,
                max_tokens=max_tokens,
                top_p=0.5,
                stream=False,
            )

            first_choice = response.choices[0]
            st.session_state['ocr_result'] = first_choice.message.content

            # finish_reason "length" means generation stopped at max_tokens,
            # so the stored text is likely incomplete.
            if first_choice.finish_reason == "length":
                st.warning(
                    "The extracted text was truncated because the response "
                    "reached the token limit."
                )
        except Exception as e:
            st.error(f"Error during text extraction: {e}")
 
96
 
def _render_app():
    """Draw the whole page for one script run."""
    # The sidebar goes first: it may store a fresh 'ocr_result' in session
    # state, which main_content() then displays during the same run.
    sidebar_content()
    main_content()


if __name__ == "__main__":
    _render_app()