import streamlit as st
from groq import Groq
from PIL import Image
import os
from dotenv import load_dotenv
import base64
import io
# Load environment variables
load_dotenv()
api_key = os.getenv("GROQ_API_KEY")
client = Groq(api_key=api_key)
# Streamlit page configuration
st.set_page_config(
    page_title="Llama OCR",
    page_icon="🦙",
    layout="wide",
    initial_sidebar_state="expanded"
)

def main_content():
    st.title("🦙 Llama OCR")
    st.markdown('<p style="margin-top: -20px;">Extract structured text from images using Llama 3.2 Vision!</p>', unsafe_allow_html=True)
    st.markdown("---")

    # Right-aligned Clear button that resets the stored OCR result
    col1, col2 = st.columns([6, 1])
    with col2:
        if st.button("Clear 🗑️"):
            if 'ocr_result' in st.session_state:
                del st.session_state['ocr_result']
            st.rerun()

    # Show the extracted text once it is available in session state
    if 'ocr_result' in st.session_state:
        st.markdown("### 🎯 **Extracted Text**")
        st.markdown(st.session_state['ocr_result'], unsafe_allow_html=True)

def sidebar_content():
    with st.sidebar:
        st.header("📥 Upload Image")
        if 'ocr_result' not in st.session_state:
            st.write("### Please upload an image to extract text.")
        uploaded_file = st.file_uploader("Choose an image...", type=['png', 'jpg', 'jpeg'])
        if uploaded_file:
            display_uploaded_image(uploaded_file)
        # Run OCR only when an image is present and no result is stored yet
        if uploaded_file and st.button("Extract Text 🔍") and 'ocr_result' not in st.session_state:
            with st.spinner("Processing image... Please wait."):
                process_image(uploaded_file)
        if not uploaded_file and 'ocr_result' not in st.session_state:
            st.sidebar.empty()

def display_uploaded_image(uploaded_file):
    image = Image.open(uploaded_file)
    st.image(image, caption="Uploaded Image", use_container_width=True)

def encode_image(uploaded_file):
    # Return the image as a base64 string along with its original format (e.g. PNG, JPEG)
    image = Image.open(uploaded_file)
    buffered = io.BytesIO()
    image.save(buffered, format=image.format)
    img_byte_array = buffered.getvalue()
    return base64.b64encode(img_byte_array).decode('utf-8'), image.format

def process_image(uploaded_file):
    if uploaded_file:
        # Build a data URL so the image can be passed inline to the vision model
        base64_image, image_format = encode_image(uploaded_file)
        mime_type = f"image/{image_format.lower()}"
        base64_url = f"data:{mime_type};base64,{base64_image}"
        with st.spinner("Generating response... This may take a moment."):
            try:
                response = client.chat.completions.create(
                    model="llama-3.2-11b-vision-preview",
                    messages=[
                        {
                            "role": "user",
                            "content": [
                                {"type": "text", "text": "Analyze the text in the provided image. Extract all readable content and present it in a structured Markdown format. Use headings, lists, or code blocks as appropriate for clarity and organization."},
                                {"type": "image_url", "image_url": {"url": base64_url}},
                            ]
                        }
                    ],
                    temperature=0.2,
                    max_tokens=200,
                    top_p=0.5,
                    stream=False
                )
                message_content = response.choices[0].message.content
                st.session_state['ocr_result'] = message_content
            except Exception as e:
                st.error(f"Error during text extraction: {e}")

# Corrected execution order: process sidebar first, then main content
if __name__ == "__main__":
    sidebar_content()
    main_content()
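
# Usage sketch (assumptions: this script is saved as app.py, and a .env file in the
# same directory defines GROQ_API_KEY=<your Groq API key>; package names below are
# the standard PyPI ones for the imports used above):
#   pip install streamlit groq pillow python-dotenv
#   streamlit run app.py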