File size: 3,659 Bytes
a1f4811
 
 
 
 
 
 
 
 
47af600
a1f4811
 
47af600
a1f4811
47af600
a1f4811
 
 
 
47af600
 
 
a1f4811
 
 
47af600
a1f4811
47af600
a1f4811
 
 
 
47af600
 
a1f4811
 
47af600
 
a1f4811
 
47af600
 
a1f4811
47af600
a1f4811
47af600
a1f4811
 
47af600
 
a1f4811
 
47af600
 
a1f4811
47af600
 
a1f4811
 
47af600
 
a1f4811
 
 
 
 
47af600
 
a1f4811
47af600
 
 
 
 
 
 
a1f4811
47af600
 
a1f4811
 
 
 
47af600
 
 
a1f4811
 
 
 
47af600
 
a1f4811
 
 
47af600
a1f4811
47af600
a1f4811
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
import streamlit as st
from groq import Groq
from PIL import Image
import os
from dotenv import load_dotenv
import base64
import io

# Read configuration from the environment (python-dotenv is invoked first),
# then build the module-level Groq client used for completion requests.
load_dotenv()
api_key = os.environ.get("GROQ_API_KEY")
client = Groq(api_key=api_key)

# Page-wide Streamlit settings: browser-tab title and icon, wide layout, and
# the sidebar expanded on first load.
st.set_page_config(
    page_title="Llama OCR",
    # The icon was stored as mojibake (UTF-8 bytes decoded with the wrong
    # codec); restored to the intended llama emoji.
    page_icon="🦙",
    layout="wide",
    initial_sidebar_state="expanded"
)

def main_content():
    """Render the main page: title, tagline, Clear button and OCR result.

    Reads ``st.session_state['ocr_result']`` and, when the key is present,
    shows its value under an "Extracted Text" heading. Clicking *Clear*
    removes the stored result and reruns the script.
    """
    st.title("🦙 Llama OCR")
    # The tagline is a static, trusted literal, so raw HTML is acceptable here.
    st.markdown('<p style="margin-top: -20px;">Extract structured text from images using Llama 3.2 Vision!</p>', unsafe_allow_html=True)
    st.markdown("---")

    # The wide first column is only a spacer that pushes the button to the right.
    _, clear_col = st.columns([6, 1])
    with clear_col:
        if st.button("Clear 🗑️"):
            if 'ocr_result' in st.session_state:
                del st.session_state['ocr_result']
            st.rerun()

    if 'ocr_result' in st.session_state:
        st.markdown("### 🎯 **Extracted Text**")
        # The result is model output derived from a user-supplied image, i.e.
        # untrusted text: render it as Markdown only, never as raw HTML.
        st.markdown(st.session_state['ocr_result'])

def sidebar_content():
    """Render the sidebar: upload widget, image preview and Extract button.

    When an image has been uploaded it is previewed and an "Extract Text"
    button is shown. Clicking the button discards any previously stored
    ``st.session_state['ocr_result']`` and calls ``process_image`` on the
    current upload, so a new image can be processed without pressing Clear
    first.
    """
    with st.sidebar:
        st.header("📥 Upload Image")

        if 'ocr_result' not in st.session_state:
            st.write("### Please upload an image to extract text.")

        uploaded_file = st.file_uploader("Choose an image...", type=['png', 'jpg', 'jpeg'])

        # Nothing else to render until a file has been chosen.
        if not uploaded_file:
            return

        display_uploaded_image(uploaded_file)

        if st.button("Extract Text 🔍"):
            # Drop the previous result first so a failed extraction cannot
            # leave text from an older image on screen.
            if 'ocr_result' in st.session_state:
                del st.session_state['ocr_result']
            with st.spinner("Processing image... Please wait."):
                process_image(uploaded_file)

def display_uploaded_image(uploaded_file):
    """Open *uploaded_file* with Pillow and show it as a captioned preview."""
    st.image(
        Image.open(uploaded_file),
        caption="Uploaded Image",
        use_container_width=True,
    )

def encode_image(uploaded_file):
    """Return the upload as base64 text together with its image format.

    The original bytes are encoded as-is instead of being re-saved through
    Pillow: re-saving recompresses JPEGs (lossy, which can blur small text)
    and is wasted work for every format. Pillow is used only to identify
    the format.

    Args:
        uploaded_file: Binary file-like object supporting ``read`` and
            ``seek`` (assumed to be the object returned by
            ``st.file_uploader`` -- confirm for other callers).

    Returns:
        A ``(base64_string, image_format)`` tuple. ``image_format`` is the
        value Pillow reports in ``Image.format`` (e.g. ``"PNG"``,
        ``"JPEG"``).

    Raises:
        PIL.UnidentifiedImageError: If the bytes are not a recognisable
            image.
    """
    uploaded_file.seek(0)
    raw_bytes = uploaded_file.read()
    # Rewind so later readers of the same object start from the beginning.
    uploaded_file.seek(0)

    # Image.open is enough to learn the format; the context manager releases
    # Pillow's handle on the temporary buffer.
    with Image.open(io.BytesIO(raw_bytes)) as image:
        image_format = image.format

    return base64.b64encode(raw_bytes).decode('utf-8'), image_format

def process_image(uploaded_file, *, model="llama-3.2-11b-vision-preview", max_tokens=200):
    """Send the uploaded image to the vision model and store the extracted text.

    On success the model's reply is written to
    ``st.session_state['ocr_result']``. Any failure -- an unreadable image
    as well as an API error -- is reported with ``st.error`` instead of
    propagating, and the session state is left untouched.

    Args:
        uploaded_file: The uploaded image; a falsy value makes this a no-op.
        model: Model identifier passed to the chat-completions call.
        max_tokens: Upper bound on the length of the reply. The default is
            small, so raise it for text-heavy images.
    """
    if not uploaded_file:
        return

    prompt = "Analyze the text in the provided image. Extract all readable content and present it in a structured Markdown format. Use headings, lists, or code blocks as appropriate for clarity and organization."

    with st.spinner("Generating response... This may take a moment."):
        # Broad catch is deliberate: this is the UI boundary, and every failure
        # (image decoding included) should become an on-page message.
        try:
            base64_image, image_format = encode_image(uploaded_file)
            if not image_format:
                # Without a format no valid MIME type can be built.
                raise ValueError("could not determine the image format")
            mime_type = f"image/{image_format.lower()}"
            base64_url = f"data:{mime_type};base64,{base64_image}"

            response = client.chat.completions.create(
                model=model,
                messages=[
                    {
                        "role": "user",
                        "content": [
                            {"type": "text", "text": prompt},
                            {"type": "image_url", "image_url": {"url": base64_url}},
                        ]
                    }
                ],
                temperature=0.2,
                max_tokens=max_tokens,
                top_p=0.5,
                stream=False
            )
            st.session_state['ocr_result'] = response.choices[0].message.content
        except Exception as e:
            st.error(f"Error during text extraction: {e}")

# Order matters: sidebar_content() may run an extraction that stores
# 'ocr_result' in st.session_state, and main_content() reads that key to
# render the result. Running the sidebar first lets the extracted text appear
# in the same script run.
if __name__ == "__main__":
    sidebar_content()
    main_content()