|
import streamlit as st |
|
import os |
|
import glob |
|
import base64 |
|
import json |
|
import pandas as pd |
|
import matplotlib.pyplot as plt |
|
import matplotlib.image as mpimg |
|
from langchain_openai import ChatOpenAI |
|
from langchain_core.pydantic_v1 import BaseModel, Field |
|
from langchain_core.messages import HumanMessage, SystemMessage |
|
from langchain_core.output_parsers import JsonOutputParser |
|
from langchain_core.runnables import chain |
|
from PIL import Image as PILImage |
|
from io import BytesIO |
|
|
|
|
|
st.title("Vehicle Information Extraction from Images") |
|
|
|
translateimg = PILImage.open("car.JPG") |
|
st.image(translateimg, use_container_width=True) |
|
|
|
|
|
openai_api_key = st.text_input("Enter your OpenAI API Key:", type="password") |
|
|
|
|
|
if openai_api_key: |
|
os.environ["OPENAI_API_KEY"] = openai_api_key |
|
|
|
|
|
class Vehicle(BaseModel): |
|
Type: str = Field(..., examples=["Car", "Truck", "Motorcycle", 'Bus', 'Van'], description="The type of the vehicle.") |
|
License: str = Field(..., description="The license plate number of the vehicle.") |
|
Make: str = Field(..., examples=["Toyota", "Honda", "Ford", "Suzuki"], description="The Make of the vehicle.") |
|
Model: str = Field(..., examples=["Corolla", "Civic", "F-150"], description="The Model of the vehicle.") |
|
Color: str = Field(..., example=["Red", "Blue", "Black", "White"], description="Return the color of the vehicle.") |
|
Year: str = Field(None, description="The year of the vehicle.") |
|
Condition: str = Field(None, description="The condition of the vehicle.") |
|
Logo: str = Field(None, description="The visible logo of the vehicle, if applicable.") |
|
Damage: str = Field(None, description="Any visible damage or wear and tear on the vehicle.") |
|
Region: str = Field(None, description="Region or country based on the license plate or clues from the image.") |
|
PlateType: str = Field(None, description="Type of license plate, e.g., government, personal.") |
|
|
|
|
|
parser = JsonOutputParser(pydantic_object=Vehicle) |
|
instructions = parser.get_format_instructions() |
|
|
|
|
|
def image_encoding(inputs): |
|
"""Load and convert image to base64 encoding""" |
|
with open(inputs["image_path"], "rb") as image_file: |
|
image_base64 = base64.b64encode(image_file.read()).decode("utf-8") |
|
return {"image": image_base64} |
|
|
|
|
|
def display_image_grid(image_paths, rows=2, cols=3, figsize=(10, 7)): |
|
fig = plt.figure(figsize=figsize) |
|
max_images = rows * cols |
|
image_paths = image_paths[:max_images] |
|
|
|
for idx, path in enumerate(image_paths): |
|
ax = fig.add_subplot(rows, cols, idx + 1) |
|
img = mpimg.imread(path) |
|
ax.imshow(img) |
|
ax.axis('off') |
|
filename = path.split('/')[-1] |
|
ax.set_title(filename) |
|
|
|
plt.tight_layout() |
|
st.pyplot(fig) |
|
|
|
|
|
@chain |
|
def prompt(inputs): |
|
prompt = [ |
|
SystemMessage(content="""You are an AI assistant tasked with extracting detailed information from a vehicle image. Please extract the following details: |
|
- Vehicle type (e.g., Car, Truck, Bus) |
|
- License plate number and type (if identifiable, such as personal, commercial, government) |
|
- Vehicle make, model, and year (e.g., 2020 Toyota Corolla) |
|
- Vehicle color and condition (e.g., Red, well-maintained, damaged) |
|
- Any visible brand logos or distinguishing marks (e.g., Tesla logo) |
|
- Details of any visible damage (e.g., scratches, dents) |
|
- Vehicle’s region or country (based on the license plate or other clues) |
|
If some details are unclear or not visible, return `None` for those fields. Do not guess or provide inaccurate information."""), |
|
HumanMessage( |
|
content=[ |
|
{"type": "text", "text": "Analyze the vehicle in the image and extract as many details as possible, including type, license plate, make, model, year, condition, damage, etc."}, |
|
{"type": "text", "text": instructions}, |
|
{"type": "image_url", "image_url": {"url": f"data:image/jpeg;base64,{inputs['image']}", "detail": "low"}} |
|
] |
|
) |
|
] |
|
return prompt |
|
|
|
|
|
@chain |
|
def MLLM_response(inputs): |
|
model: ChatOpenAI = ChatOpenAI(model="gpt-4o-2024-08-06", temperature=0.0, max_tokens=1024) |
|
output = model.invoke(inputs) |
|
return output.content |
|
|
|
|
|
pipeline = image_encoding | prompt | MLLM_response | parser |
|
|
|
|
|
st.header("Upload Vehicle Images for Information Extraction") |
|
|
|
|
|
upload_option = st.radio("Select Upload Type", ["Single Image Upload", "Batch Images Upload"]) |
|
|
|
|
|
if upload_option == "Single Image Upload": |
|
st.subheader("Upload a Single Vehicle Image") |
|
uploaded_image = st.file_uploader("Choose an Image (JPEG, PNG, GIF, BMP, etc.)", type=["jpeg", "png", "gif", "bmp", "jpg"]) |
|
|
|
if uploaded_image is not None: |
|
|
|
image = PILImage.open(uploaded_image) |
|
st.image(image, caption="Uploaded Image", use_container_width=True) |
|
|
|
|
|
image_path = "/tmp/uploaded_image" + os.path.splitext(uploaded_image.name)[1] |
|
with open(image_path, "wb") as f: |
|
f.write(uploaded_image.getbuffer()) |
|
|
|
|
|
if st.button("Extract Vehicle Information"): |
|
|
|
output = pipeline.invoke({"image_path": image_path}) |
|
|
|
|
|
st.subheader("Extracted Vehicle Information") |
|
st.json(output) |
|
|
|
|
|
elif upload_option == "Batch Images Upload": |
|
st.sidebar.header("Batch Image Upload") |
|
batch_images = st.sidebar.file_uploader("Upload Images (JPEG, PNG, GIF, BMP, etc.)", type=["jpeg", "png", "gif", "bmp", "jpg"], accept_multiple_files=True) |
|
|
|
if batch_images: |
|
batch_input = [{"image_path": f"/tmp/{file.name}"} for file in batch_images] |
|
for file in batch_images: |
|
with open(f"/tmp/{file.name}", "wb") as f: |
|
f.write(file.getbuffer()) |
|
|
|
|
|
if st.button("Extract Vehicle Information from Batch"): |
|
|
|
batch_output = pipeline.batch(batch_input) |
|
df = pd.DataFrame(batch_output) |
|
st.dataframe(df) |
|
|
|
|
|
image_paths = [f"/tmp/{file.name}" for file in batch_images] |
|
display_image_grid(image_paths) |