# sadhya / app.py
# RatanPrakash's picture
# Update app.py
# 3319386 verified
# raw
# history blame
# 16.6 kB
import streamlit as st
from ultralytics import YOLO
import tensorflow as tf # Change this to import TensorFlow
import numpy as np
from PIL import Image, ImageOps, ImageDraw, ImageFont
import pandas as pd
import time
from paddleocr import PaddleOCR, draw_ocr
import re
import dateparser
import os
import matplotlib.pyplot as plt
# Function to get Instagram post details
import instaloader
def get_instagram_post_details(post_url):
    """Fetch the caption and image URL of an Instagram post.

    Args:
        post_url (str): Link to the post, e.g.
            ``https://www.instagram.com/p/<shortcode>/``. A missing trailing
            slash or a trailing query string is tolerated.

    Returns:
        tuple: ``(caption, image_url)`` on success. On any failure the tuple
        is ``(error_message, None)``, so callers must test the second item
        before using the first one as a caption.
    """
    try:
        # Locate the shortcode by its path marker instead of by position:
        # splitting on "/" and taking [-2] yields "p" when the URL has no
        # trailing slash.
        match = re.search(r"/(?:p|reels?|tv)/([A-Za-z0-9_-]+)", post_url)
        if match is None:
            raise ValueError(f"No Instagram shortcode found in URL: {post_url!r}")
        shortcode = match.group(1)

        # Reuse the module-level loader `L` when the script has created one;
        # otherwise build a private instance so this function does not
        # depend on which page branch ran before it.
        loader = globals().get("L")
        if loader is None:
            loader = instaloader.Instaloader()

        post = instaloader.Post.from_shortcode(loader.context, shortcode)
        # Retrieve caption and image URL
        return post.caption, post.url
    except Exception as e:
        # Best-effort boundary: report the failure text instead of raising.
        return str(e), None
# Initialize PaddleOCR model
# One shared OCR engine for the whole script, configured for English text with
# the angle classifier switched on. It is constructed at module level, i.e.
# every time this file is executed.
# NOTE(review): presumably this is costly on each Streamlit rerun — consider
# caching the instance if the installed Streamlit version supports it.
ocr = PaddleOCR(use_angle_cls=True, lang='en')
# Team details
# Roster rendered on the "Team Details" page: each entry pairs a display name
# with the image file used as that member's portrait.
team_members = [
    {"name": member_name, "image": portrait_file}
    for member_name, portrait_file in (
        ("Aman Deep", "aman.jpg"),  # Replace with actual paths to images
        ("Nandini", "myimage.jpg"),
        ("Abhay Sharma", "gaurav.jpg"),
        ("Ratan Prakash Mishra", "anandimg.jpg"),
    )
]
# Function to preprocess the images for the model
from PIL import Image
import numpy as np
def preprocess_image(image):
    """
    Preprocess the input image for model prediction.

    The image is converted to 3-channel RGB, resized to 128x128, scaled to
    the [0, 1] range and given a leading batch axis.

    Args:
        image (PIL.Image): Input image in PIL format, in any mode
            (grayscale, palette, RGBA, ...).

    Returns:
        np.ndarray: Array of shape (1, 128, 128, 3) ready for prediction,
        or None if the image could not be processed.
    """
    try:
        # Normalise the mode first. Inspecting the array shape afterwards
        # only caught grayscale input: palette images produced a 2-D array of
        # palette indices and RGBA images produced four channels.
        rgb_image = image.convert("RGB")
        # Resize image to match model input size (LANCZOS for quality)
        resized = rgb_image.resize((128, 128), Image.LANCZOS)
        # Normalize pixel values to [0, 1] range
        img_array = np.array(resized) / 255.0
        # Add batch dimension -> (1, 128, 128, 3)
        return np.expand_dims(img_array, axis=0)
    except Exception as e:
        # Deliberate best-effort: callers receive None rather than a traceback.
        print(f"Error processing image: {e}")
        return None
# Function to create a high-quality circular (alpha-masked) thumbnail
def make_image_circular1(img, size=(256, 256)):
    """Return a ``size``-sized copy of ``img`` that is transparent outside an ellipse.

    NOTE(review): a second ``make_image_circular1(image)`` defined near the
    end of this file rebinds the name once the script reaches it.

    Args:
        img (PIL.Image): Source image; it is not modified.
        size (tuple): Output ``(width, height)``. Defaults to (256, 256).

    Returns:
        PIL.Image: Scaled, centre-cropped image whose alpha channel is the
        ellipse inscribed in ``size``.
    """
    size = tuple(size)
    # ImageOps.fit scales and centre-crops in one step. Resizing straight to
    # `size` beforehand stretched non-square images and left nothing to crop.
    output = ImageOps.fit(img, size, method=Image.LANCZOS, centering=(0.5, 0.5))
    mask = Image.new("L", size, 0)
    ImageDraw.Draw(mask).ellipse((0, 0) + size, fill=255)
    output.putalpha(mask)  # Apply the mask as transparency
    return output
# Helper: does the given path point at an existing regular file?
def file_exists(file_path):
    """Return True when ``file_path`` names an existing file, else False."""
    is_regular_file = os.path.isfile(file_path)
    return is_regular_file
def make_image_circular(image):
    """Return an RGB copy of ``image`` showing only its inscribed ellipse.

    The result has the same size as the input. Pixels outside the ellipse
    keep the fill of the freshly created RGB canvas; no alpha channel is
    added.

    Args:
        image (PIL.Image): Source image; it is not modified.

    Returns:
        PIL.Image: New "RGB" image with the masked content pasted in.
    """
    width, height = image.size[0], image.size[1]
    # 8-bit mask: 0 everywhere, 255 inside the ellipse spanning the full image
    ellipse_mask = Image.new("L", image.size, 0)
    ImageDraw.Draw(ellipse_mask).ellipse((0, 0, width, height), fill=255)
    # Paste through the mask onto a blank canvas
    canvas = Image.new("RGB", image.size)
    canvas.paste(image.convert("RGBA"), (0, 0), ellipse_mask)
    return canvas
# Function to extract dates from recognized text (regex pre-filter, then dateparser)
def extract_dates_with_dateparser(texts, result):
    """Pick out the OCR texts that parse as dates.

    Each text must first match one of four strict shapes (day-month-year with
    a numeric month, day-month-year with a three-letter month, month-year, or
    three-letter-month-year; separators ``-``, ``/``, ``.`` are optional).
    Matching texts are handed to ``dateparser`` using day-first ordering.

    Args:
        texts (list[str]): Recognized strings; ``texts[i]`` is looked up as
            ``result[0][i]``.
        result: OCR output indexed as ``result[0][i][0]`` (bounding box) and
            ``result[0][i][1][1]`` (confidence score).

    Returns:
        tuple: ``(dates_found, date_texts, date_boxes, date_scores)`` —
        parallel lists holding the 'YYYY-MM-DD' string, the original text,
        its bounding box and its confidence score.
    """
    valid_date_pattern = (
        r'^(0[1-9]|[12][0-9]|3[01])[-/.]?(0[1-9]|1[0-2])[-/.]?(\d{2}|\d{4})$|'
        r'^(0[1-9]|[12][0-9]|3[01])[-/.]?[A-Za-z]{3}[-/.]?(\d{2}|\d{4})$|'
        r'^(0[1-9]|1[0-2])[-/.]?(\d{2}|\d{4})$|'
        r'^[A-Za-z]{3}[-/.]?(\d{2}|\d{4})$'
    )
    looks_like_date = re.compile(valid_date_pattern).match

    dates_found, date_texts, date_boxes, date_scores = [], [], [], []
    for position, candidate in enumerate(texts):
        # Only texts shaped like a date are worth sending to dateparser
        if not looks_like_date(candidate):
            continue
        parsed = dateparser.parse(candidate, settings={'DATE_ORDER': 'DMY'})
        if not parsed:
            continue
        ocr_entry = result[0][position]
        dates_found.append(parsed.strftime('%Y-%m-%d'))  # Store as 'YYYY-MM-DD'
        date_texts.append(candidate)  # Original text
        date_boxes.append(ocr_entry[0])  # Bounding box
        date_scores.append(ocr_entry[1][1])  # Confidence score
    return dates_found, date_texts, date_boxes, date_scores
# Function to lay images out row by row in Streamlit columns
def display_images_in_grid(images, max_images_per_row=4):
    """Render ``images`` as a grid with at most ``max_images_per_row`` per row.

    Each row gets exactly as many columns as it has images, so a short last
    row is split among fewer (wider) columns.

    Args:
        images (list): Images accepted by ``st.image``.
        max_images_per_row (int): Upper bound on images per row.
    """
    total = len(images)
    row_count = (total + max_images_per_row - 1) // max_images_per_row  # ceiling division
    for row in range(row_count):
        first = row * max_images_per_row
        row_images = images[first:first + max_images_per_row]
        columns = st.columns(min(max_images_per_row, total - first))
        for column, picture in zip(columns, row_images):
            with column:
                st.image(picture, use_column_width=True)
# Function to display team members in circular format
def display_team_members(members, max_members_per_row=4):
    """Render team members as a grid of circular portraits with names.

    A member whose image file is missing is still listed: a warning takes
    the place of the portrait instead of aborting the whole page.

    Args:
        members (list[dict]): Entries with a ``"name"`` and an ``"image"``
            (path to the portrait file).
        max_members_per_row (int): Upper bound on members per row; must be
            at least 1.

    Raises:
        ValueError: If ``max_members_per_row`` is smaller than 1.
    """
    if max_members_per_row < 1:
        raise ValueError("max_members_per_row must be at least 1")

    for start in range(0, len(members), max_members_per_row):
        row_members = members[start:start + max_members_per_row]
        # One column per member actually present in this row
        cols = st.columns(len(row_members))
        for col, member in zip(cols, row_members):
            with col:
                image_path = member["image"]
                if os.path.isfile(image_path):
                    # The context manager closes the file once the masked
                    # copy has been produced.
                    with Image.open(image_path) as img:
                        circular_img = make_image_circular(img)
                    st.image(circular_img, use_column_width=True)
                else:
                    st.warning(f"Image not found: {image_path}")
                st.write(member["name"])  # Name below the portrait
# Main-area page title
st.title("Amazon Smbhav")
# Sidebar: branding, team information and the page selector. Everything
# created inside the `with` block is placed in the sidebar, in this order.
with st.sidebar:
    st.title("Amazon Smbhav")
    st.write("DELHI TECHNOLOGICAL UNIVERSITY")
    st.title("Navigation")
    st.write("Team Name: sadhya")
    # The selected entry decides which page branch below is rendered
    app_mode = st.selectbox(
        "Choose the task",
        ["Welcome", "Project Details", "Task 1", "Team Details"],
    )
if app_mode == "Welcome":
    # Landing page: greeting, intro video, banner image
    st.write("# Welcome to Amazon Smbhav! 🎉")
    # Read the local clip inside a context manager so the file handle is
    # closed as soon as the bytes are in memory.
    with open('Finalist.mp4', 'rb') as video_file:  # Replace with the path to your video file
        video_bytes = video_file.read()
    # Embed the video using st.video()
    st.video(video_bytes)
    # Welcome banner; the image file is closed once it has been handed to Streamlit
    with Image.open("grid_banner.jpg") as welcome_image:  # Replace with the path to your welcome image
        st.image(welcome_image, use_column_width=True)
elif app_mode=="Project Details":
st.markdown("""
## Navigation
- [Project Overview](#project-overview)
- [Proposal Round](#proposal-round)
- [Problem Statement](#problem-statement)
- [Proposed Solution](#proposed-solution)
""")
# Project Overview
st.write("## Project Overview:")
st.write("""
### Problem Statement
_Develop a system that automates Amazon product listings from social media content, extracting and organizing details from posts to generate accurate, engaging, and optimized listings._
---
### Solution Overview
Our system simplifies the listing process by analyzing social media content, using OCR, image recognition, LLMs, and internet data to create professional Amazon listings.
---
### Task Breakdown
#### Task 1: OCR for Image and Label Details
**Objective:** Extract core product details from images, labels, and packaging found in social media posts.
- **Tools:** PaddleOCR, LLMs.
- **Approach:**
- Use PaddleOCR to scan images for text, identifying product names, brands, and key features.
- Apply LLMs to refine extracted data, categorize key information (product name, type, features), and enhance product descriptions.
- Integrate internet sources to cross-verify product details, retrieve additional information, and collect metadata like the brand background or product specs.
---
#### Additional Task: Image Recognition & Object Counting
**Objective:** Quantify objects within social media images for batch products or multi-item listings.
- **Tools:** YOLOv8.
- **Approach:**
- Train YOLOv8 on a relevant dataset to recognize specific product types or packaging layouts.
- Use object detection counts to provide quantitative data (e.g., "3-item bundle"), enhancing accuracy in listings.
---
#### Task 2: Data Validation & Structuring
**Objective:** Organize and validate extracted information, ensuring it’s formatted to meet Amazon’s listing requirements.
- **Tools:** Regex, LLMs.
- **Approach:**
- Format and validate extracted details into Amazon-compliant structures (titles, descriptions, bullet points).
- Use regex and parser tools for accuracy checks.
- Leverage LLMs to create compelling descriptions and marketing brochures.
- Search online for supplementary media (images/videos) to enrich the listing.
---
#### Task 3: Amazon API Integration
**Objective:** Connect with Amazon’s API to publish fully formed product listings directly.
- **Tools:** Amazon MWS or Selling Partner API.
- **Approach:**
- Send structured listing data (text, media, product details) to Amazon’s API endpoints.
- Handle feedback for submission errors and make necessary adjustments.
- Develop a UI/dashboard for users to preview and edit listings before publishing.
---
### Future Enhancements
- **Model Improvement:** Further refine OCR and parsing accuracy.
- **Dashboard Development:** Enable users to preview and customize listings.
- **Multi-Market Compatibility:** Expand support to other e-commerce platforms.
This approach automates listing creation directly from social media content, helping sellers quickly launch optimized Amazon product pages.
""")
elif app_mode == "Team Details":
st.write("## Meet Our Team:")
display_team_members(team_members)
st.write("Delhi Technological University")
elif app_mode == "Task 1":
st.write("## Task 1: 🖼️ OCR to Extract Details 📄")
st.write("Using OCR to extract details from product packaging material, including brand name and pack size.")
# Instantiate Instaloader
L = instaloader.Instaloader()
# Streamlit UI
st.title("Instagram Post Details Extractor")
# Text input for Instagram post URL
post_url = st.text_input("Enter Instagram Post URL:")
if post_url:
caption, image_path = get_instagram_post_details(post_url)
if image_path and os.path.exists(image_path):
st.subheader("Caption:")
st.write(caption)
st.subheader("Image:")
# Load and display the image
image = Image.open(image_path)
st.image(image, use_column_width=True)
# Clean up (optional)
os.remove(image_path)
else:
st.error("Failed to retrieve the post details. Please check the URL.")
# File uploader for images (supports multiple files)
uploaded_files = st.file_uploader("Upload images of products", type=["jpeg", "png", "jpg"], accept_multiple_files=True)
if uploaded_files:
st.write("### Uploaded Images in Circular Format:")
circular_images = []
for uploaded_file in uploaded_files:
img = Image.open(uploaded_file)
circular_img = make_image_circular(img) # Create circular images
circular_images.append(circular_img)
# Display the circular images in a matrix/grid format
display_images_in_grid(circular_images, max_images_per_row=4)
# Function to simulate loading process with a progress bar
def simulate_progress():
progress_bar = st.progress(0)
for percent_complete in range(100):
time.sleep(0.02)
progress_bar.progress(percent_complete + 1)
# Function to remove gibberish using regex (removes non-alphanumeric chars, filters out very short text)
def clean_text(text):
# Keep text with letters, digits, and spaces, and remove short/irrelevant text
return re.sub(r'[^a-zA-Z0-9\s]', '', text).strip()
# Function to extract the most prominent text (product name) and other details
def extract_product_info(results):
product_name = ""
product_details = ""
largest_text_size = 0
for line in results:
for box in line:
text, confidence = box[1][0], box[1][1]
text_size = box[0][2][1] - box[0][0][1] # Calculate height of the text box
# Clean the text to avoid gibberish
clean_text_line = clean_text(text)
if confidence > 0.7 and len(clean_text_line) > 2: # Only consider confident, meaningful text
if text_size > largest_text_size: # Assume the largest text is the product name
largest_text_size = text_size
product_name = clean_text_line
else:
product_details += clean_text_line + " "
return product_name, product_details.strip()
if st.button("Start Analysis"):
simulate_progress()
# Loop through each uploaded image and process them
for uploaded_image in uploaded_files:
# Load the uploaded image
image = Image.open(uploaded_image)
# st.image(image, caption=f'Uploaded Image: {uploaded_image.name}', use_column_width=True)
# Convert image to numpy array for OCR processing
img_array = np.array(image)
# Perform OCR on the image
st.write(f"Extracting details from {uploaded_image.name}...")
result = ocr.ocr(img_array, cls=True)
#############################
#OCR result text to be parsed here through LLM and get product listing content.
#############################
# Process the OCR result to extract product name and properties
product_name, product_details = extract_product_info(result)
# UI display for single image product details
st.markdown("---")
st.markdown(f"### **Product Name:** `{product_name}`")
st.write(f"**Product Properties:** {product_details}")
st.markdown("---")
else:
st.write("Please upload images to extract product details.")
def make_image_circular1(image):
    """Return an RGB copy of ``image`` showing only its inscribed ellipse.

    NOTE(review): this rebinds ``make_image_circular1``, replacing the earlier
    ``(img, size=(256, 256))`` definition in this file from this point on. The
    body does the same as ``make_image_circular``.

    Args:
        image (PIL.Image): Source image; it is not modified.

    Returns:
        PIL.Image: New "RGB" image of the same size as the input.
    """
    # Bounding box of the ellipse: the entire image area
    full_extent = (0, 0, image.size[0], image.size[1])
    mask = Image.new("L", image.size, 0)
    ImageDraw.Draw(mask).ellipse(full_extent, fill=255)
    # Paste the source through the mask onto a blank RGB canvas
    circular_image = Image.new("RGB", image.size)
    rgba_source = image.convert("RGBA")
    circular_image.paste(rgba_source, (0, 0), mask)
    return circular_image
def display_images_in_grid1(images, max_images_per_row=4):
    """Render ``images`` in rows of fixed-width columns.

    Every row is created with ``max_images_per_row`` columns, so images keep
    the same width even when the last row is only partly filled. Each image
    is converted to RGB before display.

    Args:
        images (list): PIL images to show.
        max_images_per_row (int): Number of columns per row.
    """
    for start in range(0, len(images), max_images_per_row):
        row_images = images[start:start + max_images_per_row]
        cols = st.columns(max_images_per_row)
        # A row never holds more images than columns, so zip pairs them fully
        for col, img in zip(cols, row_images):
            col.image(img.convert("RGB"), use_column_width=True)
# Footer: a centred copyright line that fades in over two seconds. The markup
# carries its own <style> block, so raw HTML rendering has to be enabled.
_footer_html = """
<style>
@keyframes fade-in {
from { opacity: 0; }
to { opacity: 1;}
}
.footer {
text-align: center;
font-size: 1.1em;
animation: fade-in 2s;
padding-top: 2rem;
}
</style>
<div class="footer">
<p>© 2024 Amazon Smbhav Challenge. All rights reserved.</p>
</div>
"""
st.markdown(_footer_html, unsafe_allow_html=True)