# Image-Categorise / download_images.py
# Repository page header: Mohi7 - "Upload 18 files" - commit c3d8a68 (verified)
import pandas as pd
import requests
import os
# Download the first few images listed in a TSV file into a local folder.
#
# Each data row of the TSV is read as (ImageURL, Subset, ImageID). Every
# image is fetched over HTTP and stored as "<ImageID>.jpg" in `output_folder`.
# A failure on one image is reported and the script moves on to the next one.
file_path = "open-images-dataset-train0.tsv"

# Limit downloads to the first 100 images
max_images = 100

# Read TSV file, skipping the first row. Only the rows that will actually be
# downloaded are parsed (nrows), so a large file is not loaded in full just
# to use its first entries.
df = pd.read_csv(
    file_path,
    sep="\t",
    engine="python",
    skiprows=1,
    names=["ImageURL", "Subset", "ImageID"],
    nrows=max_images,
)

# Print first few rows to verify
print("First few rows of the cleaned dataset:")
print(df.head())

# Create a fixed category folder (since 'Subset' contains numbers, not real categories)
output_folder = "open_images_v7/dataset"
os.makedirs(output_folder, exist_ok=True)

# Iterate the two needed columns directly; the row limit is already enforced
# by `nrows`, so it no longer depends on the DataFrame index labels.
for image_url, image_id in zip(df["ImageURL"], df["ImageID"]):
    # Ensure the image filename ends with ".jpg" and stays inside
    # `output_folder`: a path separator in the ID would otherwise make the
    # path point into a sub-directory that does not exist.
    # NOTE(review): the third TSV column may be a base64 checksum (which can
    # contain "/") rather than a real image ID - confirm against the file.
    file_name = str(image_id).replace("/", "_").replace("\\", "_") + ".jpg"
    image_path = os.path.join(output_folder, file_name)

    try:
        response = requests.get(image_url, timeout=10)
        if response.status_code != 200:
            print(f"❌ Failed: {image_id}")
            continue
        with open(image_path, "wb") as f:
            f.write(response.content)
    except (requests.RequestException, OSError) as e:
        # Network/HTTP problems and file-system errors are expected here and
        # must not abort the remaining downloads; anything else is a
        # programming error and is allowed to surface.
        print(f"❌ Error downloading {image_id}: {e}")
    else:
        print(f"✅ Downloaded: {file_name}")