Spaces:

Mohi7
/

Image-Categorise

Running

App Files Files Community

Image-Categorise / scrapping.py

Mohi7

Upload 18 files

c3d8a68 verified 15 days ago

raw

history blame

1.82 kB

	import os
	import time
	import requests
	from selenium import webdriver
	from selenium.webdriver.chrome.service import Service
	from selenium.webdriver.common.by import By
	from webdriver_manager.chrome import ChromeDriverManager
	from tqdm import tqdm # Progress bar

	# Scrape image URLs from a Pexels search results page with headless Chrome,
	# then download up to 100 of them into a local folder.

	# Setup Chrome Driver
	options = webdriver.ChromeOptions()
	options.add_argument("--headless")  # Run in background
	options.add_argument("--disable-gpu")  # Prevents rendering issues
	driver = webdriver.Chrome(service=Service(ChromeDriverManager().install()), options=options)

	search_url = "https://www.pexels.com/search/productivity/"
	image_urls = []

	# The browser is only needed until the `src` strings have been read, so it is
	# closed in `finally`: Chrome is shut down even if loading/scrolling fails.
	try:
	    # Open Pexels search page
	    driver.get(search_url)

	    # Wait for images to load
	    time.sleep(5)

	    # Scroll down multiple times to load more images
	    for _ in range(10):
	        driver.execute_script("window.scrollBy(0, 2000);")
	        time.sleep(2)  # Wait for new images to load

	    # Find all image elements
	    images = driver.find_elements(By.TAG_NAME, "img")

	    # Extract Image URLs
	    for img in images:
	        url = img.get_attribute("src")
	        if url and "pexels.com" in url:  # Keep only links that point at pexels.com
	            image_urls.append(url)
	finally:
	    # Close the browser
	    driver.quit()

	# Drop repeated URLs (first occurrence wins, order preserved) so the same
	# picture is not saved twice, then keep only the first 100 images.
	image_urls = list(dict.fromkeys(image_urls))[:100]

	# Create folder if not exists
	save_folder = "Productivity"
	os.makedirs(save_folder, exist_ok=True)

	# Download and save images
	downloaded = 0
	for idx, img_url in enumerate(tqdm(image_urls, desc="Downloading Images"), start=1):
	    try:
	        # A timeout keeps one stalled connection from hanging the whole run;
	        # raise_for_status() stops HTTP error bodies being written as ".jpg".
	        response = requests.get(img_url, timeout=30)
	        response.raise_for_status()
	        with open(os.path.join(save_folder, f"image_{idx}.jpg"), "wb") as f:
	            f.write(response.content)
	    except (requests.RequestException, OSError) as e:
	        # Best effort: report the failure and carry on with the next image.
	        print(f"Error downloading image {idx}: {e}")
	    else:
	        downloaded += 1

	# Report the number of files actually written, not the number attempted.
	print(f"✅ {downloaded} images downloaded in the '{save_folder}' folder.")