import logging
import os
import re
from urllib.parse import urlparse

import requests
from bs4 import BeautifulSoup
from openpyxl import Workbook

from app.models.product import Product
from app.services.scraping_service import ScrapingService
from app.utils.logger import logger


# Script body: fetch each configured site, collect its products, and write
# them to one Excel workbook per site in the current working directory.

logger.info("Starting scraping process")

# Instantiated at module level as in the original script; nothing in this
# section calls it.
scraping_service = ScrapingService()

websites = ["https://example.com", "https://example.net"]

# Upper bound (seconds) on each HTTP request. requests applies no timeout
# unless one is passed, so a single unresponsive site could otherwise stall
# the whole run.
_REQUEST_TIMEOUT_SECONDS = 10


def _workbook_filename(url):
    """Return a filesystem-safe ``.xlsx`` file name derived from *url*.

    The raw URL cannot be used as a file name because it contains ``:`` and
    ``/``. The host and path are kept, and every run of characters outside
    ``[A-Za-z0-9._-]`` is collapsed to a single underscore, e.g.
    ``https://example.com`` -> ``example.com.xlsx``.
    """
    parsed = urlparse(url)
    raw_stem = (parsed.netloc + parsed.path) or url
    stem = re.sub(r"[^A-Za-z0-9._-]+", "_", raw_stem).strip("._-")
    # Fall back to a fixed name if nothing usable survives sanitising.
    return f"{stem or 'products'}.xlsx"


def _extract_products(soup):
    """Return a list of Product objects for the product cards in *soup*.

    A card is a ``<div class="product">``. Its name is read from
    ``<h2 class="product-name">`` and its price from
    ``<span class="product-price">``, both stripped of surrounding
    whitespace. Cards missing either tag are skipped with a warning, because
    ``find`` returns ``None`` for a missing tag and reading ``.text`` from it
    would raise ``AttributeError``.
    """
    found = []
    for card in soup.find_all("div", {"class": "product"}):
        name_tag = card.find("h2", {"class": "product-name"})
        price_tag = card.find("span", {"class": "product-price"})
        if name_tag is None or price_tag is None:
            logger.warning("Skipping product entry without a name or price")
            continue
        found.append(Product(name_tag.text.strip(), price_tag.text.strip()))
    return found


for website in websites:
    logger.info(f"Scraping {website}")

    try:
        response = requests.get(website, timeout=_REQUEST_TIMEOUT_SECONDS)
        # Treat HTTP error statuses as failures so an error page is not
        # parsed as if it were the product listing.
        response.raise_for_status()
    except requests.RequestException as exc:
        # One failing site should not prevent the remaining sites from
        # being scraped.
        logger.error(f"Failed to fetch {website}: {exc}")
        continue

    soup = BeautifulSoup(response.text, "html.parser")
    products = _extract_products(soup)
    logger.info(f"Found {len(products)} products on {website}")

    # One workbook per site: a header row followed by one row per product.
    wb = Workbook()
    ws = wb.active
    ws.title = "Products"
    ws.append(["Product Name", "Product Price"])
    for product in products:
        ws.append([product.name, product.price])

    filename = _workbook_filename(website)
    wb.save(filename)
    logger.info(f"Saved products to {filename}")