File size: 5,535 Bytes
63b4f5a 05129a0 3bba463 63b4f5a 9ff6909 3bba463 05129a0 3bba463 05129a0 3bba463 05129a0 3bba463 05129a0 63b4f5a 05129a0 63b4f5a 05129a0 63b4f5a 05129a0 63b4f5a 05129a0 63b4f5a 05129a0 63b4f5a 05129a0 63b4f5a 05129a0 63b4f5a 05129a0 63b4f5a 05129a0 63b4f5a 05129a0 63b4f5a |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 |
import os
import time
import pandas as pd
from selenium import webdriver
from selenium.webdriver.chrome.service import Service as ChromeService
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait
from webdriver_manager.chrome import ChromeDriverManager
def scrape_comments(url):
    """Scrape the reviews shown on a product review page into a DataFrame.

    A headless Chrome session opens *url*, tries to dismiss the cookie
    banner, scrolls until the page height stops growing, and then reads the
    user name, comment text, date and star rating of every review row.

    Args:
        url: Address of the review page to load.

    Returns:
        A ``pandas.DataFrame`` with the columns ``Kullanıcı_id`` (1-based row
        number), ``Kullanıcı Adı``, ``Yorum``, ``Tarih`` and
        ``Yıldız Sayısı``. Text fields that cannot be read are ``"N/A"`` and
        the star count falls back to ``0``. ``None`` is returned when no
        browser could be started or the scrape itself failed; the reason is
        printed instead of raised.
    """
    # Side effect kept from the original: make sure a local "data" directory
    # exists. Nothing in this function writes to it.
    data_directory = "data"
    if not os.path.exists(data_directory):
        # exist_ok covers another process creating it between check and call.
        os.makedirs(data_directory, exist_ok=True)

    # Absolute XPath matching the review rows. Each row's fields are addressed
    # as "<rows>[i]/..." so the long prefix is written only once.
    comment_rows_xpath = "/html/body/div[1]/div[4]/div/div/div/div/div[3]/div/div/div[3]/div[2]/div"

    def comprehensive_scroll(driver):
        """Scroll to the bottom repeatedly until the page height stops changing."""
        last_height = driver.execute_script("return document.body.scrollHeight")
        while True:
            driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
            time.sleep(3)  # Give lazily loaded content time to arrive.
            new_height = driver.execute_script("return document.body.scrollHeight")
            if new_height == last_height:
                break
            last_height = new_height

    def text_or_na(driver, xpath):
        """Return the text of the first element matching *xpath*, or "N/A"."""
        try:
            return driver.find_element(By.XPATH, xpath).text
        except Exception:
            # Best effort: a missing or unreadable field must not abort the
            # scrape. Unlike a bare ``except``, Ctrl+C still propagates.
            return "N/A"

    def start_driver():
        """Start Chrome, falling back to undetected-chromedriver; None if both fail."""
        chrome_options = webdriver.ChromeOptions()
        for argument in (
            "--disable-notifications",
            "--headless",
            "--disable-gpu",
            "--no-sandbox",
            "--disable-dev-shm-usage",
            "--window-size=1920,1080",
            "--disable-setuid-sandbox",
            "--remote-debugging-port=9222",
        ):
            chrome_options.add_argument(argument)
        chrome_options.binary_location = "/usr/bin/google-chrome"  # Path to Chrome

        try:
            return webdriver.Chrome(options=chrome_options)
        except Exception as e:
            print(f"Chrome initialization failed: {e}")

        try:
            # Fall back to undetected-chromedriver.
            # NOTE(review): this reuses the selenium ChromeOptions object;
            # confirm undetected_chromedriver accepts it.
            import undetected_chromedriver as uc

            return uc.Chrome(options=chrome_options)
        except Exception as e:
            print(f"Undetected chromedriver failed: {e}")
            return None

    try:
        driver = start_driver()
        if driver is None:
            return None

        try:
            driver.maximize_window()
            driver.get(url)

            # Try to close the cookie popup; carry on if that fails.
            try:
                WebDriverWait(driver, 10).until(
                    EC.element_to_be_clickable((By.ID, "onetrust-accept-btn-handler"))
                ).click()
            except Exception:
                print("Cookie popup not found or couldn't be closed")

            comprehensive_scroll(driver)

            total_comments = len(driver.find_elements(By.XPATH, comment_rows_xpath))

            data = []
            for i in range(1, total_comments + 1):
                row_xpath = f"{comment_rows_xpath}[{i}]"

                username = text_or_na(driver, f"{row_xpath}/div[1]/div[2]/div[1]")
                comment = text_or_na(driver, f"{row_xpath}/div[2]/p")
                date = text_or_na(driver, f"{row_xpath}/div[1]/div[2]/div[2]")

                # The rating is the number of star elements whose fill is
                # exactly "width: 100%; max-width: 100%;".
                star_xpath_base = f"{row_xpath}/div[1]/div[1]/div"
                try:
                    full_stars = driver.find_elements(
                        By.XPATH,
                        f"{star_xpath_base}/div[@class='star-w']/div[@class='full'][@style='width: 100%; max-width: 100%;']",
                    )
                    star_count = len(full_stars)
                except Exception:
                    star_count = 0

                data.append(
                    {
                        "Kullanıcı_id": i,
                        "Kullanıcı Adı": username,
                        "Yorum": comment,
                        "Tarih": date,
                        "Yıldız Sayısı": star_count,
                    }
                )

            return pd.DataFrame(data)
        finally:
            # Always release the browser, including on Ctrl+C. A failure to
            # quit must not mask the result or the original error.
            try:
                driver.quit()
            except Exception:
                pass
    except Exception as e:
        # The inner ``finally`` has already quit any driver that was started,
        # so only the failure needs reporting here.
        print(f"Hata oluştu: {e}")
        return None
if __name__ == "__main__":
    # Manual smoke test: scrape one sample product's review page.
    sample_url = "https://www.trendyol.com/apple/macbook-air-m1-cip-8gb-256gb-ssd-macos-13-qhd-tasinabilir-bilgisayar-uzay-grisi-p-68042136/yorumlar"
    scraped = scrape_comments(sample_url)

    # scrape_comments returns None on failure; report only on success.
    succeeded = scraped is not None
    if succeeded:
        row_count = len(scraped)
        print(f"Toplam {row_count} yorum çekildi.")
|