File size: 4,159 Bytes
ab36536 fb24c70 ab36536 fb24c70 ab36536 fb24c70 ab36536 fb24c70 ab36536 fb24c70 ab36536 fb24c70 ab36536 fb24c70 ab36536 fb24c70 ab36536 fb24c70 ab36536 fb24c70 ab36536 fb24c70 ab36536 fb24c70 ab36536 fb24c70 ab36536 fb24c70 ab36536 fb24c70 ab36536 fb24c70 ab36536 fb24c70 ab36536 9cbdd01 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 |
import os
import random
import time

import pandas as pd
from selenium import webdriver
from selenium.common.exceptions import WebDriverException
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait
# Absolute XPath of the element whose direct <div> children this scraper
# treats as individual reviews. Kept in one place because every per-review
# lookup below is built from it.
_REVIEW_LIST_XPATH = (
    '/html/body/div[1]/div[4]/div/div/div/div/div[3]/div/div/div[3]/div[2]'
)


def _scroll_to_bottom(driver, pause_seconds=3):
    """Scroll to the page bottom repeatedly until the document stops growing."""
    last_height = driver.execute_script("return document.body.scrollHeight")
    while True:
        driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
        # Pause so the page has time to append more content before the
        # height is measured again.
        time.sleep(pause_seconds)
        new_height = driver.execute_script("return document.body.scrollHeight")
        if new_height == last_height:
            break
        last_height = new_height


def _text_or_default(driver, xpath, default="N/A"):
    """Return the text of the element at *xpath*, or *default* if Selenium fails."""
    try:
        return driver.find_element(By.XPATH, xpath).text
    except WebDriverException:
        # Best effort: a missing or stale element must not abort the scrape.
        return default


def scrape_reviews(url):
    """Scrape the reviews listed on the page at *url* with headless Chrome.

    The page is opened, the cookie banner is accepted when present, the page
    is scrolled until its height stops changing, and one record is built per
    review element found under ``_REVIEW_LIST_XPATH``.

    Args:
        url: Address of the page to load.

    Returns:
        A ``pandas.DataFrame`` with the columns ``"Kullanıcı_id"`` (1-based
        position of the review), ``"Kullanıcı Adı"``, ``"Yorum"``, ``"Tarih"``
        and ``"Yıldız Sayısı"``. Text fields that cannot be read are ``"N/A"``
        and an unreadable star rating is ``0``. An empty DataFrame is returned
        when no review element is found or when any other error occurs; the
        error is printed rather than raised.
    """
    # Create the data directory if it doesn't exist. ``exist_ok`` covers the
    # case where another process creates it between the check and the call.
    data_directory = "data"
    if not os.path.exists(data_directory):
        os.makedirs(data_directory, exist_ok=True)

    # Chrome settings
    chrome_options = Options()
    chrome_options.add_argument('--headless')
    chrome_options.add_argument('--no-sandbox')
    chrome_options.add_argument('--disable-dev-shm-usage')
    chrome_options.add_argument('--disable-gpu')
    chrome_options.add_argument('--lang=tr')
    chrome_options.add_argument('--disable-notifications')
    chrome_options.add_argument("--window-size=1920,1080")

    # Bound before the ``try`` so the ``finally`` clause can tell whether the
    # browser was actually started.
    driver = None
    try:
        # ChromeDriver setup (the original comment notes this is for Linux).
        service = Service()
        driver = webdriver.Chrome(service=service, options=chrome_options)

        driver.get(url)
        time.sleep(3)

        # Accept the cookie banner when it shows up; carry on without it
        # otherwise.
        try:
            WebDriverWait(driver, 10).until(
                EC.element_to_be_clickable((By.ID, 'onetrust-accept-btn-handler'))
            ).click()
        except WebDriverException:
            print("Çerez popup'ı bulunamadı, devam ediliyor...")

        _scroll_to_bottom(driver)

        comment_elements = driver.find_elements(
            By.XPATH, f'{_REVIEW_LIST_XPATH}/div'
        )
        total_comments = len(comment_elements)

        if total_comments == 0:
            print("Yorum elementi bulunamadı!")
            return pd.DataFrame()

        print(f"Toplam {total_comments} yorum bulundu")

        data = []
        # XPath positions are 1-based, hence the range starting at 1.
        for i in range(1, total_comments + 1):
            review_xpath = f'{_REVIEW_LIST_XPATH}/div[{i}]'

            username = _text_or_default(
                driver, f'{review_xpath}/div[1]/div[2]/div[1]'
            )
            comment = _text_or_default(driver, f'{review_xpath}/div[2]/p')
            date = _text_or_default(
                driver, f'{review_xpath}/div[1]/div[2]/div[2]'
            )

            try:
                star_xpath_base = f'{review_xpath}/div[1]/div[1]/div'
                # The rating is the number of star elements whose inline
                # style is exactly 100% width.
                full_stars = driver.find_elements(
                    By.XPATH,
                    f"{star_xpath_base}/div[@class='star-w']/div[@class='full'][@style='width: 100%; max-width: 100%;']",
                )
                star_count = len(full_stars)
            except WebDriverException:
                star_count = 0

            data.append({
                "Kullanıcı_id": i,
                "Kullanıcı Adı": username,
                "Yorum": comment,
                "Tarih": date,
                "Yıldız Sayısı": star_count,
            })

            if i % 5 == 0:
                print(f"{i} yorum işlendi...")

        return pd.DataFrame(data)

    except Exception as e:
        # Top-level boundary: report the failure and hand back an empty
        # frame so callers always receive a DataFrame.
        print(f"Hata detayı: {str(e)}")
        return pd.DataFrame()

    finally:
        if driver is not None:
            driver.quit()