File size: 4,276 Bytes
ab36536 99d7b92 ab36536 99d7b92 ab36536 99d7b92 ab36536 99d7b92 ab36536 99d7b92 ab36536 99d7b92 ab36536 99d7b92 fb24c70 ab36536 26b9192 99d7b92 ab36536 99d7b92 ab36536 99d7b92 ab36536 99d7b92 ab36536 99d7b92 ab36536 99d7b92 ab36536 99d7b92 ab36536 99d7b92 ab36536 99d7b92 ab36536 99d7b92 ab36536 99d7b92 ab36536 fb24c70 99d7b92 ab36536 99d7b92 ab36536 99d7b92 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 |
import os
import time

import pandas as pd
from selenium import webdriver
from selenium.common.exceptions import TimeoutException, WebDriverException
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait
def comprehensive_scroll(driver, pause=3, max_scrolls=None):
    """Scroll to the bottom of the page until its height stops growing.

    After every scroll the function waits ``pause`` seconds and re-reads
    ``document.body.scrollHeight``; it stops as soon as two consecutive
    readings are equal.

    Args:
        driver: Object exposing ``execute_script(script)`` (the caller in
            this file passes a Selenium WebDriver).
        pause: Seconds to sleep after each scroll so that newly requested
            content has time to load. Defaults to 3.
        max_scrolls: Optional upper bound on the number of scrolls. ``None``
            (the default) scrolls until the height is stable, which never
            terminates on a page that keeps growing.

    Returns:
        None.
    """
    last_height = driver.execute_script("return document.body.scrollHeight")
    scrolls_done = 0

    while max_scrolls is None or scrolls_done < max_scrolls:
        # Jump to the current bottom of the document.
        driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
        scrolls_done += 1
        time.sleep(pause)  # Give the page time to append more content.

        new_height = driver.execute_script("return document.body.scrollHeight")
        if new_height == last_height:
            # Height unchanged after scrolling: nothing more was loaded.
            break
        last_height = new_height
def _element_text_or_na(driver, xpath):
    """Return the text of the first element matching *xpath*, or "N/A" on a WebDriver error."""
    try:
        return driver.find_element(By.XPATH, xpath).text
    except WebDriverException:
        # Covers a missing or stale element; the field is reported as unknown.
        return "N/A"


def scrape_reviews(url):
    """Scrape the reviews shown on the page at *url* with headless Chrome.

    The function opens the page, accepts the cookie banner when one shows
    up, scrolls until no more content is loaded, and then reads user name,
    comment text, date and number of full stars for every review node.

    Args:
        url: Address of the page to load.

    Returns:
        A ``pandas.DataFrame`` with the columns ``Kullanıcı_id``,
        ``Kullanıcı Adı``, ``Yorum``, ``Tarih`` and ``Yıldız Sayısı`` (one
        row per review). An empty ``DataFrame`` is returned when any error
        occurs; the error is printed, not raised.

    Side effects:
        Creates the ``data`` directory if needed. A temporary
        ``data/temp_comments.csv`` is written and removed again before the
        function returns.
    """
    # Make sure the data directory exists.
    data_directory = "data"
    os.makedirs(data_directory, exist_ok=True)
    temp_file = os.path.join(data_directory, 'temp_comments.csv')

    # Chrome options.
    chrome_options = webdriver.ChromeOptions()
    chrome_options.add_argument('--headless')
    chrome_options.add_argument('--disable-gpu')
    chrome_options.add_argument('--no-sandbox')
    chrome_options.add_argument('--disable-dev-shm-usage')
    chrome_options.add_argument("--window-size=1920,1080")

    # Bound before the try so the finally clause can tell whether the
    # browser was ever started.
    driver = None
    try:
        # ChromeDriver is referenced by bare name; the original note states
        # that no explicit path is needed on Linux.
        service = Service('chromedriver')
        driver = webdriver.Chrome(service=service, options=chrome_options)
        driver.get(url)

        # Accept the cookie popup. If it never becomes clickable within the
        # wait, carry on: the banner is not needed to read the reviews.
        try:
            WebDriverWait(driver, 10).until(
                EC.element_to_be_clickable((By.ID, 'onetrust-accept-btn-handler'))
            ).click()
        except TimeoutException:
            pass

        comprehensive_scroll(driver)

        # Absolute XPath of the review nodes; every per-review lookup below
        # is built from this prefix plus a 1-based position.
        comments_xpath = '/html/body/div[1]/div[4]/div/div/div/div/div[3]/div/div/div[3]/div[2]/div'
        total_comments = len(driver.find_elements(By.XPATH, comments_xpath))

        data = []
        for position in range(1, total_comments + 1):
            review_xpath = f'{comments_xpath}[{position}]'
            header_xpath = f'{review_xpath}/div[1]'

            username = _element_text_or_na(driver, f'{header_xpath}/div[2]/div[1]')
            comment = _element_text_or_na(driver, f'{review_xpath}/div[2]/p')
            date = _element_text_or_na(driver, f'{header_xpath}/div[2]/div[2]')

            # The rating is the number of star elements whose inline style
            # marks them as completely filled.
            stars_xpath = (
                f"{header_xpath}/div[1]/div"
                "/div[@class='star-w']"
                "/div[@class='full'][@style='width: 100%; max-width: 100%;']"
            )
            try:
                star_count = len(driver.find_elements(By.XPATH, stars_xpath))
            except WebDriverException:
                star_count = 0

            data.append({
                "Kullanıcı_id": position,
                "Kullanıcı Adı": username,
                "Yorum": comment,
                "Tarih": date,
                "Yıldız Sayısı": star_count
            })

        # Save as a temporary file (removed again in the finally clause).
        df = pd.DataFrame(data)
        df.to_csv(temp_file, index=False, encoding='utf-8-sig')
        return df
    except Exception as e:
        print(f"Hata oluştu: {str(e)}")
        return pd.DataFrame()  # Return an empty DataFrame on failure.
    finally:
        # Only quit a browser that was actually started; otherwise a failed
        # start-up would raise here and replace the empty-DataFrame result.
        if driver is not None:
            driver.quit()
        # Delete the temporary file.
        if os.path.exists(temp_file):
            os.remove(temp_file)