File size: 4,159 Bytes
ab36536
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
fb24c70
 
ab36536
 
 
 
 
 
 
 
 
 
 
fb24c70
 
ab36536
 
 
fb24c70
 
ab36536
 
 
fb24c70
ab36536
 
fb24c70
ab36536
 
fb24c70
 
 
ab36536
fb24c70
 
 
ab36536
 
 
 
 
 
fb24c70
ab36536
 
fb24c70
ab36536
fb24c70
 
ab36536
 
 
 
fb24c70
 
ab36536
 
 
 
fb24c70
 
ab36536
 
 
 
fb24c70
 
 
ab36536
fb24c70
ab36536
 
 
 
 
 
fb24c70
ab36536
fb24c70
ab36536
fb24c70
ab36536
 
 
 
 
 
 
 
 
9cbdd01
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.chrome.options import Options
import time
import pandas as pd
import os
import random

def scrape_reviews(url):
    """Scrape the reviews listed on a page using headless Chrome.

    The page at ``url`` is loaded, a cookie-consent button (element id
    ``onetrust-accept-btn-handler``) is clicked if it becomes clickable
    within 10 seconds, and the page is scrolled to the bottom repeatedly
    until its height stops growing. Each review container matched by a fixed
    absolute XPath is then read field by field.

    Side effects: creates a ``data`` directory in the current working
    directory if that path does not exist, and prints progress messages.

    Args:
        url: Address of the page to load.

    Returns:
        A ``pandas.DataFrame`` with one row per review container and the
        columns ``Kullanıcı_id`` (1-based position), ``Kullanıcı Adı``,
        ``Yorum``, ``Tarih`` and ``Yıldız Sayısı``. Text fields that cannot be
        read are ``"N/A"``; a failed star lookup yields ``0``. An empty
        DataFrame is returned when no review container is found or when any
        other error occurs after the Chrome options have been built (the
        error is printed, not raised).
    """
    # Imported locally so the block does not depend on a file-level import.
    # WebDriverException is the base class of Selenium's timeout, lookup and
    # click errors, so catching it keeps the best-effort behaviour without
    # swallowing KeyboardInterrupt/SystemExit the way a bare ``except`` does.
    from selenium.common.exceptions import WebDriverException

    # Absolute XPath matching every review container; appending ``[i]``
    # selects the i-th container (XPath positions are 1-based).
    reviews_xpath = (
        '/html/body/div[1]/div[4]/div/div/div/div/div[3]'
        '/div/div/div[3]/div[2]/div'
    )

    # Create data directory if it doesn't exist. ``exist_ok`` covers the case
    # where another process creates it between the check and the call.
    data_directory = "data"
    if not os.path.exists(data_directory):
        os.makedirs(data_directory, exist_ok=True)

    def comprehensive_scroll(driver):
        """Scroll to the bottom until the document height stops changing."""
        last_height = driver.execute_script("return document.body.scrollHeight")
        while True:
            driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
            # Give lazily loaded content time to be appended to the page.
            time.sleep(3)
            new_height = driver.execute_script("return document.body.scrollHeight")
            if new_height == last_height:
                break
            last_height = new_height

    def text_or_na(driver, xpath):
        """Return the text of the element at ``xpath``, or "N/A" on failure."""
        try:
            return driver.find_element(By.XPATH, xpath).text
        except WebDriverException:
            return "N/A"

    # Chrome settings
    chrome_options = Options()
    chrome_options.add_argument('--headless')
    chrome_options.add_argument('--no-sandbox')
    chrome_options.add_argument('--disable-dev-shm-usage')
    chrome_options.add_argument('--disable-gpu')
    chrome_options.add_argument('--lang=tr')
    chrome_options.add_argument('--disable-notifications')
    chrome_options.add_argument("--window-size=1920,1080")

    # Stays None if Chrome fails to start, so ``finally`` knows not to quit.
    driver = None
    try:
        # ChromeDriver settings for Linux
        service = Service()
        driver = webdriver.Chrome(service=service, options=chrome_options)

        driver.get(url)
        time.sleep(3)

        # Dismiss the cookie banner when present; its absence is not an error.
        try:
            WebDriverWait(driver, 10).until(
                EC.element_to_be_clickable((By.ID, 'onetrust-accept-btn-handler'))
            ).click()
        except WebDriverException:
            print("Çerez popup'ı bulunamadı, devam ediliyor...")

        comprehensive_scroll(driver)

        # Use the XPaths that worked first.
        comment_elements = driver.find_elements(By.XPATH, reviews_xpath)
        total_comments = len(comment_elements)

        if not comment_elements:
            print("Yorum elementi bulunamadı!")
            return pd.DataFrame()

        print(f"Toplam {total_comments} yorum bulundu")

        data = []
        for i in range(1, total_comments + 1):
            item_xpath = f"{reviews_xpath}[{i}]"

            username = text_or_na(driver, f"{item_xpath}/div[1]/div[2]/div[1]")
            comment = text_or_na(driver, f"{item_xpath}/div[2]/p")
            date = text_or_na(driver, f"{item_xpath}/div[1]/div[2]/div[2]")

            # The rating is the number of star elements whose inline style
            # marks them as 100% filled.
            try:
                star_xpath_base = f"{item_xpath}/div[1]/div[1]/div"
                full_stars = driver.find_elements(
                    By.XPATH,
                    f"{star_xpath_base}/div[@class='star-w']/div[@class='full'][@style='width: 100%; max-width: 100%;']",
                )
                star_count = len(full_stars)
            except WebDriverException:
                star_count = 0

            data.append({
                "Kullanıcı_id": i,
                "Kullanıcı Adı": username,
                "Yorum": comment,
                "Tarih": date,
                "Yıldız Sayısı": star_count,
            })

            if i % 5 == 0:
                print(f"{i} yorum işlendi...")

        return pd.DataFrame(data)

    except Exception as e:
        # Top-level boundary: report the failure and return an empty result
        # instead of propagating, as callers expect a DataFrame.
        print(f"Hata detayı: {e}")
        return pd.DataFrame()

    finally:
        if driver is not None:
            driver.quit()