File size: 4,276 Bytes
ab36536
 
 
 
 
 
 
 
99d7b92
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
ab36536
 
99d7b92
 
ab36536
 
 
 
99d7b92
 
ab36536
99d7b92
ab36536
 
 
99d7b92
ab36536
99d7b92
 
ab36536
 
 
 
99d7b92
 
 
 
fb24c70
ab36536
26b9192
99d7b92
ab36536
99d7b92
ab36536
99d7b92
 
ab36536
99d7b92
 
ab36536
 
 
 
99d7b92
 
ab36536
 
 
 
99d7b92
 
ab36536
 
 
99d7b92
ab36536
99d7b92
 
ab36536
99d7b92
ab36536
 
99d7b92
ab36536
 
 
99d7b92
ab36536
fb24c70
99d7b92
 
 
 
 
 
ab36536
 
99d7b92
 
 
ab36536
99d7b92
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
import os
import time

import pandas as pd
from selenium import webdriver
from selenium.common.exceptions import TimeoutException, WebDriverException
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait

def comprehensive_scroll(driver, pause_seconds=3, max_scrolls=None):
    """Scroll to the bottom of the page until its height stops growing.

    After every scroll the function sleeps for ``pause_seconds`` and then
    compares ``document.body.scrollHeight`` with the value seen before the
    scroll; an unchanged height ends the loop.

    Args:
        driver: Object exposing ``execute_script(script)`` (e.g. a Selenium
            WebDriver).
        pause_seconds: Seconds to sleep after each scroll before measuring
            the height again. Defaults to 3, the previously hard-coded value.
        max_scrolls: Upper bound on the number of scrolls, or ``None`` (the
            default) for no limit. Use it to guarantee termination on pages
            whose height keeps growing.

    Returns:
        None.
    """
    last_height = driver.execute_script("return document.body.scrollHeight")
    scrolls_done = 0

    while max_scrolls is None or scrolls_done < max_scrolls:
        driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
        scrolls_done += 1
        # Give the page time to append content before measuring again.
        time.sleep(pause_seconds)

        new_height = driver.execute_script("return document.body.scrollHeight")
        if new_height == last_height:
            # Height did not change after the scroll: treat as the bottom.
            break
        last_height = new_height

# Absolute XPath matching the review containers on the page.
# NOTE(review): tied to the exact page layout; any markup change breaks it.
_REVIEW_XPATH = '/html/body/div[1]/div[4]/div/div/div/div/div[3]/div/div/div[3]/div[2]/div'


def _text_or_na(driver, xpath):
    """Return the text of the first element matching *xpath*, or "N/A"."""
    try:
        return driver.find_element(By.XPATH, xpath).text
    except WebDriverException:
        # A missing/stale field must not abort the whole scrape.
        return "N/A"


def _count_full_stars(driver, review_xpath):
    """Count the fully filled star elements under the review at *review_xpath*."""
    star_xpath = (
        f"{review_xpath}/div[1]/div[1]/div"
        "/div[@class='star-w']/div[@class='full']"
        "[@style='width: 100%; max-width: 100%;']"
    )
    try:
        return len(driver.find_elements(By.XPATH, star_xpath))
    except WebDriverException:
        return 0


def _extract_review(driver, index):
    """Build the row dict for the review at 1-based position *index*."""
    review_xpath = f"{_REVIEW_XPATH}[{index}]"
    return {
        "Kullanıcı_id": index,
        "Kullanıcı Adı": _text_or_na(driver, f"{review_xpath}/div[1]/div[2]/div[1]"),
        "Yorum": _text_or_na(driver, f"{review_xpath}/div[2]/p"),
        "Tarih": _text_or_na(driver, f"{review_xpath}/div[1]/div[2]/div[2]"),
        "Yıldız Sayısı": _count_full_stars(driver, review_xpath),
    }


def scrape_reviews(url):
    """Scrape the reviews found at *url* into a DataFrame.

    Opens the page in headless Chrome, accepts the cookie banner when one
    shows up, scrolls until the page stops growing and reads every review.

    Args:
        url: Address of the page to scrape.

    Returns:
        A ``pandas.DataFrame`` with one row per review and the columns
        ``Kullanıcı_id``, ``Kullanıcı Adı``, ``Yorum``, ``Tarih`` and
        ``Yıldız Sayısı``. Text fields that cannot be read are ``"N/A"`` and
        an unreadable star rating is ``0``. If the scrape fails (including a
        failure to start the browser) the error is printed and an empty
        DataFrame is returned.
    """
    # Make sure the data directory exists (no check-then-create race).
    data_directory = "data"
    os.makedirs(data_directory, exist_ok=True)
    temp_file = os.path.join(data_directory, 'temp_comments.csv')

    # Chrome options
    chrome_options = webdriver.ChromeOptions()
    chrome_options.add_argument('--headless')
    chrome_options.add_argument('--disable-gpu')
    chrome_options.add_argument('--no-sandbox')
    chrome_options.add_argument('--disable-dev-shm-usage')
    chrome_options.add_argument("--window-size=1920,1080")

    # Bound before the try so the finally clause can tell whether the
    # browser was ever started.
    driver = None
    try:
        # ChromeDriver setup for Linux; 'chromedriver' is passed without a
        # directory.
        service = Service('chromedriver')
        driver = webdriver.Chrome(service=service, options=chrome_options)

        driver.get(url)

        # Accept the cookie popup; its absence is not an error.
        try:
            WebDriverWait(driver, 10).until(
                EC.element_to_be_clickable((By.ID, 'onetrust-accept-btn-handler'))
            ).click()
        except TimeoutException:
            pass

        comprehensive_scroll(driver)

        total_comments = len(driver.find_elements(By.XPATH, _REVIEW_XPATH))
        data = [_extract_review(driver, i) for i in range(1, total_comments + 1)]

        # Save as a temporary file.
        # NOTE(review): the file is removed again in the finally clause, so it
        # never outlives this call — confirm whether it is still needed.
        df = pd.DataFrame(data)
        df.to_csv(temp_file, index=False, encoding='utf-8-sig')

        return df

    except Exception as e:
        print(f"Hata oluştu: {str(e)}")
        return pd.DataFrame()  # Return an empty DataFrame

    finally:
        try:
            if driver is not None:
                driver.quit()
        finally:
            # Delete the temporary file even if quitting the browser failed.
            if os.path.exists(temp_file):
                os.remove(temp_file)