File size: 5,535 Bytes
63b4f5a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
05129a0
 
3bba463
63b4f5a
9ff6909
3bba463
05129a0
3bba463
05129a0
3bba463
 
 
05129a0
3bba463
05129a0
63b4f5a
05129a0
 
 
63b4f5a
05129a0
63b4f5a
05129a0
 
 
63b4f5a
05129a0
 
63b4f5a
05129a0
63b4f5a
05129a0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
63b4f5a
05129a0
 
 
 
63b4f5a
05129a0
63b4f5a
05129a0
63b4f5a
 
 
05129a0
 
 
 
 
63b4f5a
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
import os
import time

import pandas as pd
from selenium import webdriver
from selenium.webdriver.chrome.service import Service as ChromeService
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait
from webdriver_manager.chrome import ChromeDriverManager

# Absolute XPath that matches every review card on the comments page.
_COMMENT_CARDS_XPATH = (
    "/html/body/div[1]/div[4]/div/div/div/div/div[3]/div/div/div[3]/div[2]/div"
)

# Appended to a card's star container; matches only completely filled stars.
_FULL_STAR_SUFFIX = (
    "/div[@class='star-w']/div[@class='full']"
    "[@style='width: 100%; max-width: 100%;']"
)


def _scroll_until_stable(driver, pause=3):
    """Scroll to the bottom repeatedly until the page height stops growing."""
    last_height = driver.execute_script("return document.body.scrollHeight")
    while True:
        driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
        time.sleep(pause)  # Give lazily loaded content time to arrive.

        new_height = driver.execute_script("return document.body.scrollHeight")
        if new_height == last_height:
            return
        last_height = new_height


def _create_driver():
    """Start a headless Chrome driver, or return None if no driver can start.

    Plain Selenium is tried first; if that fails, ``undetected_chromedriver``
    is tried with the same options.
    """
    chrome_options = webdriver.ChromeOptions()
    chrome_options.add_argument("--disable-notifications")
    chrome_options.add_argument("--headless")
    chrome_options.add_argument("--disable-gpu")
    chrome_options.add_argument("--no-sandbox")
    chrome_options.add_argument("--disable-dev-shm-usage")
    chrome_options.add_argument("--window-size=1920,1080")
    chrome_options.add_argument("--disable-setuid-sandbox")
    chrome_options.add_argument("--remote-debugging-port=9222")
    chrome_options.binary_location = "/usr/bin/google-chrome"  # Path to Chrome

    try:
        return webdriver.Chrome(options=chrome_options)
    except Exception as e:
        print(f"Chrome initialization failed: {str(e)}")

    try:
        # Fall back to undetected-chromedriver; imported lazily because it is
        # only needed when the regular driver could not be started.
        import undetected_chromedriver as uc

        return uc.Chrome(options=chrome_options)
    except Exception as e:
        print(f"Undetected chromedriver failed: {str(e)}")
        return None


def _text_or_na(driver, xpath):
    """Return the text of the element at *xpath*, or "N/A" if lookup fails."""
    try:
        return driver.find_element(By.XPATH, xpath).text
    except Exception:
        return "N/A"


def scrape_comments(url):
    """Scrape the review cards from the comments page at *url*.

    The page is opened in headless Chrome, the cookie banner is dismissed when
    present, and the page is scrolled until its height stops changing. Each
    review card then yields one row.

    Args:
        url: Address of the comments page to load.

    Returns:
        A ``pandas.DataFrame`` with the columns ``Kullanıcı_id`` (1-based
        position of the card), ``Kullanıcı Adı``, ``Yorum``, ``Tarih`` and
        ``Yıldız Sayısı``; fields that cannot be read are "N/A" (or 0 for
        the star count). ``None`` is returned when the browser cannot be
        started or any other error interrupts the scrape.
    """
    # Create data directory if it doesn't exist
    os.makedirs("data", exist_ok=True)

    driver = None
    try:
        driver = _create_driver()
        if driver is None:
            return None

        driver.maximize_window()
        driver.get(url)

        # Try to close the cookie popup; carry on if it is absent.
        try:
            WebDriverWait(driver, 10).until(
                EC.element_to_be_clickable((By.ID, "onetrust-accept-btn-handler"))
            ).click()
        except Exception:
            print("Cookie popup not found or couldn't be closed")

        _scroll_until_stable(driver)

        total_comments = len(driver.find_elements(By.XPATH, _COMMENT_CARDS_XPATH))

        data = []
        for i in range(1, total_comments + 1):
            card_xpath = f"{_COMMENT_CARDS_XPATH}[{i}]"

            username = _text_or_na(driver, f"{card_xpath}/div[1]/div[2]/div[1]")
            comment = _text_or_na(driver, f"{card_xpath}/div[2]/p")
            date = _text_or_na(driver, f"{card_xpath}/div[1]/div[2]/div[2]")

            try:
                full_stars = driver.find_elements(
                    By.XPATH,
                    f"{card_xpath}/div[1]/div[1]/div{_FULL_STAR_SUFFIX}",
                )
                star_count = len(full_stars)
            except Exception:
                star_count = 0

            data.append(
                {
                    "Kullanıcı_id": i,
                    "Kullanıcı Adı": username,
                    "Yorum": comment,
                    "Tarih": date,
                    "Yıldız Sayısı": star_count,
                }
            )

        return pd.DataFrame(data)

    except Exception as e:
        # Only Exception is caught, so Ctrl-C / SystemExit still propagate.
        print(f"Hata oluştu: {str(e)}")
        return None

    finally:
        # Single cleanup point: always release the browser if one was started.
        if driver is not None:
            try:
                driver.quit()
            except Exception:
                # Best effort; a failed quit must not mask the real outcome.
                pass

if __name__ == "__main__":
    # Manual smoke run against a sample product review page.
    sample_url = "https://www.trendyol.com/apple/macbook-air-m1-cip-8gb-256gb-ssd-macos-13-qhd-tasinabilir-bilgisayar-uzay-grisi-p-68042136/yorumlar"
    scraped = scrape_comments(sample_url)
    # scrape_comments returns None on failure; stay silent in that case.
    if scraped is not None:
        comment_total = len(scraped)
        print(f"Toplam {comment_total} yorum çekildi.")