import os
import time

import pandas as pd
from selenium import webdriver
from selenium.common.exceptions import NoSuchElementException, TimeoutException
from selenium.webdriver.chrome.service import Service as ChromeService
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait
from webdriver_manager.chrome import ChromeDriverManager

def scrape_comments(url):
    """Scrape user comments from a Trendyol product review page into a pandas DataFrame."""
    # Create the data directory if it doesn't exist
    data_directory = "data"
    os.makedirs(data_directory, exist_ok=True)

    def comprehensive_scroll(driver):
        # Scroll until no more new content is loaded
        last_height = driver.execute_script("return document.body.scrollHeight")
        while True:
            # Scroll to bottom
            driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
            time.sleep(3)  # Wait for potential content loading

            # Calculate new scroll height
            new_height = driver.execute_script("return document.body.scrollHeight")

            # Check if bottom has been reached
            if new_height == last_height:
                break

            last_height = new_height

    driver = None
    try:
        chrome_options = webdriver.ChromeOptions()
        chrome_options.add_argument("--disable-notifications")
        chrome_options.add_argument("--headless")
        chrome_options.add_argument("--disable-gpu")
        chrome_options.add_argument("--no-sandbox")
        chrome_options.add_argument("--disable-dev-shm-usage")
        chrome_options.add_argument("--window-size=1920,1080")
        chrome_options.add_argument("--start-maximized")
        chrome_options.add_argument("user-agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36")

        # Special settings for HuggingFace Spaces: use the preinstalled Chrome and ChromeDriver binaries
        if os.getenv('SPACE_ID'):
            chrome_options.binary_location = "/usr/bin/google-chrome"
            service = ChromeService("/usr/local/bin/chromedriver")
        else:
            service = ChromeService(ChromeDriverManager().install())

        driver = webdriver.Chrome(service=service, options=chrome_options)
        driver.maximize_window()

        driver.get(url)

        try:
            # Dismiss the OneTrust cookie consent banner if it is shown
            WebDriverWait(driver, 10).until(
                EC.element_to_be_clickable((By.ID, "onetrust-accept-btn-handler"))
            ).click()
        except TimeoutException:
            pass  # The cookie popup does not always appear

        comprehensive_scroll(driver)

        # Collect the comment containers via the page's absolute XPath (brittle if the layout changes)
        comment_elements = driver.find_elements(
            By.XPATH,
            "/html/body/div[1]/div[4]/div/div/div/div/div[3]/div/div/div[3]/div[2]/div",
        )
        total_comments = len(comment_elements)

        data = []
        for i in range(1, total_comments + 1):
            kullanıcı_id = i
            try:
                username_xpath = f"/html/body/div[1]/div[4]/div/div/div/div/div[3]/div/div/div[3]/div[2]/div[{i}]/div[1]/div[2]/div[1]"
                username = driver.find_element(By.XPATH, username_xpath).text
            except NoSuchElementException:
                username = "N/A"

            try:
                comment_xpath = f"/html/body/div[1]/div[4]/div/div/div/div/div[3]/div/div/div[3]/div[2]/div[{i}]/div[2]/p"
                comment = driver.find_element(By.XPATH, comment_xpath).text
            except NoSuchElementException:
                comment = "N/A"

            try:
                date_xpath = f"/html/body/div[1]/div[4]/div/div/div/div/div[3]/div/div/div[3]/div[2]/div[{i}]/div[1]/div[2]/div[2]"
                date = driver.find_element(By.XPATH, date_xpath).text
            except NoSuchElementException:
                date = "N/A"

            star_xpath_base = f"/html/body/div[1]/div[4]/div/div/div/div/div[3]/div/div/div[3]/div[2]/div[{i}]/div[1]/div[1]/div"
            try:
                full_stars = driver.find_elements(
                    By.XPATH,
                    f"{star_xpath_base}/div[@class='star-w']/div[@class='full'][@style='width: 100%; max-width: 100%;']",
                )
                star_count = len(full_stars)
            except Exception:
                star_count = 0

            data.append(
                {
                    "Kullanıcı_id": kullanıcı_id,
                    "Kullanıcı Adı": username,
                    "Yorum": comment,
                    "Tarih": date,
                    "Yıldız Sayısı": star_count,
                }
            )

        df = pd.DataFrame(data)
        return df

    except Exception as e:
        print(f"Hata oluştu: {str(e)}")
        return None

    finally:
        if driver:
            driver.quit()

if __name__ == "__main__":
    # Test URL
    url = "https://www.trendyol.com/apple/macbook-air-m1-cip-8gb-256gb-ssd-macos-13-qhd-tasinabilir-bilgisayar-uzay-grisi-p-68042136/yorumlar"
    df = scrape_comments(url)
    if df is not None:
        print(f"Toplam {len(df)} yorum çekildi.")