Upload trendyol_scraper_origin.py
Browse files
scrape/trendyol_scraper_origin.py
CHANGED
@@ -32,7 +32,6 @@ def scrape_comments(url):
|
|
32 |
|
33 |
last_height = new_height
|
34 |
|
35 |
-
driver = None
|
36 |
try:
|
37 |
chrome_options = webdriver.ChromeOptions()
|
38 |
chrome_options.add_argument("--disable-notifications")
|
@@ -45,84 +44,104 @@ def scrape_comments(url):
|
|
45 |
chrome_options.add_argument("user-agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36")
|
46 |
|
47 |
# HuggingFace Spaces için özel ayarlar
|
48 |
-
|
49 |
-
|
50 |
-
|
51 |
-
|
52 |
-
service = ChromeService(ChromeDriverManager().install())
|
53 |
-
|
54 |
-
driver = webdriver.Chrome(service=service, options=chrome_options)
|
55 |
-
driver.maximize_window()
|
56 |
-
|
57 |
-
driver.get(url)
|
58 |
|
59 |
try:
|
60 |
-
|
61 |
-
|
62 |
-
|
63 |
-
except:
|
64 |
-
|
65 |
-
|
66 |
-
|
|
|
|
|
|
|
|
|
67 |
|
68 |
-
|
69 |
-
|
70 |
-
|
71 |
-
)
|
72 |
-
total_comments = len(comment_elements)
|
73 |
|
74 |
-
|
75 |
-
for i in range(1, total_comments + 1):
|
76 |
-
kullanıcı_id = i
|
77 |
try:
|
78 |
-
|
79 |
-
|
|
|
80 |
except:
|
81 |
-
|
|
|
82 |
|
83 |
-
|
84 |
-
comment_xpath = f"/html/body/div[1]/div[4]/div/div/div/div/div[3]/div/div/div[3]/div[2]/div[{i}]/div[2]/p"
|
85 |
-
comment = driver.find_element(By.XPATH, comment_xpath).text
|
86 |
-
except:
|
87 |
-
comment = "N/A"
|
88 |
|
89 |
-
|
90 |
-
|
91 |
-
|
92 |
-
|
93 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
94 |
|
95 |
-
|
|
|
|
|
|
|
96 |
try:
|
97 |
-
|
98 |
-
By.XPATH,
|
99 |
-
f"{star_xpath_base}/div[@class='star-w']/div[@class='full'][@style='width: 100%; max-width: 100%;']",
|
100 |
-
)
|
101 |
-
star_count = len(full_stars)
|
102 |
except:
|
103 |
-
|
104 |
-
|
105 |
-
data.append(
|
106 |
-
{
|
107 |
-
"Kullanıcı_id": kullanıcı_id,
|
108 |
-
"Kullanıcı Adı": username,
|
109 |
-
"Yorum": comment,
|
110 |
-
"Tarih": date,
|
111 |
-
"Yıldız Sayısı": star_count,
|
112 |
-
}
|
113 |
-
)
|
114 |
-
|
115 |
-
df = pd.DataFrame(data)
|
116 |
-
return df
|
117 |
|
118 |
except Exception as e:
|
119 |
print(f"Hata oluştu: {str(e)}")
|
|
|
|
|
|
|
|
|
|
|
120 |
return None
|
121 |
|
122 |
-
finally:
|
123 |
-
if driver:
|
124 |
-
driver.quit()
|
125 |
-
|
126 |
if __name__ == "__main__":
|
127 |
# Test URL
|
128 |
url = "https://www.trendyol.com/apple/macbook-air-m1-cip-8gb-256gb-ssd-macos-13-qhd-tasinabilir-bilgisayar-uzay-grisi-p-68042136/yorumlar"
|
|
|
32 |
|
33 |
last_height = new_height
|
34 |
|
|
|
35 |
try:
|
36 |
chrome_options = webdriver.ChromeOptions()
|
37 |
chrome_options.add_argument("--disable-notifications")
|
|
|
44 |
chrome_options.add_argument("user-agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36")
|
45 |
|
46 |
# HuggingFace Spaces için özel ayarlar
|
47 |
+
chrome_options.add_argument("--disable-setuid-sandbox")
|
48 |
+
chrome_options.add_argument("--remote-debugging-port=9222")
|
49 |
+
chrome_options.add_argument("--disable-extensions")
|
50 |
+
chrome_options.add_argument("--disable-software-rasterizer")
|
|
|
|
|
|
|
|
|
|
|
|
|
51 |
|
52 |
try:
|
53 |
+
# Önce ChromeDriverManager ile deneyin
|
54 |
+
service = ChromeService(ChromeDriverManager().install())
|
55 |
+
driver = webdriver.Chrome(service=service, options=chrome_options)
|
56 |
+
except Exception as e:
|
57 |
+
print(f"ChromeDriverManager failed: {str(e)}")
|
58 |
+
try:
|
59 |
+
# Eğer ChromeDriverManager başarısız olursa, doğrudan Chrome'u başlatın
|
60 |
+
driver = webdriver.Chrome(options=chrome_options)
|
61 |
+
except Exception as e:
|
62 |
+
print(f"Direct Chrome initialization failed: {str(e)}")
|
63 |
+
return None
|
64 |
|
65 |
+
try:
|
66 |
+
driver.maximize_window()
|
67 |
+
driver.get(url)
|
|
|
|
|
68 |
|
69 |
+
# Cookie popup'ı kapatmayı dene, başarısız olursa devam et
|
|
|
|
|
70 |
try:
|
71 |
+
WebDriverWait(driver, 10).until(
|
72 |
+
EC.element_to_be_clickable((By.ID, "onetrust-accept-btn-handler"))
|
73 |
+
).click()
|
74 |
except:
|
75 |
+
print("Cookie popup not found or couldn't be closed")
|
76 |
+
pass
|
77 |
|
78 |
+
comprehensive_scroll(driver)
|
|
|
|
|
|
|
|
|
79 |
|
80 |
+
comment_elements = driver.find_elements(
|
81 |
+
By.XPATH,
|
82 |
+
"/html/body/div[1]/div[4]/div/div/div/div/div[3]/div/div/div[3]/div[2]/div",
|
83 |
+
)
|
84 |
+
total_comments = len(comment_elements)
|
85 |
+
|
86 |
+
data = []
|
87 |
+
for i in range(1, total_comments + 1):
|
88 |
+
kullanıcı_id = i
|
89 |
+
try:
|
90 |
+
username_xpath = f"/html/body/div[1]/div[4]/div/div/div/div/div[3]/div/div/div[3]/div[2]/div[{i}]/div[1]/div[2]/div[1]"
|
91 |
+
username = driver.find_element(By.XPATH, username_xpath).text
|
92 |
+
except:
|
93 |
+
username = "N/A"
|
94 |
+
|
95 |
+
try:
|
96 |
+
comment_xpath = f"/html/body/div[1]/div[4]/div/div/div/div/div[3]/div/div/div[3]/div[2]/div[{i}]/div[2]/p"
|
97 |
+
comment = driver.find_element(By.XPATH, comment_xpath).text
|
98 |
+
except:
|
99 |
+
comment = "N/A"
|
100 |
+
|
101 |
+
try:
|
102 |
+
date_xpath = f"/html/body/div[1]/div[4]/div/div/div/div/div[3]/div/div/div[3]/div[2]/div[{i}]/div[1]/div[2]/div[2]"
|
103 |
+
date = driver.find_element(By.XPATH, date_xpath).text
|
104 |
+
except:
|
105 |
+
date = "N/A"
|
106 |
+
|
107 |
+
star_xpath_base = f"/html/body/div[1]/div[4]/div/div/div/div/div[3]/div/div/div[3]/div[2]/div[{i}]/div[1]/div[1]/div"
|
108 |
+
try:
|
109 |
+
full_stars = driver.find_elements(
|
110 |
+
By.XPATH,
|
111 |
+
f"{star_xpath_base}/div[@class='star-w']/div[@class='full'][@style='width: 100%; max-width: 100%;']",
|
112 |
+
)
|
113 |
+
star_count = len(full_stars)
|
114 |
+
except:
|
115 |
+
star_count = 0
|
116 |
+
|
117 |
+
data.append(
|
118 |
+
{
|
119 |
+
"Kullanıcı_id": kullanıcı_id,
|
120 |
+
"Kullanıcı Adı": username,
|
121 |
+
"Yorum": comment,
|
122 |
+
"Tarih": date,
|
123 |
+
"Yıldız Sayısı": star_count,
|
124 |
+
}
|
125 |
+
)
|
126 |
|
127 |
+
df = pd.DataFrame(data)
|
128 |
+
return df
|
129 |
+
|
130 |
+
finally:
|
131 |
try:
|
132 |
+
driver.quit()
|
|
|
|
|
|
|
|
|
133 |
except:
|
134 |
+
pass
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
135 |
|
136 |
except Exception as e:
|
137 |
print(f"Hata oluştu: {str(e)}")
|
138 |
+
try:
|
139 |
+
if 'driver' in locals():
|
140 |
+
driver.quit()
|
141 |
+
except:
|
142 |
+
pass
|
143 |
return None
|
144 |
|
|
|
|
|
|
|
|
|
145 |
if __name__ == "__main__":
|
146 |
# Test URL
|
147 |
url = "https://www.trendyol.com/apple/macbook-air-m1-cip-8gb-256gb-ssd-macos-13-qhd-tasinabilir-bilgisayar-uzay-grisi-p-68042136/yorumlar"
|