enesmanan committed on
Commit
05129a0
·
verified ·
1 Parent(s): 9ff6909

Upload trendyol_scraper_origin.py

Browse files
Files changed (1) hide show
  1. scrape/trendyol_scraper_origin.py +82 -63
scrape/trendyol_scraper_origin.py CHANGED
@@ -32,7 +32,6 @@ def scrape_comments(url):
32
 
33
  last_height = new_height
34
 
35
- driver = None
36
  try:
37
  chrome_options = webdriver.ChromeOptions()
38
  chrome_options.add_argument("--disable-notifications")
@@ -45,84 +44,104 @@ def scrape_comments(url):
45
  chrome_options.add_argument("user-agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36")
46
 
47
  # HuggingFace Spaces için özel ayarlar
48
- if os.getenv('SPACE_ID'):
49
- chrome_options.binary_location = "/usr/bin/google-chrome"
50
- service = ChromeService("/usr/local/bin/chromedriver")
51
- else:
52
- service = ChromeService(ChromeDriverManager().install())
53
-
54
- driver = webdriver.Chrome(service=service, options=chrome_options)
55
- driver.maximize_window()
56
-
57
- driver.get(url)
58
 
59
  try:
60
- WebDriverWait(driver, 10).until(
61
- EC.element_to_be_clickable((By.ID, "onetrust-accept-btn-handler"))
62
- ).click()
63
- except:
64
- pass # Bazen cookie popup görünmeyebilir
65
-
66
- comprehensive_scroll(driver)
 
 
 
 
67
 
68
- comment_elements = driver.find_elements(
69
- By.XPATH,
70
- "/html/body/div[1]/div[4]/div/div/div/div/div[3]/div/div/div[3]/div[2]/div",
71
- )
72
- total_comments = len(comment_elements)
73
 
74
- data = []
75
- for i in range(1, total_comments + 1):
76
- kullanıcı_id = i
77
  try:
78
- username_xpath = f"/html/body/div[1]/div[4]/div/div/div/div/div[3]/div/div/div[3]/div[2]/div[{i}]/div[1]/div[2]/div[1]"
79
- username = driver.find_element(By.XPATH, username_xpath).text
 
80
  except:
81
- username = "N/A"
 
82
 
83
- try:
84
- comment_xpath = f"/html/body/div[1]/div[4]/div/div/div/div/div[3]/div/div/div[3]/div[2]/div[{i}]/div[2]/p"
85
- comment = driver.find_element(By.XPATH, comment_xpath).text
86
- except:
87
- comment = "N/A"
88
 
89
- try:
90
- date_xpath = f"/html/body/div[1]/div[4]/div/div/div/div/div[3]/div/div/div[3]/div[2]/div[{i}]/div[1]/div[2]/div[2]"
91
- date = driver.find_element(By.XPATH, date_xpath).text
92
- except:
93
- date = "N/A"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
94
 
95
- star_xpath_base = f"/html/body/div[1]/div[4]/div/div/div/div/div[3]/div/div/div[3]/div[2]/div[{i}]/div[1]/div[1]/div"
 
 
 
96
  try:
97
- full_stars = driver.find_elements(
98
- By.XPATH,
99
- f"{star_xpath_base}/div[@class='star-w']/div[@class='full'][@style='width: 100%; max-width: 100%;']",
100
- )
101
- star_count = len(full_stars)
102
  except:
103
- star_count = 0
104
-
105
- data.append(
106
- {
107
- "Kullanıcı_id": kullanıcı_id,
108
- "Kullanıcı Adı": username,
109
- "Yorum": comment,
110
- "Tarih": date,
111
- "Yıldız Sayısı": star_count,
112
- }
113
- )
114
-
115
- df = pd.DataFrame(data)
116
- return df
117
 
118
  except Exception as e:
119
  print(f"Hata oluştu: {str(e)}")
 
 
 
 
 
120
  return None
121
 
122
- finally:
123
- if driver:
124
- driver.quit()
125
-
126
  if __name__ == "__main__":
127
  # Test URL
128
  url = "https://www.trendyol.com/apple/macbook-air-m1-cip-8gb-256gb-ssd-macos-13-qhd-tasinabilir-bilgisayar-uzay-grisi-p-68042136/yorumlar"
 
32
 
33
  last_height = new_height
34
 
 
35
  try:
36
  chrome_options = webdriver.ChromeOptions()
37
  chrome_options.add_argument("--disable-notifications")
 
44
  chrome_options.add_argument("user-agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36")
45
 
46
  # HuggingFace Spaces için özel ayarlar
47
+ chrome_options.add_argument("--disable-setuid-sandbox")
48
+ chrome_options.add_argument("--remote-debugging-port=9222")
49
+ chrome_options.add_argument("--disable-extensions")
50
+ chrome_options.add_argument("--disable-software-rasterizer")
 
 
 
 
 
 
51
 
52
  try:
53
+ # Önce ChromeDriverManager ile deneyin
54
+ service = ChromeService(ChromeDriverManager().install())
55
+ driver = webdriver.Chrome(service=service, options=chrome_options)
56
+ except Exception as e:
57
+ print(f"ChromeDriverManager failed: {str(e)}")
58
+ try:
59
+ # Eğer ChromeDriverManager başarısız olursa, doğrudan Chrome'u başlatın
60
+ driver = webdriver.Chrome(options=chrome_options)
61
+ except Exception as e:
62
+ print(f"Direct Chrome initialization failed: {str(e)}")
63
+ return None
64
 
65
+ try:
66
+ driver.maximize_window()
67
+ driver.get(url)
 
 
68
 
69
+ # Cookie popup'ı kapatmayı dene, başarısız olursa devam et
 
 
70
  try:
71
+ WebDriverWait(driver, 10).until(
72
+ EC.element_to_be_clickable((By.ID, "onetrust-accept-btn-handler"))
73
+ ).click()
74
  except:
75
+ print("Cookie popup not found or couldn't be closed")
76
+ pass
77
 
78
+ comprehensive_scroll(driver)
 
 
 
 
79
 
80
+ comment_elements = driver.find_elements(
81
+ By.XPATH,
82
+ "/html/body/div[1]/div[4]/div/div/div/div/div[3]/div/div/div[3]/div[2]/div",
83
+ )
84
+ total_comments = len(comment_elements)
85
+
86
+ data = []
87
+ for i in range(1, total_comments + 1):
88
+ kullanıcı_id = i
89
+ try:
90
+ username_xpath = f"/html/body/div[1]/div[4]/div/div/div/div/div[3]/div/div/div[3]/div[2]/div[{i}]/div[1]/div[2]/div[1]"
91
+ username = driver.find_element(By.XPATH, username_xpath).text
92
+ except:
93
+ username = "N/A"
94
+
95
+ try:
96
+ comment_xpath = f"/html/body/div[1]/div[4]/div/div/div/div/div[3]/div/div/div[3]/div[2]/div[{i}]/div[2]/p"
97
+ comment = driver.find_element(By.XPATH, comment_xpath).text
98
+ except:
99
+ comment = "N/A"
100
+
101
+ try:
102
+ date_xpath = f"/html/body/div[1]/div[4]/div/div/div/div/div[3]/div/div/div[3]/div[2]/div[{i}]/div[1]/div[2]/div[2]"
103
+ date = driver.find_element(By.XPATH, date_xpath).text
104
+ except:
105
+ date = "N/A"
106
+
107
+ star_xpath_base = f"/html/body/div[1]/div[4]/div/div/div/div/div[3]/div/div/div[3]/div[2]/div[{i}]/div[1]/div[1]/div"
108
+ try:
109
+ full_stars = driver.find_elements(
110
+ By.XPATH,
111
+ f"{star_xpath_base}/div[@class='star-w']/div[@class='full'][@style='width: 100%; max-width: 100%;']",
112
+ )
113
+ star_count = len(full_stars)
114
+ except:
115
+ star_count = 0
116
+
117
+ data.append(
118
+ {
119
+ "Kullanıcı_id": kullanıcı_id,
120
+ "Kullanıcı Adı": username,
121
+ "Yorum": comment,
122
+ "Tarih": date,
123
+ "Yıldız Sayısı": star_count,
124
+ }
125
+ )
126
 
127
+ df = pd.DataFrame(data)
128
+ return df
129
+
130
+ finally:
131
  try:
132
+ driver.quit()
 
 
 
 
133
  except:
134
+ pass
 
 
 
 
 
 
 
 
 
 
 
 
 
135
 
136
  except Exception as e:
137
  print(f"Hata oluştu: {str(e)}")
138
+ try:
139
+ if 'driver' in locals():
140
+ driver.quit()
141
+ except:
142
+ pass
143
  return None
144
 
 
 
 
 
145
  if __name__ == "__main__":
146
  # Test URL
147
  url = "https://www.trendyol.com/apple/macbook-air-m1-cip-8gb-256gb-ssd-macos-13-qhd-tasinabilir-bilgisayar-uzay-grisi-p-68042136/yorumlar"