Upload 4 files
Browse files
- requirements.txt +15 -13
- scrape/__pycache__/trendyol_scraper_origin.cpython-310.pyc +0 -0
- scrape/trendyol_scraper_origin.py +7 -13
- setup.sh +14 -0
requirements.txt
CHANGED
@@ -1,14 +1,16 @@
|
|
1 |
-
pandas
|
2 |
-
numpy
|
3 |
-
torch
|
4 |
-
transformers
|
5 |
-
nltk
|
6 |
-
requests
|
7 |
-
google-generativeai
|
8 |
-
selenium
|
|
|
|
|
|
|
|
|
|
|
9 |
webdriver-manager
|
10 |
-
|
11 |
-
|
12 |
-
python-dotenv==1.0.1
|
13 |
-
tqdm==4.67.1
|
14 |
-
regex
|
|
|
1 |
+
pandas
|
2 |
+
numpy
|
3 |
+
torch
|
4 |
+
transformers
|
5 |
+
nltk
|
6 |
+
requests
|
7 |
+
google-generativeai
|
8 |
+
selenium
|
9 |
+
streamlit
|
10 |
+
plotly
|
11 |
+
python-dotenv
|
12 |
+
tqdm
|
13 |
+
regex
|
14 |
webdriver-manager
|
15 |
+
gradio
|
16 |
+
undetected-chromedriver
|
|
|
|
|
|
scrape/__pycache__/trendyol_scraper_origin.cpython-310.pyc
ADDED
Binary file (3.91 kB). View file
|
|
scrape/trendyol_scraper_origin.py
CHANGED
@@ -40,26 +40,20 @@ def scrape_comments(url):
|
|
40 |
chrome_options.add_argument("--no-sandbox")
|
41 |
chrome_options.add_argument("--disable-dev-shm-usage")
|
42 |
chrome_options.add_argument("--window-size=1920,1080")
|
43 |
-
chrome_options.add_argument("--start-maximized")
|
44 |
-
chrome_options.add_argument("user-agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36")
|
45 |
-
|
46 |
-
# HuggingFace Spaces için özel ayarlar
|
47 |
chrome_options.add_argument("--disable-setuid-sandbox")
|
48 |
chrome_options.add_argument("--remote-debugging-port=9222")
|
49 |
-
chrome_options.
|
50 |
-
chrome_options.add_argument("--disable-software-rasterizer")
|
51 |
|
52 |
try:
|
53 |
-
|
54 |
-
service = ChromeService(ChromeDriverManager().install())
|
55 |
-
driver = webdriver.Chrome(service=service, options=chrome_options)
|
56 |
except Exception as e:
|
57 |
-
print(f"
|
58 |
try:
|
59 |
-
#
|
60 |
-
|
|
|
61 |
except Exception as e:
|
62 |
-
print(f"
|
63 |
return None
|
64 |
|
65 |
try:
|
|
|
40 |
chrome_options.add_argument("--no-sandbox")
|
41 |
chrome_options.add_argument("--disable-dev-shm-usage")
|
42 |
chrome_options.add_argument("--window-size=1920,1080")
|
|
|
|
|
|
|
|
|
43 |
chrome_options.add_argument("--disable-setuid-sandbox")
|
44 |
chrome_options.add_argument("--remote-debugging-port=9222")
|
45 |
+
chrome_options.binary_location = "/usr/bin/google-chrome" # Chrome'un yolu
|
|
|
46 |
|
47 |
try:
|
48 |
+
driver = webdriver.Chrome(options=chrome_options)
|
|
|
|
|
49 |
except Exception as e:
|
50 |
+
print(f"Chrome initialization failed: {str(e)}")
|
51 |
try:
|
52 |
+
# Alternatif olarak undetected-chromedriver'ı deneyin
|
53 |
+
import undetected_chromedriver as uc
|
54 |
+
driver = uc.Chrome(options=chrome_options)
|
55 |
except Exception as e:
|
56 |
+
print(f"Undetected chromedriver failed: {str(e)}")
|
57 |
return None
|
58 |
|
59 |
try:
|
setup.sh
ADDED
@@ -0,0 +1,14 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
#!/bin/bash
|
2 |
+
|
3 |
+
# Chrome'u yükle
|
4 |
+
wget -q -O - https://dl-ssl.google.com/linux/linux_signing_key.pub | apt-key add -
|
5 |
+
echo "deb [arch=amd64] http://dl.google.com/linux/chrome/deb/ stable main" >> /etc/apt/sources.list.d/google.list
|
6 |
+
apt-get update
|
7 |
+
apt-get install -y google-chrome-stable
|
8 |
+
|
9 |
+
# ChromeDriver'ı yükle
|
10 |
+
CHROME_VERSION=$(google-chrome --version | cut -d " " -f3 | cut -d "." -f1)
|
11 |
+
wget -N "https://edgedl.me.gvt1.com/edgedl/chrome/chrome-for-testing/$CHROME_VERSION.0.6045.105/linux64/chromedriver-linux64.zip"
|
12 |
+
unzip chromedriver-linux64.zip
|
13 |
+
mv chromedriver-linux64/chromedriver /usr/local/bin/
|
14 |
+
chmod +x /usr/local/bin/chromedriver
|