File size: 6,866 Bytes
b833f77 99d7b92 b833f77 4b29a3a b833f77 22d9d31 b833f77 4b29a3a b833f77 4b29a3a b833f77 99d7b92 b833f77 99d7b92 422c973 b833f77 99d7b92 b833f77 99d7b92 b833f77 422c973 b833f77 99d7b92 b833f77 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 |
import os
import time
import requests
import re
import pandas as pd
import plotly.express as px
import gradio as gr
from dotenv import load_dotenv
from scripts.review_summarizer import analyze_reviews
# Load variables from a local .env file (if present) into the process
# environment before the API key is read.
load_dotenv()
GEMINI_API_KEY = os.getenv('GEMINI_API_KEY')

# Make sure the directory that receives the scraped-review CSV exists.
# exist_ok=True replaces the racy "check, then create" sequence with a single
# call that is a no-op when the directory is already there.
os.makedirs("data", exist_ok=True)
def create_sentiment_plot(df):
    """Create a pie chart of the sentiment label distribution.

    Args:
        df: DataFrame containing a ``sentiment_label`` column.

    Returns:
        The figure returned by ``px.pie``, with one slice per distinct label.
    """
    sentiment_counts = df["sentiment_label"].value_counts()
    labels = sentiment_counts.index.tolist()
    fig = px.pie(
        values=sentiment_counts.values,
        names=labels,
        # color_discrete_map is keyed by the values passed as `color`; without
        # this argument the fixed per-label colors below are not applied.
        color=labels,
        title="Duygu Analizi Dağılımı",
        color_discrete_map={
            "Pozitif": "#2ecc71",
            "Nötr": "#95a5a6",
            "Negatif": "#e74c3c",
        },
    )
    return fig
def create_star_plot(df):
    """Create a bar chart of how many reviews gave each star rating.

    Args:
        df: DataFrame containing a ``Yıldız Sayısı`` (star count) column.

    Returns:
        The figure returned by ``px.bar`` with the x axis labelled by stars.
    """
    star_counts = df["Yıldız Sayısı"].value_counts().sort_index()
    fig = px.bar(
        x=star_counts.index,
        y=star_counts.values,
        title="Yıldız Dağılımı",
        labels={"x": "Yıldız Sayısı", "y": "Yorum Sayısı"},
        color_discrete_sequence=["#f39c12"],
    )
    fig.update_layout(
        xaxis=dict(
            tickmode="array",
            # ticktext is only used together with tickvals; pin each label to
            # its rating. Assumes ratings are the numbers 1-5 -- TODO confirm.
            tickvals=[1, 2, 3, 4, 5],
            ticktext=["⭐", "⭐⭐", "⭐⭐⭐", "⭐⭐⭐⭐", "⭐⭐⭐⭐⭐"],
        )
    )
    return fig
def scrape_product_comments_v2(url, timeout=30):
    """Download every review page for a product and return them as a DataFrame.

    The product id is taken from the ``-p-<digits>`` segment of ``url`` and
    used to query the review API page by page.

    Args:
        url: Product page URL containing a ``-p-<digits>`` segment.
        timeout: Seconds to wait for each HTTP request before giving up.

    Returns:
        DataFrame with the columns ``Kullanıcı_id``, ``Kullanıcı Adı``,
        ``Yorum``, ``Tarih`` and ``Yıldız Sayısı``. It has zero rows (but the
        same columns) when the API returns no reviews.

    Raises:
        ValueError: If no product id can be extracted from ``url``.
        ConnectionError: If the first page does not answer with HTTP 200.
        requests.exceptions.RequestException: On timeouts or transport errors
            (propagated from ``requests.get``).
    """
    match = re.search(r"-p-(\d+)", url)
    if not match:
        raise ValueError("Product ID not found in URL")
    product_id = match.group(1)

    headers = {
        "accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7",
        "accept-language": "en-US,en;q=0.9",
        "cache-control": "max-age=0",
        "upgrade-insecure-requests": "1",
        "user-agent": "Mozilla/5.0 (iPad; CPU OS 14_6_1 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) FxiOS/129.0 Mobile/15E148 Safari/605.1.15"
    }
    api_url = "https://apigw.trendyol.com/discovery-web-websfxsocialreviewrating-santral/product-reviews-detailed"

    def fetch_page(page):
        # Build the query from a dict instead of patching "page=1" inside a
        # pre-formatted URL string; the parameter order matches the original.
        params = {
            "contentId": product_id,
            "page": page,
            "order": "DESC",
            "orderBy": "Score",
            "channelId": 1,
        }
        # A timeout keeps a stalled connection from blocking the caller forever.
        return requests.get(api_url, headers=headers, params=params, timeout=timeout)

    response = fetch_page(1)
    if response.status_code != 200:
        raise ConnectionError(f"Initial request failed: {response.status_code}")
    first_page = response.json()["result"]["productReviews"]
    all_reviews = list(first_page["content"])

    # Remaining pages are best-effort: a failed page is reported and skipped.
    for page in range(2, first_page["totalPages"] + 1):
        response = fetch_page(page)
        if response.status_code == 200:
            all_reviews.extend(response.json()["result"]["productReviews"]["content"])
        else:
            print(f"Failed to fetch page {page}: {response.status_code}")

    column_names = {
        "id": "Kullanıcı_id",
        "userFullName": "Kullanıcı Adı",
        "comment": "Yorum",
        "lastModifiedDate": "Tarih",
        "rate": "Yıldız Sayısı"
    }
    wanted_columns = list(column_names.values())
    if not all_reviews:
        # An empty frame has no columns to select from; return the expected
        # schema with zero rows so callers can test for "no reviews" directly.
        return pd.DataFrame(columns=wanted_columns)

    reviews_df = pd.DataFrame(all_reviews).rename(columns=column_names)
    return reviews_df[wanted_columns]
def analyze_product(url, progress=gr.Progress()):
    """Scrape, analyze and summarize the reviews behind a product URL.

    Args:
        url: Product URL handed to ``scrape_product_comments_v2``.
        progress: Progress callback; called with a fraction and a description
            at each stage. The ``gr.Progress()`` default is kept as in the
            original (presumably so Gradio can inject its tracker -- confirm
            against the Gradio docs).

    Returns:
        An 8-tuple: total review count (str), analyzed review count (str),
        average rating text, positive-ratio text, sentiment figure, star
        figure, summary, and a message. On failure, or when nothing could be
        scraped, the first seven items are ``None`` and the message explains
        why. When reviews were scraped but none remain after analysis, the two
        counts are still returned and the other outputs are ``None``.
    """
    no_results = (None,) * 7
    try:
        # Fetch reviews
        progress(0.1, desc="Yorumlar çekiliyor...")
        df = scrape_product_comments_v2(url)
        if df is None or df.empty:
            return (*no_results, "Yorumlar çekilemedi. URL'yi kontrol edin.")

        # Save to CSV; analyze_reviews receives the file path, not the frame.
        data_path = os.path.join("data", "product_comments.csv")
        df.to_csv(data_path, index=False, encoding="utf-8-sig")

        # Analyze reviews
        progress(0.4, desc="Yorumlar analiz ediliyor...")
        summary, analyzed_df = analyze_reviews(data_path, GEMINI_API_KEY)

        progress(0.7, desc="Sonuçlar hazırlanıyor...")
        total_reviews = len(df)

        # Without this guard an empty analysis result would divide by zero in
        # the ratio below and surface as a generic error message.
        if analyzed_df is None or len(analyzed_df) == 0:
            return (
                str(total_reviews),
                "0",
                None,
                None,
                None,
                None,
                None,
                "Analiz edilecek yorum bulunamadı.",
            )

        # Calculate metrics
        total_analyzed = len(analyzed_df)
        avg_rating = f"{analyzed_df['Yıldız Sayısı'].mean():.1f}⭐"
        positive_ratio = (analyzed_df["sentiment_label"] == "Pozitif").mean() * 100
        positive_ratio_str = f"%{positive_ratio:.1f}"

        # Create plots
        sentiment_plot = create_sentiment_plot(analyzed_df)
        star_plot = create_star_plot(analyzed_df)

        # Tell the user how many scraped reviews were dropped by the analysis.
        removed_reviews = total_reviews - total_analyzed
        info_message = ""
        if removed_reviews > 0:
            info_message = f"Not: Toplam {removed_reviews} adet kargo, teslimat ve satıcı ile ilgili yorum analiz dışı bırakılmıştır."

        progress(1.0, desc="Analiz tamamlandı!")
        return (
            str(total_reviews),
            str(total_analyzed),
            avg_rating,
            positive_ratio_str,
            sentiment_plot,
            star_plot,
            summary,
            info_message,
        )
    except Exception as e:
        # UI boundary: report any failure in the message slot instead of
        # letting the exception escape the event handler.
        return (*no_results, f"Bir hata oluştu: {str(e)}")
# Gradio interface: a URL box and a button on top, four metric boxes, the
# summary, two plots, and a message area that receives analyze_product's
# eighth return value.
# NOTE(review): the row nesting below is an assumption (the button is placed
# next to the URL box) -- confirm against the intended layout.
with gr.Blocks(title="Trendyol Yorum Analizi") as demo:
    gr.Markdown(
        "\n# Trendyol Yorum Analizi\n"
        "Bu uygulama, Trendyol ürün sayfasındaki yorumları çeker, analiz eder ve özetler.\n"
    )

    with gr.Row():
        url_input = gr.Textbox(
            label="Trendyol Ürün Yorumları URL",
            placeholder="ürünün linki",
        )
        analyze_btn = gr.Button("Analiz Et")

    with gr.Row():
        total_reviews = gr.Textbox(label="Toplam Yorum")
        total_analyzed = gr.Textbox(label="Ürün Değerlendirme Sayısı")
        avg_rating = gr.Textbox(label="Ortalama Puan")
        positive_ratio = gr.Textbox(label="Olumlu Yorum Oranı")

    summary = gr.Markdown(label="📝 Genel Değerlendirme")
    # This component is created but is not listed among the click outputs.
    info_message = gr.Markdown()

    with gr.Row():
        sentiment_plot = gr.Plot()
        star_plot = gr.Plot()

    error_message = gr.Markdown()

    # Order must match the tuple returned by analyze_product.
    result_components = [
        total_reviews,
        total_analyzed,
        avg_rating,
        positive_ratio,
        sentiment_plot,
        star_plot,
        summary,
        error_message,
    ]
    analyze_btn.click(
        fn=analyze_product,
        inputs=[url_input],
        outputs=result_components,
    )
# Start the Gradio app only when this file is run as a script.
if __name__ == "__main__":
    demo.launch()