Spaces:

enesmanan
/

trendyol-review-summarizer

Running

App Files Files Community

trendyol-review-summarizer / app.py

enesmanan

change genai model

b42b622 verified 9 months ago

raw

history blame

11.7 kB

	import gradio as gr
	import pandas as pd
	import plotly.express as px
	import plotly.graph_objects as go
	import os
	import shutil
	from scrape.trendyol_scraper import scrape_reviews
	import torch
	from transformers import AutoTokenizer, AutoModelForSequenceClassification, pipeline
	import re
	from tqdm import tqdm
	import nltk
	from nltk.corpus import stopwords
	from dotenv import load_dotenv
	import google.generativeai as genai
	from pathlib import Path

	class ReviewAnalysisApp:
	    """Scrape, filter, score and summarise Trendyol product reviews.

	    Constructing an instance loads the sentiment model and tokenizer, the
	    Turkish stop-word list and, on a best-effort basis, the Gemini client.
	    Review frames handled by this class use the columns ``'Yorum'`` (review
	    text) and ``'Yıldız Sayısı'`` (star rating); the analysis step adds
	    ``'sentiment_score'`` and ``'sentiment_label'``.
	    """

	    # str.lower() turns "I" into "i" and "İ" into "i" + U+0307 (combining
	    # dot). Turkish needs "I" -> "ı" and "İ" -> "i", so those two letters are
	    # translated before the generic lower-casing runs.
	    _TURKISH_LOWER_MAP = str.maketrans({"I": "ı", "İ": "i"})

	    def __init__(self):
	        self.setup_models()
	        self.setup_stopwords()
	        self.setup_gemini()

	    @classmethod
	    def _turkish_lower(cls, text):
	        """Lower-case ``text`` using Turkish dotted/dotless-i rules."""
	        return text.translate(cls._TURKISH_LOWER_MAP).lower()

	    def setup_stopwords(self):
	        """Load the Turkish stop words, downloading the NLTK corpus if missing.

	        Sets ``self.logistics_seller_words`` (shipping/seller vocabulary) and
	        ``self.turkish_stopwords`` (NLTK list plus that vocabulary).
	        """
	        try:
	            nltk.data.find('corpora/stopwords')
	        except LookupError:
	            nltk.download('stopwords')

	        self.turkish_stopwords = set(stopwords.words('turkish'))
	        # Shipping/seller terms are treated as stop words too, and are reused
	        # by filter_product_reviews() to drop non-product reviews.
	        self.logistics_seller_words = {
	            'kargo', 'kargocu', 'paket', 'gönderi', 'satıcı', 'mağaza',
	            'sipariş', 'teslimat', 'gönderim', 'kutu', 'paketleme'
	        }
	        self.turkish_stopwords.update(self.logistics_seller_words)

	    def setup_models(self):
	        """Load the sentiment tokenizer and classifier onto the CPU."""
	        self.device = "cpu"
	        print(f"Cihaz: {self.device}")

	        model_name = "savasy/bert-base-turkish-sentiment-cased"
	        self.sentiment_tokenizer = AutoTokenizer.from_pretrained(model_name)
	        self.sentiment_model = (
	            AutoModelForSequenceClassification.from_pretrained(
	                model_name,
	                low_cpu_mem_usage=False
	            )
	            .to(self.device)
	            .to(torch.float32)
	        )

	    def setup_gemini(self):
	        """Configure the Gemini client from ``GOOGLE_API_KEY``.

	        Best effort: on any failure (including a missing key) the error is
	        printed and ``self.gemini_model`` is set to ``None``, which makes
	        generate_summary() skip the AI section.
	        """
	        try:
	            # Pick up the key from a .env file if one is present.
	            load_dotenv()
	            api_key = os.getenv('GOOGLE_API_KEY')
	            if not api_key:
	                raise ValueError("API key bulunamadı!")

	            genai.configure(api_key=api_key)
	            self.gemini_model = genai.GenerativeModel('gemini-pro')

	        except Exception as e:
	            print(f"Gemini API yapılandırma hatası: {str(e)}")
	            self.gemini_model = None

	    def preprocess_text(self, text):
	        """Normalise a review for analysis.

	        Lower-cases with Turkish casing rules, strips punctuation and digits,
	        collapses whitespace and removes stop words.

	        Returns:
	            The cleaned text, or ``''`` when ``text`` is not a string.
	        """
	        if not isinstance(text, str):
	            return ''

	        text = self._turkish_lower(text)
	        text = re.sub(r'[^\w\s]', '', text)       # punctuation / symbols
	        text = re.sub(r'\d+', '', text)           # digits
	        text = re.sub(r'\s+', ' ', text).strip()  # redundant whitespace
	        kept = [word for word in text.split() if word not in self.turkish_stopwords]
	        return ' '.join(kept)

	    def filter_product_reviews(self, df):
	        """Drop reviews that talk about shipping or the seller.

	        A review is kept only if its ``'Yorum'`` value is a string that
	        contains none of ``self.logistics_seller_words`` (substring match on
	        the Turkish-lower-cased text). Filtering statistics are printed.

	        Returns:
	            A filtered copy of ``df``; an empty ``df`` yields an empty copy.
	        """
	        def is_product_review(text):
	            if not isinstance(text, str):
	                return False
	            lowered = self._turkish_lower(text)
	            return not any(word in lowered for word in self.logistics_seller_words)

	        # astype(bool) keeps the mask boolean even when the frame is empty.
	        mask = df['Yorum'].apply(is_product_review).astype(bool)
	        filtered_df = df[mask].copy()

	        total = len(df)
	        removed = total - len(filtered_df)
	        # Avoid dividing by zero when there is nothing to filter.
	        removed_pct = (removed / total * 100) if total else 0.0

	        print(f"\nFiltreleme İstatistikleri:")
	        print(f"Toplam yorum sayısı: {total}")
	        print(f"Ürün yorumu sayısı: {len(filtered_df)}")
	        print(f"Filtrelenen yorum sayısı: {removed}")
	        print(f"Filtreleme oranı: {removed_pct:.2f}%")

	        return filtered_df

	    def predict_sentiment(self, text):
	        """Score a single review with the sentiment model.

	        Returns:
	            ``{"label": ..., "score": ...}`` where ``score`` is the softmax
	            probability at class index 1 and ``label`` is ``"pozitif"`` when
	            that probability exceeds 0.5, else ``"negatif"``. Text that is
	            empty after preprocessing yields ``"nötr"`` with score 0.5.
	        """
	        text = self.preprocess_text(text)
	        if not text:
	            return {"label": "nötr", "score": 0.5}

	        inputs = self.sentiment_tokenizer(text, return_tensors="pt", truncation=True, max_length=512)
	        inputs = {k: v.to(self.device) for k, v in inputs.items()}

	        with torch.no_grad():
	            outputs = self.sentiment_model(**inputs)
	            scores = torch.nn.functional.softmax(outputs.logits, dim=1)

	        positive_score = scores[0][1].item()
	        label = "pozitif" if positive_score > 0.5 else "negatif"

	        return {"label": label, "score": positive_score}

	    def analyze_reviews(self, df):
	        """Filter ``df`` to product reviews and attach sentiment columns.

	        Returns:
	            The filtered copy of ``df`` with ``'sentiment_score'`` and
	            ``'sentiment_label'`` added.
	        """
	        print("\nSentiment analizi başlatılıyor...")

	        df = self.filter_product_reviews(df)

	        results = [
	            self.predict_sentiment(text)
	            for text in tqdm(df['Yorum'], desc="Yorumlar analiz ediliyor")
	        ]
	        df['sentiment_score'] = [r['score'] for r in results]
	        df['sentiment_label'] = [r['label'] for r in results]

	        return df

	    def _frequent_words(self, comments, limit=10):
	        """Return the ``limit`` most frequent cleaned words as display text."""
	        from collections import Counter

	        counts = Counter()
	        for text in comments:
	            counts.update(self.preprocess_text(text).split())
	        return ", ".join(f"{word} ({count} kez)" for word, count in counts.most_common(limit))

	    @staticmethod
	    def _join_samples(comments, limit=3):
	        """Join the first ``limit`` comments with `` | `` for display."""
	        return " | ".join(str(comment) for comment in comments[:limit])

	    def generate_summary(self, df):
	        """Build the statistical report and, if available, the Gemini analysis.

	        Args:
	            df: Analysed reviews with ``'Yorum'``, ``'Yıldız Sayısı'`` and
	                ``'sentiment_label'`` columns.

	        Returns:
	            The statistics text; when ``self.gemini_model`` is set and the
	            API call succeeds, the AI analysis is appended to it.
	        """
	        avg_rating = df['Yıldız Sayısı'].mean()
	        total_reviews = len(df)

	        labels = df['sentiment_label']
	        positive_comments = df.loc[labels == 'pozitif', 'Yorum'].tolist()
	        negative_comments = df.loc[labels == 'negatif', 'Yorum'].tolist()
	        positive_count = len(positive_comments)
	        negative_count = len(negative_comments)

	        star_dist = df['Yıldız Sayısı'].value_counts().sort_index()
	        star_dist_text = "\n".join(f"{star} yıldız: {count} yorum" for star, count in star_dist.items())

	        frequent_words = self._frequent_words(df['Yorum'])

	        # Joined outside the f-strings: a backslash inside a replacement
	        # field is a SyntaxError before Python 3.12.
	        positive_samples = self._join_samples(positive_comments)
	        negative_samples = self._join_samples(negative_comments)

	        stats_summary = f"""📊 ÜRÜN ANALİZ RAPORU

	⭐ Ortalama Puan: {avg_rating:.1f}/5
	📝 Toplam Yorum: {total_reviews}
	✅ Pozitif Yorum: {positive_count}
	❌ Negatif Yorum: {negative_count}

	📈 YILDIZ DAĞILIMI:
	{star_dist_text}

	🔍 EN SIK KULLANILAN KELİMELER:
	{frequent_words}

	💬 ÖRNEK YORUMLAR:
	✅ Pozitif Yorumlar:
	{positive_samples}

	❌ Negatif Yorumlar:
	{negative_samples}"""

	        if not self.gemini_model:
	            return stats_summary

	        prompt = f"""Aşağıdaki ürün yorumları verilerine dayanarak detaylı bir analiz yap:

	1. İstatistikler:
	- Toplam {total_reviews} yorum
	- Ortalama puan: {avg_rating:.1f}/5
	- {positive_count} pozitif, {negative_count} negatif yorum

	2. Örnek Pozitif Yorumlar:
	{positive_samples}

	3. Örnek Negatif Yorumlar:
	{negative_samples}

	4. En Sık Kullanılan Kelimeler:
	{frequent_words}

	Lütfen şu başlıklar altında bir değerlendirme yap:
	1. Ürünün güçlü yönleri
	2. Ürünün zayıf yönleri
	3. Genel kullanıcı memnuniyeti
	4. Potansiyel alıcılar için öneriler

	Yanıtını Türkçe olarak ver ve mümkün olduğunca özlü tut."""

	        try:
	            response = self.gemini_model.generate_content(prompt)
	            ai_analysis = response.text
	        except Exception as e:
	            # Best effort: fall back to the statistics-only report.
	            print(f"Gemini API hatası: {str(e)}")
	            return stats_summary

	        return f"{stats_summary}\n\n🤖 YAPAY ZEKA ANALİZİ:\n{ai_analysis}"

	    @staticmethod
	    def _remove_data_dir():
	        """Delete the ``data`` working directory if it exists."""
	        if os.path.exists("data"):
	            shutil.rmtree("data")

	    def analyze_url(self, url):
	        """Run the full pipeline for a product-reviews URL.

	        Returns:
	            ``(summary, sentiment_fig, rating_fig, sentiment_by_rating_fig)``.
	            On failure, or when no usable reviews remain, the first element
	            is a message and the three figures are ``None``.
	        """
	        try:
	            # Start from a clean working directory.
	            self._remove_data_dir()

	            df = scrape_reviews(url)
	            if df.empty:
	                return "Yorumlar çekilemedi. Lütfen URL'yi kontrol edin.", None, None, None

	            analyzed_df = self.analyze_reviews(df)
	            if analyzed_df.empty:
	                # Every review was filtered out; a summary would only show
	                # NaN averages and empty charts.
	                return "Ürünle ilgili yorum bulunamadı.", None, None, None

	            summary = self.generate_summary(analyzed_df)

	            fig1 = self.create_sentiment_distribution(analyzed_df)
	            fig2 = self.create_rating_distribution(analyzed_df)
	            fig3 = self.create_sentiment_by_rating(analyzed_df)

	            return summary, fig1, fig2, fig3

	        except Exception as e:
	            return f"Bir hata oluştu: {str(e)}", None, None, None

	        finally:
	            self._remove_data_dir()

	    def create_sentiment_distribution(self, df):
	        """Return a pie chart of the ``'sentiment_label'`` values."""
	        fig = px.pie(df,
	                     names='sentiment_label',
	                     title='Duygu Analizi Dağılımı')
	        return fig

	    def create_rating_distribution(self, df):
	        """Return a bar chart of review counts per star rating."""
	        fig = px.bar(df['Yıldız Sayısı'].value_counts().sort_index(),
	                     title='Yıldız Dağılımı')
	        fig.update_layout(xaxis_title='Yıldız Sayısı',
	                          yaxis_title='Yorum Sayısı')
	        return fig

	    def create_sentiment_by_rating(self, df):
	        """Return a line chart of mean sentiment score per star rating."""
	        avg_sentiment = df.groupby('Yıldız Sayısı')['sentiment_score'].mean()
	        fig = px.line(avg_sentiment,
	                      title='Yıldız Sayısına Göre Ortalama Sentiment Skoru')
	        fig.update_layout(xaxis_title='Yıldız Sayısı',
	                          yaxis_title='Ortalama Sentiment Skoru')
	        return fig

	def create_interface():
	"""Build the Gradio UI and wire the analyze button to the review pipeline.

	Creates one ReviewAnalysisApp and a gr.Blocks layout with a URL textbox,
	an analyze button, a summary textbox and three plot tabs, then returns the
	gr.Blocks object.
	"""
	# NOTE(review): indentation is flattened in this copy, so the nesting of the
	# `with` blocks below is not visible; in particular, confirm whether the
	# button belongs inside the first gr.Row() or directly under gr.Blocks.
	app = ReviewAnalysisApp()

	with gr.Blocks(theme=gr.themes.Soft()) as interface:
	gr.Markdown("# Trendyol Yorum Analizi")

	with gr.Row():
	url_input = gr.Textbox(
	label="Trendyol Ürün Yorumları URL'si",
	placeholder="https://www.trendyol.com/..."
	)

	analyze_btn = gr.Button("Analiz Et")

	with gr.Row():
	with gr.Column(scale=1):
	summary_output = gr.Textbox(
	label="Özet",
	lines=10
	)

	with gr.Column(scale=2):
	with gr.Tab("Duygu Analizi"):
	sentiment_dist = gr.Plot()
	with gr.Tab("Yıldız Dağılımı"):
	rating_dist = gr.Plot()
	with gr.Tab("Sentiment-Yıldız İlişkisi"):
	sentiment_rating = gr.Plot()

	# The output order matches the 4-tuple returned by app.analyze_url:
	# summary text first, then the three figures.
	analyze_btn.click(
	fn=app.analyze_url,
	inputs=[url_input],
	outputs=[summary_output, sentiment_dist, rating_dist, sentiment_rating]
	)

	return interface

	if __name__ == "__main__":
	    # Build the UI (this also constructs ReviewAnalysisApp) and start the
	    # Gradio server only when the file is executed as a script.
	    interface = create_interface()
	    interface.launch()