# Hugging Face Space: enesmanan/trendyol-review-summarizer
# Space status: Sleeping
# File size: 11,710 Bytes

import gradio as gr
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
import os
import shutil
from scrape.trendyol_scraper import scrape_reviews
import torch
from transformers import AutoTokenizer, AutoModelForSequenceClassification, pipeline
import re
from tqdm import tqdm
import nltk
from nltk.corpus import stopwords
from dotenv import load_dotenv
import google.generativeai as genai
from pathlib import Path

class ReviewAnalysisApp:
    def __init__(self):
        self.setup_models()
        self.setup_stopwords()
        self.setup_gemini()
        
    def setup_stopwords(self):
        """Prepare the Turkish stopword set used when cleaning review text.

        The NLTK ``stopwords`` corpus is downloaded if it cannot be found
        locally. The Turkish list is then extended with logistics/seller
        vocabulary, which is also kept on its own in
        ``self.logistics_seller_words``.
        """
        try:
            nltk.data.find('corpora/stopwords')
        except LookupError:
            # Corpus not present on this machine yet: fetch it.
            nltk.download('stopwords')

        base_words = stopwords.words('turkish')

        # Delivery- and seller-related terms, treated as stopwords as well.
        extra_words = {
            'kargo', 'kargocu', 'paket', 'gönderi', 'satıcı', 'mağaza',
            'sipariş', 'teslimat', 'gönderim', 'kutu', 'paketleme',
        }

        self.logistics_seller_words = extra_words
        # Union builds a new set, so the two attributes stay independent.
        self.turkish_stopwords = set(base_words) | extra_words
        
    def setup_models(self):
        """Load the sentiment tokenizer and classifier onto the CPU.

        Sets ``self.device``, ``self.sentiment_tokenizer`` and
        ``self.sentiment_model`` (moved to the device and cast to float32).
        """
        self.device = "cpu"
        print(f"Cihaz: {self.device}")

        checkpoint = "savasy/bert-base-turkish-sentiment-cased"
        self.sentiment_tokenizer = AutoTokenizer.from_pretrained(checkpoint)

        classifier = AutoModelForSequenceClassification.from_pretrained(
            checkpoint,
            low_cpu_mem_usage=False,
        )
        classifier = classifier.to(self.device)
        self.sentiment_model = classifier.to(torch.float32)
        
    def setup_gemini(self):
        """Gemini API'yi hazırla"""
        try:
            # Önce .env dosyasından API key'i al
            load_dotenv()
            api_key = os.getenv('GOOGLE_API_KEY')
            if not api_key:
                raise ValueError("API key bulunamadı!")
            
            # Gemini'yi yapılandır
            genai.configure(api_key=api_key)
            
            # Modeli ayarla
            self.gemini_model = genai.GenerativeModel('gemini-pro')
            
        except Exception as e:
            print(f"Gemini API yapılandırma hatası: {str(e)}")
            self.gemini_model = None
    
    def preprocess_text(self, text):
        """Metin ön işleme"""
        if isinstance(text, str):
            # Küçük harfe çevir
            text = text.lower()
            # Özel karakterleri temizle
            text = re.sub(r'[^\w\s]', '', text)
            # Sayıları temizle
            text = re.sub(r'\d+', '', text)
            # Fazla boşlukları temizle
            text = re.sub(r'\s+', ' ', text).strip()
            # Stop words'leri çıkar
            words = text.split()
            words = [word for word in words if word not in self.turkish_stopwords]
            return ' '.join(words)
        return ''
    
    def filter_product_reviews(self, df):
        """Ürün ile ilgili olmayan yorumları filtrele"""
        def is_product_review(text):
            if not isinstance(text, str):
                return False
            return not any(word in text.lower() for word in self.logistics_seller_words)

        filtered_df = df[df['Yorum'].apply(is_product_review)].copy()
        
        print(f"\nFiltreleme İstatistikleri:")
        print(f"Toplam yorum sayısı: {len(df)}")
        print(f"Ürün yorumu sayısı: {len(filtered_df)}")
        print(f"Filtrelenen yorum sayısı: {len(df) - len(filtered_df)}")
        print(f"Filtreleme oranı: {((len(df) - len(filtered_df)) / len(df) * 100):.2f}%")
        
        return filtered_df
    
    def predict_sentiment(self, text):
        """Tek bir yorum için sentiment analizi yap"""
        # Önce metni temizle
        text = self.preprocess_text(text)
        
        if not text:
            return {"label": "nötr", "score": 0.5}
            
        inputs = self.sentiment_tokenizer(text, return_tensors="pt", truncation=True, max_length=512)
        inputs = {k: v.to(self.device) for k, v in inputs.items()}
        
        with torch.no_grad():
            outputs = self.sentiment_model(**inputs)
            scores = torch.nn.functional.softmax(outputs.logits, dim=1)
            
        positive_score = scores[0][1].item()
        label = "pozitif" if positive_score > 0.5 else "negatif"
        
        return {"label": label, "score": positive_score}
    
    def analyze_reviews(self, df):
        """Filter the reviews and attach sentiment columns to the result.

        Args:
            df: DataFrame with a ``'Yorum'`` column holding the review texts.

        Returns:
            The filtered DataFrame returned by ``filter_product_reviews``
            with two added columns: ``'sentiment_score'`` and
            ``'sentiment_label'``.
        """
        print("\nSentiment analizi başlatılıyor...")

        # Remove reviews that are not about the product before scoring.
        product_df = self.filter_product_reviews(df)

        # Score every remaining review, with a progress bar.
        predictions = [
            self.predict_sentiment(review)
            for review in tqdm(product_df['Yorum'], desc="Yorumlar analiz ediliyor")
        ]

        product_df['sentiment_score'] = [item['score'] for item in predictions]
        product_df['sentiment_label'] = [item['label'] for item in predictions]

        return product_df
    
    def generate_summary(self, df):
        """İstatistiksel özet ve Gemini ile detaylı analiz"""
        # Temel istatistikler
        avg_rating = df['Yıldız Sayısı'].mean()
        total_reviews = len(df)
        
        # Sentiment bazlı gruplandırma
        positive_comments = df[df['sentiment_label'] == 'pozitif']['Yorum'].tolist()
        negative_comments = df[df['sentiment_label'] == 'negatif']['Yorum'].tolist()
        positive_count = len(positive_comments)
        negative_count = len(negative_comments)
        
        # Yıldız dağılımı
        star_dist = df['Yıldız Sayısı'].value_counts().sort_index()
        star_dist_text = "\n".join([f"{star} yıldız: {count} yorum" for star, count in star_dist.items()])
        
        # En sık kelimeler
        all_words = []
        for text in df['Yorum']:
            cleaned_text = self.preprocess_text(text)
            if cleaned_text:
                all_words.extend(cleaned_text.split())
        
        from collections import Counter
        word_freq = Counter(all_words).most_common(10)
        frequent_words = ", ".join([f"{word} ({count} kez)" for word, count in word_freq])
        
        # İstatistiksel özet metni
        stats_summary = f"""📊 ÜRÜN ANALİZ RAPORU



⭐ Ortalama Puan: {avg_rating:.1f}/5

📝 Toplam Yorum: {total_reviews}

✅ Pozitif Yorum: {positive_count}

❌ Negatif Yorum: {negative_count}



📈 YILDIZ DAĞILIMI:

{star_dist_text}



🔍 EN SIK KULLANILAN KELİMELER:

{frequent_words}



💬 ÖRNEK YORUMLAR:

✅ Pozitif Yorumlar:

{' | '.join(positive_comments[:3])}



❌ Negatif Yorumlar:

{' | '.join(negative_comments[:3])}"""

        # Gemini ile detaylı analiz
        if self.gemini_model:
            try:
                prompt = f"""Aşağıdaki ürün yorumları verilerine dayanarak detaylı bir analiz yap:



1. İstatistikler:

- Toplam {total_reviews} yorum

- Ortalama puan: {avg_rating:.1f}/5

- {positive_count} pozitif, {negative_count} negatif yorum



2. Örnek Pozitif Yorumlar:

{' | '.join(positive_comments[:3])}



3. Örnek Negatif Yorumlar:

{' | '.join(negative_comments[:3])}



4. En Sık Kullanılan Kelimeler:

{frequent_words}



Lütfen şu başlıklar altında bir değerlendirme yap:

1. Ürünün güçlü yönleri

2. Ürünün zayıf yönleri

3. Genel kullanıcı memnuniyeti

4. Potansiyel alıcılar için öneriler



Yanıtını Türkçe olarak ver ve mümkün olduğunca özlü tut."""

                response = self.gemini_model.generate_content(prompt)
                ai_analysis = response.text

                # İstatistiksel özet ve AI analizini birleştir
                return f"{stats_summary}\n\n🤖 YAPAY ZEKA ANALİZİ:\n{ai_analysis}"
            
            except Exception as e:
                print(f"Gemini API hatası: {str(e)}")
                return stats_summary
        
        return stats_summary
        
    def analyze_url(self, url):
        """Run the full pipeline for one product URL.

        The ``data`` directory is removed before scraping and again when the
        method exits, whether it succeeded or failed.

        Args:
            url: Address passed to ``scrape_reviews``.

        Returns:
            A 4-tuple ``(summary, sentiment_fig, rating_fig, trend_fig)``.
            When no reviews are scraped or any exception is raised, the first
            element is a message and the three figures are ``None``.
        """
        scratch_dir = "data"
        no_figures = (None, None, None)

        def purge_scratch():
            # Remove the scratch directory if it exists.
            if os.path.exists(scratch_dir):
                shutil.rmtree(scratch_dir)

        try:
            purge_scratch()

            reviews = scrape_reviews(url)
            if reviews.empty:
                return ("Yorumlar çekilemedi. Lütfen URL'yi kontrol edin.", *no_figures)

            analyzed = self.analyze_reviews(reviews)
            summary = self.generate_summary(analyzed)

            figures = (
                self.create_sentiment_distribution(analyzed),
                self.create_rating_distribution(analyzed),
                self.create_sentiment_by_rating(analyzed),
            )
            return (summary, *figures)

        except Exception as err:
            # UI boundary: report the failure as text instead of raising.
            return (f"Bir hata oluştu: {err}", *no_figures)

        finally:
            purge_scratch()
    
    def create_sentiment_distribution(self, df):
        """Return a pie chart of the reviews grouped by ``'sentiment_label'``."""
        chart_options = {
            'names': 'sentiment_label',
            'title': 'Duygu Analizi Dağılımı',
        }
        return px.pie(df, **chart_options)
    
    def create_rating_distribution(self, df):
        """Return a bar chart of the number of reviews per star rating."""
        # Review counts per rating, ordered by rating value.
        reviews_per_star = df['Yıldız Sayısı'].value_counts().sort_index()

        chart = px.bar(reviews_per_star, title='Yıldız Dağılımı')
        axis_titles = {
            'xaxis_title': 'Yıldız Sayısı',
            'yaxis_title': 'Yorum Sayısı',
        }
        chart.update_layout(**axis_titles)
        return chart
    
    def create_sentiment_by_rating(self, df):
        """Return a line chart of the mean ``'sentiment_score'`` per star rating."""
        mean_score_by_star = df.groupby('Yıldız Sayısı')['sentiment_score'].mean()

        chart = px.line(
            mean_score_by_star,
            title='Yıldız Sayısına Göre Ortalama Sentiment Skoru',
        )
        axis_titles = {
            'xaxis_title': 'Yıldız Sayısı',
            'yaxis_title': 'Ortalama Sentiment Skoru',
        }
        chart.update_layout(**axis_titles)
        return chart

def create_interface():
    """Build the Gradio Blocks UI for the review analysis app.

    Creates one ``ReviewAnalysisApp`` and wires its ``analyze_url`` method to
    the button: the URL textbox is the input, and the summary textbox plus
    the three plots are the outputs.

    Returns:
        The ``gr.Blocks`` interface; the caller is responsible for launching it.
    """
    # Constructing the app runs its setup steps once, before the UI is built.
    app = ReviewAnalysisApp()
    
    with gr.Blocks(theme=gr.themes.Soft()) as interface:
        gr.Markdown("# Trendyol Yorum Analizi")
        
        # Input row: the product reviews URL.
        with gr.Row():
            url_input = gr.Textbox(
                label="Trendyol Ürün Yorumları URL'si",
                placeholder="https://www.trendyol.com/..."
            )
        
        analyze_btn = gr.Button("Analiz Et")
        
        # Output row: text summary on the left, tabbed plots on the right.
        with gr.Row():
            with gr.Column(scale=1):
                summary_output = gr.Textbox(
                    label="Özet",
                    lines=10
                )
            
            with gr.Column(scale=2):
                with gr.Tab("Duygu Analizi"):
                    sentiment_dist = gr.Plot()
                with gr.Tab("Yıldız Dağılımı"):
                    rating_dist = gr.Plot()
                with gr.Tab("Sentiment-Yıldız İlişkisi"):
                    sentiment_rating = gr.Plot()
        
        # The output order matches the 4-tuple returned by analyze_url.
        analyze_btn.click(
            fn=app.analyze_url,
            inputs=[url_input],
            outputs=[summary_output, sentiment_dist, rating_dist, sentiment_rating]
        )
    
    return interface

if __name__ == "__main__":
    # Script entry point: build the Gradio UI and start serving it.
    create_interface().launch()