# models/text_quality.py
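"""Heuristic text-quality assessment.

Scores descriptive text with a zero-shot classification model when one can
be loaded, and falls back to static, length-based scoring otherwise.
"""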

from .model_loader import load_model
from .logging_config import logger

def assess_text_quality(text):
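    """Assess the quality of a piece of descriptive text.

    Args:
        text: The text to assess; non-string input is coerced with str() where needed.

    Returns:
        A dict with 'assessment', 'score' (int, capped at 100), 'reasoning',
        'is_ai_generated', 'quality_metrics' and 'model_used'; the successful
        model path also includes 'top_classifications'.
    """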
    try:
        # Handle very short or empty text with more reasonable scoring
        if not text or len(str(text).strip()) < 5:
            return {
                'assessment': 'insufficient',
                'score': 5,  # Give minimum score instead of 0
                'reasoning': 'Text too short or empty.',
                'is_ai_generated': False,
                'quality_metrics': {},
                'model_used': 'static_fallback'
            }
        
        # For very short text (5-20 characters), give basic score
        if len(str(text).strip()) < 20:
            return {
                'assessment': 'basic',
                'score': 15,  # Basic score for minimal text
                'reasoning': 'Very short text provided.',
                'is_ai_generated': False,
                'quality_metrics': {
                    'text_length': len(text),
                    'word_count': len(text.split()),
                    'sentence_count': text.count('.') + text.count('!') + text.count('?')
                },
                'model_used': 'static_fallback'
            }
        
        try:
            classifier = load_model("zero-shot-classification")  # Use standard model instead of typeform
        except Exception as e:
            logger.error(f"Error loading model in text quality: {str(e)}")
            # Fallback scoring for when model fails
            text_length = len(text)
            if text_length > 200:
                fallback_score = 60
                assessment = 'good'
            elif text_length > 100:
                fallback_score = 40
                assessment = 'adequate'
            elif text_length > 50:
                fallback_score = 25
                assessment = 'basic'
            else:
                fallback_score = 15
                assessment = 'basic'
            
            return {
                'assessment': assessment,
                'score': fallback_score,
                'reasoning': f'Model loading error, using fallback scoring based on text length ({text_length} chars).',
                'is_ai_generated': False,
                'quality_metrics': {
                    'text_length': text_length,
                    'word_count': len(text.split()),
                    'sentence_count': text.count('.') + text.count('!') + text.count('?')
                },
                'model_used': 'static_fallback'
            }

        # Enhanced quality categories with more specific indicators
        quality_categories = [
            "detailed and informative",
            "adequately detailed",
            "basic information",
            "vague description",
            "misleading content",
            "professional listing",
            "amateur listing",
            "spam-like content",
            "template-based content",
            "authentic description"
        ]

        # Analyze text with multiple aspects
        quality_result = classifier(text[:1000], quality_categories, multi_label=True)

        # Get top classifications with confidence scores
        top_classifications = []
        for label, score in zip(quality_result['labels'][:3], quality_result['scores'][:3]):
            if score > 0.3:  # Only include if confidence is above 30%
                top_classifications.append({
                    'classification': label,
                    'confidence': float(score)
                })

        # Calculate overall quality score
        positive_categories = ["detailed and informative", "adequately detailed", "professional listing", "authentic description"]
        negative_categories = ["vague description", "misleading content", "amateur listing", "spam-like content", "template-based content"]
        
        positive_score = sum(score for label, score in zip(quality_result['labels'], quality_result['scores']) 
                           if label in positive_categories)
        negative_score = sum(score for label, score in zip(quality_result['labels'], quality_result['scores']) 
                           if label in negative_categories)
        
        # Calculate final score (0-100) with better handling of edge cases
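        # With multi_label=True each category score is an independent probability, so the
        # positive/negative sums are not constrained to 1; the +1 offset and *50 factor map a
        # difference of -1..+1 onto 0..100, and out-of-range values are clamped just below.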
        base_score = (positive_score - negative_score + 1) * 50
        quality_score = max(10, min(100, int(base_score)))  # Ensure minimum 10% score
        
        # Determine assessment
        if quality_score >= 80:
            assessment = 'excellent'
        elif quality_score >= 60:
            assessment = 'good'
        elif quality_score >= 40:
            assessment = 'adequate'
        else:
            assessment = 'basic'  # Anything below 'adequate' is reported as 'basic'

        # Simple AI-generation heuristic: long text that either stacks common marketing
        # superlatives or is unusually long-winded is flagged as likely AI-generated.
        is_ai_generated = len(text) > 500 and (
            ('beautiful' in text.lower() and 'excellent' in text.lower() and 'prime' in text.lower())
            or (text.count('.') > 10 and len(text.split()) > 100)
        )

        return {
            'assessment': assessment,
            'score': quality_score,
            'reasoning': f'Quality score: {quality_score}/100 based on {len(top_classifications)} classifications.',
            'is_ai_generated': is_ai_generated,
            'quality_metrics': {
                'text_length': len(text),
                'word_count': len(text.split()),
                'sentence_count': text.count('.') + text.count('!') + text.count('?'),
                'positive_score': positive_score,
                'negative_score': negative_score
            },
            'top_classifications': top_classifications,
            'model_used': getattr(classifier, 'fallback_model', 'primary_model')
        }

    except Exception as e:
        logger.error(f"Error in text quality assessment: {str(e)}")
        # Return reasonable fallback instead of 0
        text_length = len(str(text)) if text else 0
        fallback_score = max(10, min(50, text_length // 2))  # Basic scoring based on length
        
        return {
            'assessment': 'basic',
            'score': fallback_score,
            'reasoning': f'Text quality assessment failed: {str(e)}. Using fallback scoring.',
            'is_ai_generated': False,
            'quality_metrics': {
                'text_length': text_length,
                'word_count': len(str(text).split()) if text else 0,
                'sentence_count': str(text).count('.') + str(text).count('!') + str(text).count('?') if text else 0
            },
            'model_used': 'error_fallback'
        }
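

# Minimal usage sketch (illustrative only, not part of the module's public interface):
# runs the assessment on a hypothetical sample description and prints the headline fields.
if __name__ == "__main__":
    sample = (
        "Spacious two-bedroom apartment near the city centre with a recently renovated "
        "kitchen, bright living room and private balcony. Available from next month."
    )
    result = assess_text_quality(sample)
    print(result['assessment'], result['score'])
    print(result['reasoning'])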