# models/text_quality.py
from .model_loader import load_model
from .logging_config import logger


def assess_text_quality(text):
    try:
        # Handle very short or empty text with more reasonable scoring
        if not text or len(str(text).strip()) < 5:
            return {
                'assessment': 'insufficient',
                'score': 5,  # Give minimum score instead of 0
                'reasoning': 'Text too short or empty.',
                'is_ai_generated': False,
                'quality_metrics': {},
                'model_used': 'static_fallback'
            }

        # Work with a plain string from here on so the later split()/count() calls are safe
        text = str(text)

        # For very short text (5-20 characters), give a basic score
        if len(text.strip()) < 20:
            return {
                'assessment': 'basic',
                'score': 15,  # Basic score for minimal text
                'reasoning': 'Very short text provided.',
                'is_ai_generated': False,
                'quality_metrics': {
                    'text_length': len(text),
                    'word_count': len(text.split()),
                    'sentence_count': text.count('.') + text.count('!') + text.count('?')
                },
                'model_used': 'static_fallback'
            }

        try:
            classifier = load_model("zero-shot-classification")  # Use standard model instead of typeform
        except Exception as e:
            logger.error(f"Error loading model in text quality: {str(e)}")
            # Fallback scoring for when the model fails to load
            text_length = len(text)
            if text_length > 200:
                fallback_score = 60
                assessment = 'good'
            elif text_length > 100:
                fallback_score = 40
                assessment = 'adequate'
            elif text_length > 50:
                fallback_score = 25
                assessment = 'basic'
            else:
                fallback_score = 15
                assessment = 'basic'
            return {
                'assessment': assessment,
                'score': fallback_score,
                'reasoning': f'Model loading error, using fallback scoring based on text length ({text_length} chars).',
                'is_ai_generated': False,
                'quality_metrics': {
                    'text_length': text_length,
                    'word_count': len(text.split()),
                    'sentence_count': text.count('.') + text.count('!') + text.count('?')
                },
                'model_used': 'static_fallback'
            }

        # Enhanced quality categories with more specific indicators
        quality_categories = [
            "detailed and informative",
            "adequately detailed",
            "basic information",
            "vague description",
            "misleading content",
            "professional listing",
            "amateur listing",
            "spam-like content",
            "template-based content",
            "authentic description"
        ]

        # Analyze the text against all categories (multi-label zero-shot classification)
        quality_result = classifier(text[:1000], quality_categories, multi_label=True)

        # Get top classifications with confidence scores
        top_classifications = []
        for label, score in zip(quality_result['labels'][:3], quality_result['scores'][:3]):
            if score > 0.3:  # Only include if confidence is above 30%
                top_classifications.append({
                    'classification': label,
                    'confidence': float(score)
                })

        # Calculate overall quality score from positive vs. negative category confidences
        positive_categories = ["detailed and informative", "adequately detailed", "professional listing", "authentic description"]
        negative_categories = ["vague description", "misleading content", "amateur listing", "spam-like content", "template-based content"]
        positive_score = sum(score for label, score in zip(quality_result['labels'], quality_result['scores'])
                             if label in positive_categories)
        negative_score = sum(score for label, score in zip(quality_result['labels'], quality_result['scores'])
                             if label in negative_categories)

        # Calculate final score (0-100) with better handling of edge cases
        base_score = (positive_score - negative_score + 1) * 50
        quality_score = max(10, min(100, int(base_score)))  # Ensure a minimum score of 10
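        # Worked example with illustrative numbers (not from any real run): positive_score = 1.6
        # and negative_score = 0.4 give base_score = (1.6 - 0.4 + 1) * 50 = 110, clamped down to 100;
        # positive_score = 0.2 and negative_score = 1.5 give -15, clamped up to the 10-point floor.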
        # Determine assessment band
        if quality_score >= 80:
            assessment = 'excellent'
        elif quality_score >= 60:
            assessment = 'good'
        elif quality_score >= 40:
            assessment = 'adequate'
        else:
            assessment = 'basic'  # Scores below 40 all map to 'basic' (formerly 'very poor')

        # Simple AI detection (basic heuristic); parentheses make the and/or grouping explicit
        is_ai_generated = len(text) > 500 and (
            ('beautiful' in text.lower() and 'excellent' in text.lower() and 'prime' in text.lower())
            or (text.count('.') > 10 and len(text.split()) > 100)
        )

        return {
            'assessment': assessment,
            'score': quality_score,
            'reasoning': f'Quality score: {quality_score}/100 based on {len(top_classifications)} classifications.',
            'is_ai_generated': is_ai_generated,
            'quality_metrics': {
                'text_length': len(text),
                'word_count': len(text.split()),
                'sentence_count': text.count('.') + text.count('!') + text.count('?'),
                'positive_score': positive_score,
                'negative_score': negative_score
            },
            'top_classifications': top_classifications,
            'model_used': getattr(classifier, 'fallback_model', 'primary_model')
        }
    except Exception as e:
        logger.error(f"Error in text quality assessment: {str(e)}")
        # Return a reasonable fallback instead of 0
        text_length = len(str(text)) if text else 0
        fallback_score = max(10, min(50, text_length // 2))  # Basic scoring based on length
        return {
            'assessment': 'basic',
            'score': fallback_score,
            'reasoning': f'Text quality assessment failed: {str(e)}. Using fallback scoring.',
            'is_ai_generated': False,
            'quality_metrics': {
                'text_length': text_length,
                'word_count': len(str(text).split()) if text else 0,
                'sentence_count': (str(text).count('.') + str(text).count('!') + str(text).count('?')) if text else 0
            },
            'model_used': 'error_fallback'
        }
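

# --- Illustrative usage sketch (not part of the original module) ---
# Assumes this file lives in the `models` package, so the relative imports above only
# resolve when it is run as a module, e.g. `python -m models.text_quality` from the
# project root; the sample text below is made up for demonstration.
if __name__ == "__main__":
    sample = (
        "Spacious two-bedroom apartment close to public transport. Recently renovated "
        "kitchen, bright living room, and a private balcony overlooking a quiet courtyard."
    )
    result = assess_text_quality(sample)
    # The function returns the same dict shape on every code path (model, fallback, or error).
    print(result['assessment'], result['score'])
    print(result['reasoning'])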