# models/text_quality.py
from .model_loader import load_model
from .logging_config import logger
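
# NOTE (assumption): load_model("zero-shot-classification") is expected to return a
# Hugging Face transformers-style zero-shot classification pipeline, i.e. a callable
# taking (text, candidate_labels, multi_label=...) and returning a dict with 'labels'
# and 'scores'. The optional 'fallback_model' attribute read near the end of
# assess_text_quality is presumably set by this project's model_loader wrapper,
# not by the transformers API itself.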

def assess_text_quality(text):
    """Assess the quality of a property listing description and return a score with supporting metrics."""
    try:
        # Handle very short or empty text with more reasonable scoring
        if not text or len(str(text).strip()) < 5:
            return {
                'assessment': 'insufficient',
                'score': 5,  # Give a minimum score instead of 0
                'reasoning': 'Text too short or empty.',
                'is_ai_generated': False,
                'quality_metrics': {},
                'model_used': 'static_fallback'
            }

        # Normalise to a string so the length/word/sentence metrics below are safe
        text = str(text)

        # For very short text (5-20 characters), give a basic score
        if len(text.strip()) < 20:
            return {
                'assessment': 'basic',
                'score': 15,  # Basic score for minimal text
                'reasoning': 'Very short text provided.',
                'is_ai_generated': False,
                'quality_metrics': {
                    'text_length': len(text),
                    'word_count': len(text.split()),
                    'sentence_count': text.count('.') + text.count('!') + text.count('?')
                },
                'model_used': 'static_fallback'
            }
        try:
            classifier = load_model("zero-shot-classification")  # Use standard model instead of typeform
        except Exception as e:
            logger.error(f"Error loading model in text quality: {str(e)}")
            # Much more lenient fallback scoring for when model loading fails
            text_length = len(text)
            if text_length > 200:
                fallback_score = 70  # Increased from 60
                assessment = 'good'
            elif text_length > 100:
                fallback_score = 50  # Increased from 40
                assessment = 'adequate'
            elif text_length > 50:
                fallback_score = 35  # Increased from 25
                assessment = 'basic'
            else:
                fallback_score = 25  # Increased from 15
                assessment = 'basic'
            return {
                'assessment': assessment,
                'score': fallback_score,
                'reasoning': f'Model loading error, using fallback scoring based on text length ({text_length} chars).',
                'is_ai_generated': False,
                'quality_metrics': {
                    'text_length': text_length,
                    'word_count': len(text.split()),
                    'sentence_count': text.count('.') + text.count('!') + text.count('?')
                },
                'model_used': 'static_fallback'
            }
        # Enhanced quality categories with more specific indicators
        quality_categories = [
            "detailed and informative",
            "adequately detailed",
            "basic information",
            "vague description",
            "misleading content",
            "professional listing",
            "amateur listing",
            "spam-like content",
            "template-based content",
            "authentic description"
        ]

        # Score the text against all categories (multi-label zero-shot classification)
        quality_result = classifier(text[:1000], quality_categories, multi_label=True)

        # Keep the top classifications whose confidence is above 30%
        top_classifications = []
        for label, score in zip(quality_result['labels'][:3], quality_result['scores'][:3]):
            if score > 0.3:
                top_classifications.append({
                    'classification': label,
                    'confidence': float(score)
                })

        # Calculate overall quality score
        positive_categories = ["detailed and informative", "adequately detailed", "professional listing", "authentic description"]
        negative_categories = ["vague description", "misleading content", "amateur listing", "spam-like content", "template-based content"]

        positive_score = sum(score for label, score in zip(quality_result['labels'], quality_result['scores'])
                             if label in positive_categories)
        negative_score = sum(score for label, score in zip(quality_result['labels'], quality_result['scores'])
                             if label in negative_categories)

        # Calculate the final score (0-100); positive_score and negative_score are sums of
        # independent per-label probabilities, so the result is clamped to the 20-100 range
        base_score = (positive_score - negative_score + 1) * 50
        quality_score = max(20, min(100, int(base_score)))  # Increased minimum from 10 to 20
        # Much more lenient assessment thresholds
        if quality_score >= 70:  # Reduced from 80
            assessment = 'excellent'
        elif quality_score >= 50:  # Reduced from 60
            assessment = 'good'
        elif quality_score >= 30:  # Reduced from 40
            assessment = 'adequate'
        else:
            assessment = 'basic'  # Scores are clamped to a minimum of 20

        # Simple AI-generation detection (basic keyword/length heuristic)
        is_ai_generated = len(text) > 500 and (
            ('beautiful' in text.lower() and 'excellent' in text.lower() and 'prime' in text.lower())
            or (text.count('.') > 10 and len(text.split()) > 100)
        )
        return {
            'assessment': assessment,
            'score': quality_score,
            'reasoning': f'Quality score: {quality_score}/100 based on {len(top_classifications)} classifications.',
            'is_ai_generated': is_ai_generated,
            'quality_metrics': {
                'text_length': len(text),
                'word_count': len(text.split()),
                'sentence_count': text.count('.') + text.count('!') + text.count('?'),
                'positive_score': positive_score,
                'negative_score': negative_score
            },
            'top_classifications': top_classifications,
            'model_used': getattr(classifier, 'fallback_model', 'primary_model')
        }
    except Exception as e:
        logger.error(f"Error in text quality assessment: {str(e)}")
        # Return a much more reasonable fallback instead of 0
        text_length = len(str(text)) if text else 0
        fallback_score = max(25, min(60, text_length // 2 + 20))  # Much more lenient scoring based on length
        return {
            'assessment': 'basic',
            'score': fallback_score,
            'reasoning': f'Text quality assessment failed: {str(e)}. Using fallback scoring.',
            'is_ai_generated': False,
            'quality_metrics': {
                'text_length': text_length,
                'word_count': len(str(text).split()) if text else 0,
                'sentence_count': (str(text).count('.') + str(text).count('!') + str(text).count('?')) if text else 0
            },
            'model_used': 'error_fallback'
        }
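

# Minimal usage sketch (illustrative, not part of the production flow). It assumes this
# file lives in a "models" package alongside model_loader.py and logging_config.py, so it
# must be run as a module (e.g. `python -m models.text_quality`) rather than as a script,
# and that the zero-shot classification model can actually be loaded in the environment.
if __name__ == "__main__":
    sample = (
        "Spacious 3BHK apartment with modular kitchen, covered parking and 24x7 security. "
        "Located close to schools, hospitals and the metro station."
    )
    result = assess_text_quality(sample)
    print(f"Assessment: {result['assessment']} (score {result['score']}/100)")
    print(f"Reasoning: {result['reasoning']}")
    print(f"Metrics: {result['quality_metrics']}")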