# models/text_quality.py
from .model_loader import load_model
from .logging_config import logger
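
# NOTE (assumption): load_model("zero-shot-classification") is expected to return a
# Hugging Face transformers-style zero-shot classification pipeline, i.e. a callable
# taking (text, candidate_labels, multi_label=...) and returning a dict with 'labels'
# and 'scores'. The optional 'fallback_model' attribute read near the end of
# assess_text_quality is presumably set by this project's model_loader wrapper,
# not by the transformers API itself.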

def assess_text_quality(text):
    """Assess the quality of a property listing description and return a score with supporting metrics."""
    try:
        # Handle very short or empty text with more reasonable scoring
        if not text or len(str(text).strip()) < 5:
            return {
                'assessment': 'insufficient',
                'score': 5,  # Give a minimum score instead of 0
                'reasoning': 'Text too short or empty.',
                'is_ai_generated': False,
                'quality_metrics': {},
                'model_used': 'static_fallback'
            }

        # Normalise to a string so the length/word/sentence metrics below are safe
        text = str(text)

        # For very short text (5-20 characters), give a basic score
        if len(text.strip()) < 20:
            return {
                'assessment': 'basic',
                'score': 15,  # Basic score for minimal text
                'reasoning': 'Very short text provided.',
                'is_ai_generated': False,
                'quality_metrics': {
                    'text_length': len(text),
                    'word_count': len(text.split()),
                    'sentence_count': text.count('.') + text.count('!') + text.count('?')
                },
                'model_used': 'static_fallback'
            }
        try:
            classifier = load_model("zero-shot-classification")  # Use standard model instead of typeform
        except Exception as e:
            logger.error(f"Error loading model in text quality: {str(e)}")
            # Much more lenient fallback scoring for when model loading fails
            text_length = len(text)
            if text_length > 200:
                fallback_score = 70  # Increased from 60
                assessment = 'good'
            elif text_length > 100:
                fallback_score = 50  # Increased from 40
                assessment = 'adequate'
            elif text_length > 50:
                fallback_score = 35  # Increased from 25
                assessment = 'basic'
            else:
                fallback_score = 25  # Increased from 15
                assessment = 'basic'
            return {
                'assessment': assessment,
                'score': fallback_score,
                'reasoning': f'Model loading error, using fallback scoring based on text length ({text_length} chars).',
                'is_ai_generated': False,
                'quality_metrics': {
                    'text_length': text_length,
                    'word_count': len(text.split()),
                    'sentence_count': text.count('.') + text.count('!') + text.count('?')
                },
                'model_used': 'static_fallback'
            }
        # Enhanced quality categories with more specific indicators
        quality_categories = [
            "detailed and informative",
            "adequately detailed",
            "basic information",
            "vague description",
            "misleading content",
            "professional listing",
            "amateur listing",
            "spam-like content",
            "template-based content",
            "authentic description"
        ]

        # Score the text against all categories (multi-label zero-shot classification)
        quality_result = classifier(text[:1000], quality_categories, multi_label=True)

        # Keep the top classifications whose confidence is above 30%
        top_classifications = []
        for label, score in zip(quality_result['labels'][:3], quality_result['scores'][:3]):
            if score > 0.3:
                top_classifications.append({
                    'classification': label,
                    'confidence': float(score)
                })

        # Calculate overall quality score
        positive_categories = ["detailed and informative", "adequately detailed", "professional listing", "authentic description"]
        negative_categories = ["vague description", "misleading content", "amateur listing", "spam-like content", "template-based content"]

        positive_score = sum(score for label, score in zip(quality_result['labels'], quality_result['scores'])
                             if label in positive_categories)
        negative_score = sum(score for label, score in zip(quality_result['labels'], quality_result['scores'])
                             if label in negative_categories)

        # Calculate the final score (0-100); positive_score and negative_score are sums of
        # independent per-label probabilities, so the result is clamped to the 20-100 range
        base_score = (positive_score - negative_score + 1) * 50
        quality_score = max(20, min(100, int(base_score)))  # Increased minimum from 10 to 20
        # Much more lenient assessment thresholds
        if quality_score >= 70:  # Reduced from 80
            assessment = 'excellent'
        elif quality_score >= 50:  # Reduced from 60
            assessment = 'good'
        elif quality_score >= 30:  # Reduced from 40
            assessment = 'adequate'
        else:
            assessment = 'basic'  # Scores are clamped to a minimum of 20

        # Simple AI-generation detection (basic keyword/length heuristic)
        is_ai_generated = len(text) > 500 and (
            ('beautiful' in text.lower() and 'excellent' in text.lower() and 'prime' in text.lower())
            or (text.count('.') > 10 and len(text.split()) > 100)
        )
        return {
            'assessment': assessment,
            'score': quality_score,
            'reasoning': f'Quality score: {quality_score}/100 based on {len(top_classifications)} classifications.',
            'is_ai_generated': is_ai_generated,
            'quality_metrics': {
                'text_length': len(text),
                'word_count': len(text.split()),
                'sentence_count': text.count('.') + text.count('!') + text.count('?'),
                'positive_score': positive_score,
                'negative_score': negative_score
            },
            'top_classifications': top_classifications,
            'model_used': getattr(classifier, 'fallback_model', 'primary_model')
        }
    except Exception as e:
        logger.error(f"Error in text quality assessment: {str(e)}")
        # Return a much more reasonable fallback instead of 0
        text_length = len(str(text)) if text else 0
        fallback_score = max(25, min(60, text_length // 2 + 20))  # Much more lenient scoring based on length
        return {
            'assessment': 'basic',
            'score': fallback_score,
            'reasoning': f'Text quality assessment failed: {str(e)}. Using fallback scoring.',
            'is_ai_generated': False,
            'quality_metrics': {
                'text_length': text_length,
                'word_count': len(str(text).split()) if text else 0,
                'sentence_count': (str(text).count('.') + str(text).count('!') + str(text).count('?')) if text else 0
            },
            'model_used': 'error_fallback'
        }
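

# Minimal usage sketch (illustrative, not part of the production flow). It assumes this
# file lives in a "models" package alongside model_loader.py and logging_config.py, so it
# must be run as a module (e.g. `python -m models.text_quality`) rather than as a script,
# and that the zero-shot classification model can actually be loaded in the environment.
if __name__ == "__main__":
    sample = (
        "Spacious 3BHK apartment with modular kitchen, covered parking and 24x7 security. "
        "Located close to schools, hospitals and the metro station."
    )
    result = assess_text_quality(sample)
    print(f"Assessment: {result['assessment']} (score {result['score']}/100)")
    print(f"Reasoning: {result['reasoning']}")
    print(f"Metrics: {result['quality_metrics']}")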