# app.py - CodeLab Stage 3: Semantic Analysis - Enhanced Version
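"""CodeLab Stage 3 semantic analysis service.

Loads CodeBERT (embeddings) and CodeT5 (summarization/generation) to analyze
student Python code, generate an optimal reference solution, compare the two,
and return JSON insights and recommendations via a Gradio interface.
"""
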
import gradio as gr
import torch
from transformers import AutoTokenizer, AutoModel, T5ForConditionalGeneration
import json
import re
import ast
import time
from typing import Dict, List, Any, Optional
import logging
import traceback

# Set up logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

class SemanticAnalyzer:
    def __init__(self):
        logger.info("Initializing CodeLab Semantic Analyzer...")
        self.models_loaded = False

        # Initialize models with error handling
        try:
            # CodeBERT for semantic understanding
            logger.info("Loading CodeBERT...")
            self.codebert_tokenizer = AutoTokenizer.from_pretrained("microsoft/codebert-base")
            self.codebert_model = AutoModel.from_pretrained("microsoft/codebert-base")

            # CodeT5 for code analysis and generation
            # (CodeT5 ships a RoBERTa-style BPE tokenizer, so AutoTokenizer is used here)
            logger.info("Loading CodeT5...")
            self.codet5_tokenizer = AutoTokenizer.from_pretrained("Salesforce/codet5-base")
            self.codet5_model = T5ForConditionalGeneration.from_pretrained("Salesforce/codet5-base")

            # Set models to eval mode for inference
            self.codebert_model.eval()
            self.codet5_model.eval()

            self.models_loaded = True
            logger.info("All models loaded successfully!")
        except Exception as e:
            logger.error(f"Error loading models: {str(e)}")
            self.models_loaded = False
            # Don't raise - allow fallback functionality

    def generate_code_embedding(self, code: str) -> List[float]:
        """Generate semantic embedding using CodeBERT"""
        if not self.models_loaded:
            logger.warning("Models not loaded, returning zero embedding")
            return [0.0] * 768

        try:
            # Clean and prepare code
            cleaned_code = self._clean_code_for_analysis(code)

            # Tokenize code
            inputs = self.codebert_tokenizer(
                cleaned_code,
                return_tensors="pt",
                max_length=512,
                truncation=True,
                padding=True
            )

            # Generate embedding
            with torch.no_grad():
                outputs = self.codebert_model(**inputs)
                # Use [CLS] token embedding (better for semantic representation)
                embedding = outputs.last_hidden_state[:, 0, :].squeeze()

            # Normalize embedding
            embedding_norm = torch.nn.functional.normalize(embedding, dim=0)
            return embedding_norm.tolist()
        except Exception as e:
            logger.error(f"Error generating embedding: {str(e)}")
            return [0.0] * 768  # Return zero vector on error

    def analyze_with_codet5(self, code: str, question_text: str) -> Dict[str, Any]:
        """Enhanced code analysis using CodeT5"""
        if not self.models_loaded:
            return self._fallback_analysis(code)

        try:
            results = {}

            # Task 1: Code summarization with better prompt
            summarize_input = f"Summarize the following Python function: {code}"
            inputs = self.codet5_tokenizer(
                summarize_input,
                return_tensors="pt",
                max_length=512,
                truncation=True
            )
            with torch.no_grad():
                summary_ids = self.codet5_model.generate(
                    inputs.input_ids,
                    max_length=100,
                    num_beams=3,  # Increased for better quality
                    early_stopping=True,
                    do_sample=False,  # Deterministic for consistency
                    pad_token_id=self.codet5_tokenizer.pad_token_id
                )
            results['code_summary'] = self.codet5_tokenizer.decode(
                summary_ids[0],
                skip_special_tokens=True
            )

            # Task 2: Enhanced pattern extraction
            results['logic_patterns'] = self.extract_logic_patterns_enhanced(code)
            results['approach_analysis'] = self.analyze_approach_enhanced(code)
            results['complexity_analysis'] = self.analyze_complexity_enhanced(code)
            results['semantic_quality'] = self.assess_semantic_quality(code)

            return results
        except Exception as e:
            logger.error(f"Error in CodeT5 analysis: {str(e)}")
            return self._fallback_analysis(code)

    def _fallback_analysis(self, code: str) -> Dict[str, Any]:
        """Fallback analysis when AI models fail"""
        return {
            'code_summary': f'Python function with {len(code.splitlines())} lines',
            'logic_patterns': self.extract_logic_patterns_enhanced(code),
            'approach_analysis': self.analyze_approach_enhanced(code),
            'complexity_analysis': self.analyze_complexity_enhanced(code),
            'semantic_quality': self.assess_semantic_quality(code)
        }

    def extract_logic_patterns_enhanced(self, code: str) -> List[str]:
        """Enhanced logical pattern extraction"""
        patterns = []
        code_lower = code.lower()

        # Basic patterns
        if 'max(' in code: patterns.append('builtin_max')
        if 'min(' in code: patterns.append('builtin_min')
        if 'sum(' in code: patterns.append('builtin_sum')
        if 'len(' in code: patterns.append('length_operations')
        if 'sorted(' in code: patterns.append('sorting_operations')

        # Control flow patterns
        if 'for' in code and 'if' in code: patterns.append('iterative_conditional')
        if 'while' in code: patterns.append('loop_based')
        if 'def' in code: patterns.append('function_definition')
        if 'return' in code: patterns.append('return_statement')

        # Advanced patterns with regex
        if re.search(r'for\s+\w+\s+in\s+range', code): patterns.append('indexed_iteration')
        if re.search(r'for\s+\w+\s+in\s+enumerate', code): patterns.append('indexed_enumeration')
        if re.search(r'if\s+.*[<>]=?.*:', code): patterns.append('comparison_logic')
        if re.search(r'\[.*\]', code): patterns.append('list_operations')

        # Error handling patterns
        if 'try:' in code or 'except' in code: patterns.append('error_handling')
        if 'if not' in code or 'if len(' in code: patterns.append('input_validation')

        # Mathematical patterns
        if any(op in code for op in ['**', 'pow(', 'sqrt', 'math.']): patterns.append('mathematical_operations')

        return list(set(patterns))  # Remove duplicates

    def analyze_approach_enhanced(self, code: str) -> str:
        """Enhanced algorithmic approach analysis"""
        # Built-in function approaches (optimal)
        if 'max(' in code and 'min(' not in code:
            return 'builtin_maximum_approach'
        elif 'min(' in code and 'max(' not in code:
            return 'builtin_minimum_approach'
        elif 'max(' in code and 'min(' in code:
            return 'dual_builtin_approach'
        elif 'sum(' in code:
            return 'builtin_aggregation_approach'
        elif 'sorted(' in code:
            return 'sorting_based_approach'
        # Loop-based approaches
        elif 'for' in code and 'if' in code and 'range' in code:
            return 'indexed_iterative_approach'
        elif 'for' in code and 'if' in code:
            return 'iterative_comparison_approach'
        elif 'while' in code:
            return 'loop_based_approach'
        # Advanced approaches
        elif 'enumerate' in code:
            return 'enumerated_iteration_approach'
        elif re.search(r'def\s+\w+.*def\s+\w+', code, re.DOTALL):  # re.DOTALL so '.*' can span newlines between the two defs
            return 'nested_function_approach'
        else:
            return 'custom_logic_approach'

    def analyze_complexity_enhanced(self, code: str) -> Dict[str, str]:
        """Enhanced complexity analysis"""
        def estimate_time_complexity(code):
            nested_loops = len(re.findall(r'for.*for|while.*for|for.*while', code))
            single_loops = code.count('for') + code.count('while') - (nested_loops * 2)

            if 'max(' in code or 'min(' in code or 'sum(' in code:
                return 'O(n)'
            elif 'sorted(' in code:
                return 'O(n log n)'
            elif nested_loops >= 1:
                return 'O(n²)' if nested_loops == 1 else 'O(n³)'
            elif single_loops >= 1:
                return 'O(n)'
            else:
                return 'O(1)'

        def estimate_space_complexity(code):
            if 'sorted(' in code or re.search(r'\[.*for.*\]', code):
                return 'O(n)'
            elif '[' in code and ']' in code:
                return 'O(n)'
            else:
                return 'O(1)'

        return {
            'time': estimate_time_complexity(code),
            'space': estimate_space_complexity(code)
        }

    def assess_semantic_quality(self, code: str) -> Dict[str, Any]:
        """Assess the semantic quality of code"""
        quality_metrics = {
            'readability_score': 0,
            'logic_clarity': 'unclear',
            'efficiency_level': 'low',
            'best_practices': []
        }

        # Readability assessment
        lines = code.splitlines()
        total_score = 6  # base score; the best-practice bonuses below add up to +3

        # Check for comments or docstrings
        if '"""' in code or "'''" in code or '#' in code:
            quality_metrics['best_practices'].append('documented_code')
            total_score += 1

        # Check for meaningful variable names
        if re.search(r'\b(max_val|min_val|result|answer|total)\b', code):
            quality_metrics['best_practices'].append('meaningful_variables')
            total_score += 1

        # Check for input validation
        if 'if not' in code or 'if len(' in code:
            quality_metrics['best_practices'].append('input_validation')
            total_score += 1

        # Efficiency assessment
        if any(builtin in code for builtin in ['max(', 'min(', 'sum(']):
            quality_metrics['efficiency_level'] = 'high'
            quality_metrics['best_practices'].append('builtin_functions')
        elif 'for' in code and 'if' in code:
            quality_metrics['efficiency_level'] = 'medium'

        # Logic clarity
        if len(lines) <= 10 and 'def' in code and 'return' in code:
            quality_metrics['logic_clarity'] = 'clear'
        elif len(lines) <= 20:
            quality_metrics['logic_clarity'] = 'moderate'

        quality_metrics['readability_score'] = min(10, max(1, total_score))
        return quality_metrics

    def generate_optimal_solution(self, question_text: str, question_type: str = "auto_detect") -> Dict[str, Any]:
        """Enhanced optimal solution generation"""
        try:
            question_lower = question_text.lower()

            # Pattern-based solution generation (more reliable than AI generation)
            if 'max' in question_lower and 'min' not in question_lower:
                return {
                    'code': 'def find_max(numbers):\n    """Find maximum value in a list"""\n    if not numbers:\n        return None\n    return max(numbers)',
                    'explanation': 'Optimal solution using built-in max() function with input validation',
                    'approach': 'builtin_optimized',
                    'complexity': {'time': 'O(n)', 'space': 'O(1)'},
                    'generated_by': 'pattern_optimized',
                    'quality_score': 10
                }
            elif 'min' in question_lower and 'max' not in question_lower:
                return {
                    'code': 'def find_min(numbers):\n    """Find minimum value in a list"""\n    if not numbers:\n        return None\n    return min(numbers)',
                    'explanation': 'Optimal solution using built-in min() function with input validation',
                    'approach': 'builtin_optimized',
                    'complexity': {'time': 'O(n)', 'space': 'O(1)'},
                    'generated_by': 'pattern_optimized',
                    'quality_score': 10
                }
            elif 'sum' in question_lower or 'total' in question_lower:
                return {
                    'code': 'def calculate_sum(numbers):\n    """Calculate sum of numbers in a list"""\n    return sum(numbers)',
                    'explanation': 'Optimal solution using built-in sum() function',
                    'approach': 'builtin_optimized',
                    'complexity': {'time': 'O(n)', 'space': 'O(1)'},
                    'generated_by': 'pattern_optimized',
                    'quality_score': 10
                }
            else:
                # Try AI generation as fallback
                if self.models_loaded:
                    return self._ai_generate_solution(question_text)
                else:
                    return self._template_solution(question_text)
        except Exception as e:
            logger.error(f"Error generating optimal solution: {str(e)}")
            return self._template_solution(question_text)

    def _ai_generate_solution(self, question_text: str) -> Dict[str, Any]:
        """AI-based solution generation using CodeT5"""
        try:
            generate_input = f"Generate optimal Python function for: {question_text}"
            inputs = self.codet5_tokenizer(
                generate_input,
                return_tensors="pt",
                max_length=256,
                truncation=True
            )
            with torch.no_grad():
                generated_ids = self.codet5_model.generate(
                    inputs.input_ids,
                    max_length=200,
                    num_beams=3,
                    early_stopping=True,
                    do_sample=False,  # Deterministic
                    pad_token_id=self.codet5_tokenizer.pad_token_id
                )
            generated_code = self.codet5_tokenizer.decode(
                generated_ids[0],
                skip_special_tokens=True
            )

            return {
                'code': generated_code,
                'explanation': 'AI-generated solution using CodeT5',
                'approach': 'ai_generated',
                'complexity': 'O(n)',
                'generated_by': 'codet5',
                'quality_score': 7
            }
        except Exception as e:
            logger.error(f"Error in AI generation: {str(e)}")
            return self._template_solution(question_text)

    def _template_solution(self, question_text: str) -> Dict[str, Any]:
        """Template-based fallback solution"""
        return {
            'code': 'def solution(data):\n    """Template solution"""\n    # Implementation needed\n    return data[0] if data else None',
            'explanation': 'Template solution - implementation needed based on specific requirements',
            'approach': 'template_fallback',
            'complexity': 'O(1)',
            'generated_by': 'template',
            'quality_score': 5
        }

    def compare_solutions(self, student_code: str, optimal_code: str) -> Dict[str, Any]:
        """Enhanced solution comparison"""
        try:
            # Generate embeddings for semantic comparison
            student_embedding = self.generate_code_embedding(student_code)
            optimal_embedding = self.generate_code_embedding(optimal_code)

            # Calculate semantic similarity
            similarity = self.calculate_cosine_similarity(student_embedding, optimal_embedding)

            # Pattern analysis
            student_patterns = self.extract_logic_patterns_enhanced(student_code)
            optimal_patterns = self.extract_logic_patterns_enhanced(optimal_code)

            # Approach comparison
            student_approach = self.analyze_approach_enhanced(student_code)
            optimal_approach = self.analyze_approach_enhanced(optimal_code)

            # Quality comparison
            student_quality = self.assess_semantic_quality(student_code)
            optimal_quality = self.assess_semantic_quality(optimal_code)

            return {
                'semantic_similarity': float(similarity),
                'student_patterns': student_patterns,
                'optimal_patterns': optimal_patterns,
                'pattern_overlap': len(set(student_patterns) & set(optimal_patterns)),
                'approach_comparison': {
                    'student': student_approach,
                    'optimal': optimal_approach,
                    'matches': student_approach == optimal_approach
                },
                'quality_comparison': {
                    'student_readability': student_quality['readability_score'],
                    'optimal_readability': optimal_quality['readability_score'],
                    'student_efficiency': student_quality['efficiency_level'],
                    'optimal_efficiency': optimal_quality['efficiency_level']
                },
                'complexity_comparison': self.compare_complexity_enhanced(student_code, optimal_code)
            }
        except Exception as e:
            logger.error(f"Error comparing solutions: {str(e)}")
            return {
                'semantic_similarity': 0.0,
                'student_patterns': [],
                'optimal_patterns': [],
                'pattern_overlap': 0,
                'approach_comparison': {'error': str(e)},
                'quality_comparison': {'error': str(e)},
                'complexity_comparison': 'unable_to_compare'
            }

    def calculate_cosine_similarity(self, vec1: List[float], vec2: List[float]) -> float:
        """Enhanced cosine similarity calculation"""
        try:
            if len(vec1) != len(vec2) or not vec1 or not vec2:
                return 0.0

            # Convert to tensors for more accurate calculation
            vec1_tensor = torch.tensor(vec1)
            vec2_tensor = torch.tensor(vec2)

            # Calculate cosine similarity
            similarity = torch.nn.functional.cosine_similarity(
                vec1_tensor.unsqueeze(0),
                vec2_tensor.unsqueeze(0)
            )
            return float(similarity.item())
        except Exception as e:
            logger.error(f"Error calculating similarity: {str(e)}")
            return 0.0

    def compare_complexity_enhanced(self, code1: str, code2: str) -> Dict[str, Any]:
        """Enhanced complexity comparison"""
        complexity1 = self.analyze_complexity_enhanced(code1)
        complexity2 = self.analyze_complexity_enhanced(code2)

        # Complexity ranking for comparison
        complexity_rank = {
            'O(1)': 1, 'O(log n)': 2, 'O(n)': 3,
            'O(n log n)': 4, 'O(n²)': 5, 'O(n³)': 6
        }
        rank1 = complexity_rank.get(complexity1['time'], 999)
        rank2 = complexity_rank.get(complexity2['time'], 999)

        return {
            'student_complexity': complexity1,
            'optimal_complexity': complexity2,
            'efficiency_comparison': 'better' if rank1 < rank2 else 'worse' if rank1 > rank2 else 'same',
            'recommendation': self._get_complexity_recommendation(complexity1, complexity2)
        }

    def _get_complexity_recommendation(self, student_comp: Dict, optimal_comp: Dict) -> str:
        """Generate complexity-based recommendations"""
        if student_comp['time'] == optimal_comp['time']:
            return "Excellent! Your solution has optimal time complexity"
        elif student_comp['time'] in ['O(n²)', 'O(n³)'] and optimal_comp['time'] == 'O(n)':
            return "Consider using built-in functions to improve from quadratic to linear complexity"
        elif student_comp['time'] == 'O(n)' and optimal_comp['time'] == 'O(1)':
            return "Good approach, but there might be a constant-time solution"
        else:
            return "Your complexity is acceptable, but optimization is possible"

    def _clean_code_for_analysis(self, code: str) -> str:
        """Clean code for better analysis"""
        # Remove excessive whitespace
        lines = [line.strip() for line in code.split('\n') if line.strip()]
        return '\n'.join(lines)

# Initialize the analyzer (with lazy loading)
analyzer = None

def get_analyzer():
    """Get analyzer instance with lazy initialization"""
    global analyzer
    if analyzer is None:
        analyzer = SemanticAnalyzer()
    return analyzer

def process_semantic_analysis(
    student_code: str,
    question_text: str,
    question_id: str = "default",
    need_optimal_solution: bool = True
) -> str:
    """Enhanced main function for semantic analysis"""
    start_time = time.time()

    try:
        logger.info(f"Starting enhanced semantic analysis for question: {question_id}")

        # Get analyzer instance
        semantic_analyzer = get_analyzer()

        # Input validation
        if not student_code or not student_code.strip():
            return json.dumps({
                'success': False,
                'error': 'Empty code provided',
                'processing_time_ms': int((time.time() - start_time) * 1000)
            })

        # Step 1: Generate code embedding
        logger.info("Generating code embedding...")
        code_embedding = semantic_analyzer.generate_code_embedding(student_code)

        # Step 2: Enhanced analysis with CodeT5
        logger.info("Performing enhanced analysis...")
        codet5_analysis = semantic_analyzer.analyze_with_codet5(student_code, question_text)

        # Step 3: Generate optimal solution if needed
        optimal_solution = None
        if need_optimal_solution:
            logger.info("Generating optimal solution...")
            optimal_solution = semantic_analyzer.generate_optimal_solution(question_text)

        # Step 4: Enhanced solution comparison
        comparison = None
        if optimal_solution:
            logger.info("Performing enhanced comparison...")
            comparison = semantic_analyzer.compare_solutions(student_code, optimal_solution['code'])

        # Step 5: Generate comprehensive insights
        insights = generate_comprehensive_insights(
            student_code,
            codet5_analysis,
            comparison,
            optimal_solution
        )

        processing_time = time.time() - start_time

        # Prepare enhanced results
        results = {
            'success': True,
            'processing_time_ms': int(processing_time * 1000),
            'semantic_analysis': {
                'code_embedding': code_embedding[:100],  # First 100 dimensions of the embedding (full size reported in 'embedding_size')
                'embedding_size': len(code_embedding),
                'logic_patterns': codet5_analysis['logic_patterns'],
                'approach_analysis': codet5_analysis['approach_analysis'],
                'complexity_analysis': codet5_analysis['complexity_analysis'],
                'semantic_quality': codet5_analysis['semantic_quality'],
                'code_summary': codet5_analysis['code_summary']
            },
            'optimal_solution': optimal_solution,
            'solution_comparison': comparison,
            'semantic_insights': insights,
            'recommendations': generate_recommendations(codet5_analysis, comparison),
            'metadata': {
                'question_id': question_id,
                'analysis_version': '3.1-enhanced-ai',
                'models_used': ['CodeBERT', 'CodeT5'] if semantic_analyzer.models_loaded else ['Fallback'],
                'models_status': 'loaded' if semantic_analyzer.models_loaded else 'fallback',
                'timestamp': time.strftime('%Y-%m-%d %H:%M:%S'),
                'processing_stage': 'semantic_analysis'
            }
        }

        logger.info(f"Enhanced semantic analysis completed in {processing_time:.2f}s")
        return json.dumps(results, indent=2)

    except Exception as e:
        logger.error(f"Error in semantic analysis: {str(e)}")
        logger.error(traceback.format_exc())
        return json.dumps({
            'success': False,
            'error': str(e),
            'processing_time_ms': int((time.time() - start_time) * 1000),
            'fallback_analysis': 'Enhanced analysis unavailable due to error',
            'metadata': {
                'analysis_version': '3.1-enhanced-ai',
                'error_occurred': True,
                'timestamp': time.strftime('%Y-%m-%d %H:%M:%S')
            }
        })

def generate_comprehensive_insights(
    student_code: str,
    codet5_analysis: Dict,
    comparison: Optional[Dict] = None,
    optimal_solution: Optional[Dict] = None
) -> List[str]:
    """Generate comprehensive insights about the student's code"""
    insights = []

    # Logic understanding insights
    patterns = codet5_analysis['logic_patterns']
    if 'builtin_max' in patterns or 'builtin_min' in patterns or 'builtin_sum' in patterns:
        insights.append("Excellent! Student demonstrates advanced understanding by using Python built-in functions")
    elif 'iterative_conditional' in patterns:
        insights.append("Good logical thinking demonstrated with iterative comparison approach")
    elif 'function_definition' in patterns and 'return_statement' in patterns:
        insights.append("Proper function structure with clear return logic")

    # Approach analysis insights
    approach = codet5_analysis['approach_analysis']
    if 'builtin' in approach:
        insights.append("Optimal algorithmic approach chosen - highly efficient solution")
    elif 'iterative' in approach:
        insights.append("Solid iterative approach, shows good programming fundamentals")
    elif 'custom' in approach:
        insights.append("Creative custom approach, demonstrates independent problem-solving")

    # Complexity insights
    complexity = codet5_analysis['complexity_analysis']
    if complexity['time'] == 'O(n)' and complexity['space'] == 'O(1)':
        insights.append("Excellent time and space complexity - very efficient solution")
    elif complexity['time'] in ['O(n²)', 'O(n³)']:
        insights.append("Solution works correctly but could benefit from complexity optimization")

    # Quality insights
    quality = codet5_analysis['semantic_quality']
    if quality['readability_score'] >= 8:
        insights.append("Code is highly readable with good programming practices")
    elif quality['efficiency_level'] == 'high':
        insights.append("Solution demonstrates awareness of efficient programming techniques")

    # Comparison insights
    if comparison:
        similarity = comparison['semantic_similarity']
        if similarity > 0.8:
            insights.append("Student's solution is semantically very similar to the optimal approach")
        elif similarity > 0.6:
            insights.append("Good understanding shown, with opportunities for further optimization")
        elif similarity > 0.4:
            insights.append("Correct approach with different implementation style")

        # Pattern overlap insights
        overlap = comparison['pattern_overlap']
        total_patterns = len(comparison['optimal_patterns'])
        if total_patterns > 0 and overlap / total_patterns > 0.7:
            insights.append("Strong pattern recognition - matches most optimal solution patterns")

    # Default insight if none found
    if not insights:
        insights.append("Student shows basic understanding of the problem and provides a working solution")

    return insights

def generate_recommendations(codet5_analysis: Dict, comparison: Optional[Dict] = None) -> List[str]:
    """Generate actionable recommendations for improvement"""
    recommendations = []

    # Efficiency recommendations
    patterns = codet5_analysis['logic_patterns']
    if 'iterative_conditional' in patterns and 'builtin_max' not in patterns:
        recommendations.append("Consider using built-in max() or min() functions for better efficiency")

    # Complexity recommendations
    complexity = codet5_analysis['complexity_analysis']
    if complexity['time'] in ['O(n²)', 'O(n³)']:
        recommendations.append("Try to reduce algorithmic complexity using more efficient approaches")

    # Quality recommendations
    quality = codet5_analysis['semantic_quality']
    if quality['readability_score'] < 7:
        recommendations.append("Add comments or use more descriptive variable names for better readability")
    if 'input_validation' not in quality['best_practices']:
        recommendations.append("Consider adding input validation for more robust code")

    # Comparison-based recommendations
    if comparison and comparison['semantic_similarity'] < 0.6:
        recommendations.append("Review the optimal solution to learn alternative approaches")

    return recommendations

# Enhanced Gradio Interface
def gradio_interface(student_code, question_text, need_optimal):
    """Enhanced Gradio interface wrapper"""
    if not student_code.strip():
        return json.dumps({
            'error': 'Please provide student code for analysis',
            'success': False
        }, indent=2)

    return process_semantic_analysis(
        student_code=student_code,
        question_text=question_text,
        question_id="gradio_test",
        need_optimal_solution=need_optimal
    )

# Create enhanced Gradio interface
demo = gr.Interface(
    fn=gradio_interface,
    inputs=[
        gr.Textbox(
            label="Student Code",
            placeholder="Enter Python code here...",
            lines=12,
            value="def find_max(numbers):\n    max_val = numbers[0]\n    for num in numbers:\n        if num > max_val:\n            max_val = num\n    return max_val"
        ),
        gr.Textbox(
            label="Question Text",
            placeholder="Enter the question...",
            lines=2,
            value="Find the maximum number in a list"
        ),
        gr.Checkbox(
            label="Generate Optimal Solution",
            value=True
        )
    ],
    outputs=gr.Textbox(
        label="Semantic Analysis Results (JSON)",
        lines=25,
        show_copy_button=True
    ),
    title="CodeLab Semantic Analysis - Stage 3 (Enhanced)",
    description="""
    Advanced semantic analysis using CodeBERT and CodeT5 models for educational code evaluation.
    This system analyzes code semantics, generates optimal solutions, and provides educational insights.
    """,
    examples=[
        [
            "def find_max(numbers):\n    return max(numbers)",
            "Find the maximum number in a list",
            True
        ],
        [
            "def find_min(arr):\n    minimum = arr[0]\n    for i in range(1, len(arr)):\n        if arr[i] < minimum:\n            minimum = arr[i]\n    return minimum",
            "Find the minimum number in an array",
            True
        ],
        [
            "def calculate_sum(nums):\n    total = 0\n    for num in nums:\n        total += num\n    return total",
            "Calculate the sum of all numbers in a list",
            True
        ]
    ],
    theme=gr.themes.Soft(),
    analytics_enabled=False
)

# Launch the interface
if __name__ == "__main__":
    demo.launch(
        server_name="0.0.0.0",
        server_port=7860,
        show_error=True,
        show_tips=True
    )