Spaces:

Docfile
/

Mariam-cards

Running

File size: 9,216 Bytes

import hashlib
import json
import logging
import os
import tempfile
from datetime import datetime
from textwrap import dedent
from typing import List, Dict, Union, Optional

from crewai import Agent, Crew, Process, Task
from crewai import LLM
from crewai_tools import SerperDevTool, PDFSearchTool
from flask import Flask, render_template, request, jsonify
from werkzeug.exceptions import HTTPException
from werkzeug.utils import secure_filename

# Logging setup: INFO threshold, each record shows time, logger name and severity.
_LOG_FORMAT = '%(asctime)s - %(name)s - %(levelname)s - %(message)s'
logging.basicConfig(format=_LOG_FORMAT, level=logging.INFO)

# Module-wide logger used by every function in this file.
logger = logging.getLogger(__name__)

# Flask application object; the routes and error handler below register on it.
app = Flask(__name__)

# Upload settings: only PDFs, at most 16MB, stored in the system temp directory.
MAX_FILE_SIZE = 16 * 1024 * 1024  # 16MB
ALLOWED_EXTENSIONS = {'pdf'}
UPLOAD_FOLDER = tempfile.gettempdir()
app.config.update(
    UPLOAD_FOLDER=UPLOAD_FOLDER,
    MAX_CONTENT_LENGTH=MAX_FILE_SIZE,
)

# Load environment variables
def get_env_variable(var_name: str) -> str:
    """Return the value of a required environment variable.

    Args:
        var_name: Name of the variable to read from ``os.environ``.

    Returns:
        The variable's value (an empty string is returned as-is).

    Raises:
        ValueError: If the variable is not present in the environment.
    """
    try:
        return os.environ[var_name]
    except KeyError:
        # Surface a configuration-oriented error instead of the raw KeyError.
        raise ValueError(f"Environment variable {var_name} is not set") from None

# API keys configuration: (environment name to set, environment name to read).
# The Gemini key is exported under both GOOGLE_API_KEY and GEMINI_API_KEY.
_ENV_KEY_SOURCES = (
    ("GOOGLE_API_KEY", "GEMINI_API_KEY"),
    ("GEMINI_API_KEY", "GEMINI_API_KEY"),
    ("SERPER_API_KEY", "SERPER_API_KEY"),
)

try:
    for _target, _source in _ENV_KEY_SOURCES:
        os.environ[_target] = get_env_variable(_source)
except ValueError as config_error:
    # A missing key is fatal: log it, then let the import fail.
    logger.error(f"Configuration error: {config_error}")
    raise

# Initialize LLM
# Single module-level LLM handle; both agents built by FlashcardGenerator
# receive it through their ``llm=llm`` argument.
llm = LLM(
    model="gemini/gemini-1.5-flash",
    temperature=0.7,
    timeout=120,  # presumably seconds — confirm against the crewai LLM docs
    max_tokens=8000,
)

# Initialize tools
# Web-search tool instance shared by every researcher agent.
# NOTE(review): presumably picks up SERPER_API_KEY from the environment set
# above — confirm against the crewai_tools documentation.
search_tool = SerperDevTool()

def create_pdf_tool(pdf_path: Optional[str] = None) -> PDFSearchTool:
    """Build a PDFSearchTool configured with Google LLM and embedder settings.

    Args:
        pdf_path: PDF to hand to the tool. When falsy, the tool is created
            without a ``pdf`` argument.

    Returns:
        The configured PDFSearchTool instance.
    """
    llm_settings = {
        'provider': 'google',
        'config': {
            'model': 'gemini-1.5-flash',
        },
    }
    embedder_settings = {
        'provider': 'google',
        'config': {
            'model': 'models/embedding-001',
            'task_type': 'retrieval_document',
        },
    }
    tool_kwargs = {
        'config': {
            'llm': llm_settings,
            'embedder': embedder_settings,
        },
    }

    # Only pass ``pdf`` when a path was actually supplied.
    if pdf_path:
        tool_kwargs = {'pdf': pdf_path, **tool_kwargs}
    return PDFSearchTool(**tool_kwargs)

def allowed_file(filename: str) -> bool:
    """Tell whether ``filename`` ends with an extension in ALLOWED_EXTENSIONS.

    The comparison is case-insensitive; a name without any dot is rejected.
    """
    _, dot, extension = filename.rpartition('.')
    if not dot:
        return False
    return extension.lower() in ALLOWED_EXTENSIONS

def generate_safe_filename(filename: str) -> str:
    """Generate a unique, filesystem-safe name for an uploaded file.

    The result has the form ``upload_<YYYYmmdd_HHMMSS>_<10 hex chars>.<ext>``.
    The hex part is a digest of the original name plus random bytes: a digest
    of the name alone would give two uploads of the same file name within the
    same second the same path, letting concurrent requests overwrite or delete
    each other's file.

    Args:
        filename: Client-supplied file name; only its extension is reused.

    Returns:
        The generated name. The extension is lower-cased and reduced to ASCII
        letters and digits; it is omitted when ``filename`` has none.
    """
    timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')

    # Random salt makes the digest unique per call, not per file name.
    digest_input = filename.encode('utf-8', errors='replace') + os.urandom(16)
    file_hash = hashlib.sha256(digest_input).hexdigest()[:10]

    # Never copy separators or other special characters from client input
    # into the path; a name without a dot simply yields no extension.
    _, dot, raw_ext = filename.rpartition('.')
    ext = ''
    if dot:
        ext = ''.join(ch for ch in raw_ext.lower() if ch.isascii() and ch.isalnum())
    suffix = f".{ext}" if ext else ""

    return f"upload_{timestamp}_{file_hash}{suffix}"

class FlashcardGenerator:
    """Produce question/answer flashcards on a topic with a two-agent crew.

    A researcher agent collects material with the shared web-search tool (plus
    a PDF search tool when a PDF path is given); a writer agent turns that
    material into flashcards, which are parsed from the writer's raw output.
    """

    def __init__(self, topic: str, pdf_path: Optional[str] = None):
        """Store the inputs and build both agents.

        Args:
            topic: Subject the flashcards should cover.
            pdf_path: Path of a PDF to analyse in addition to web search, or
                ``None`` to rely on web search only.
        """
        self.topic = topic
        self.pdf_path = pdf_path
        self.researcher = self._create_researcher()
        self.writer = self._create_writer()

    def _create_researcher(self) -> "Agent":
        """Create the researcher agent with appropriate tools."""
        tools = [search_tool]
        if self.pdf_path:
            tools.append(create_pdf_tool(self.pdf_path))

        # Prompts are assembled from plain literals: textwrap.dedent() cannot
        # strip the continuation-line indentation of a triple-quoted string
        # whose first line starts right after the quotes (common margin is
        # empty), so source indentation would leak into the prompt.
        return Agent(
            role='Chercheur de Sujets',
            goal=(
                f"Trouver les informations les plus pertinentes et précises sur {self.topic}\n"
                "en utilisant l'API SerpApi et en analysant les PDFs fournis."
            ),
            backstory=(
                "Un chercheur expert spécialisé dans la collecte d'informations sur divers sujets.\n"
                "Capable d'utiliser l'API SerpApi pour des recherches précises et d'analyser des documents PDF."
            ),
            tools=tools,
            llm=llm,
            verbose=True,
            allow_delegation=False
        )

    def _create_writer(self) -> "Agent":
        """Create the writer agent (no tools; works from the research output)."""
        return Agent(
            role='Rédacteur de Flashcards',
            goal=(
                "Créer des flashcards claires et concises en format question-réponse\n"
                "basées sur les informations fournies par le Chercheur."
            ),
            backstory=(
                "Un expert en pédagogie et en création de matériel d'apprentissage.\n"
                "Capable de transformer des informations complexes en flashcards simples et mémorisables."
            ),
            llm=llm,
            verbose=True,
            allow_delegation=False
        )

    def create_research_task(self) -> "Task":
        """Create the research task assigned to the researcher agent.

        The description mentions the PDF only when a PDF path was provided.
        """
        description = f"Effectuer une recherche approfondie sur le sujet '{self.topic}'."
        if self.pdf_path:
            description += f" Analyser également le contenu du PDF fourni: {self.pdf_path}"

        return Task(
            description=description,
            expected_output="Une liste d'informations pertinentes sur le sujet.",
            agent=self.researcher
        )

    def create_flashcard_task(self, research_task: "Task") -> "Task":
        """Create the flashcard-writing task assigned to the writer agent.

        Args:
            research_task: Task whose output is supplied as context.

        The prompt names the JSON keys explicitly because
        ``extract_json_from_result`` rejects any card lacking ``question`` or
        ``answer``; without that the model is free to pick other key names.
        """
        return Task(
            description=(
                "Transformer les informations fournies par le Chercheur\n"
                "en une série de flashcards au format JSON. je veux une vingtaine de flashcard très robuste et difficile . Chaque flashcard doit avoir une question\n"
                "d'un côté et une réponse concise de l'autre. Les réponses doivent être claires et informatives.\n"
                "Le résultat doit être un tableau JSON d'objets ayant chacun exactement les clés \"question\" et \"answer\"."
            ),
            expected_output=(
                "Une liste de flashcards au format JSON : un tableau d'objets "
                "avec les clés \"question\" et \"answer\"."
            ),
            agent=self.writer,
            context=[research_task]
        )

    def generate(self) -> List[Dict[str, str]]:
        """Run research then writing sequentially and return the flashcards.

        Returns:
            The flashcards parsed from the raw output of the last task.

        Raises:
            ValueError: If no valid flashcard list can be extracted.
        """
        research_task = self.create_research_task()
        flashcard_task = self.create_flashcard_task(research_task)

        crew = Crew(
            agents=[self.researcher, self.writer],
            tasks=[research_task, flashcard_task],
            process=Process.sequential,
            verbose=True
        )

        result = crew.kickoff()
        return self.extract_json_from_result(result.tasks_output[-1].raw)

    @staticmethod
    def _is_flashcard(card) -> bool:
        """Return True when ``card`` is a dict holding 'question' and 'answer'."""
        return isinstance(card, dict) and 'question' in card and 'answer' in card

    @staticmethod
    def _find_embedded_flashcards(text: str, start: int) -> Optional[List[Dict[str, str]]]:
        """Return the first non-empty flashcard list embedded in ``text``.

        Tries to decode a JSON value at every '[' from ``start`` onwards and
        returns the first one that is a non-empty list of valid flashcards,
        or ``None`` when there is none.
        """
        decoder = json.JSONDecoder()
        pos = start
        while pos != -1:
            try:
                candidate, _ = decoder.raw_decode(text[pos:])
            except json.JSONDecodeError:
                candidate = None
            if (
                isinstance(candidate, list)
                and candidate
                and all(FlashcardGenerator._is_flashcard(item) for item in candidate)
            ):
                return candidate
            pos = text.find('[', pos + 1)
        return None

    @staticmethod
    def extract_json_from_result(result_text: str) -> List[Dict[str, str]]:
        """Extract and validate the flashcard list from the model's raw text.

        The text between the first '[' and the last ']' is parsed first. If
        that span is not valid JSON (for example because the surrounding prose
        contains brackets of its own, such as citations), the text is scanned
        for the first embedded JSON array made of valid flashcards.

        Args:
            result_text: Raw text returned by the writer task.

        Returns:
            The list of flashcard dicts, each with 'question' and 'answer'.

        Raises:
            ValueError: If no JSON array is found, the JSON is malformed, or a
                card lacks the required keys. The original error is chained.
        """
        try:
            if not isinstance(result_text, str):
                raise ValueError("JSON non trouvé dans le résultat")

            json_start = result_text.find('[')
            json_end = result_text.rfind(']') + 1
            if json_start == -1 or json_end == 0:
                raise ValueError("JSON non trouvé dans le résultat")

            try:
                flashcards = json.loads(result_text[json_start:json_end])
            except json.JSONDecodeError:
                # Brackets outside the array broke the naive slice; look for
                # the array itself before giving up.
                flashcards = FlashcardGenerator._find_embedded_flashcards(
                    result_text, json_start
                )
                if flashcards is None:
                    raise

            # Validate flashcard format
            for card in flashcards:
                if not FlashcardGenerator._is_flashcard(card):
                    raise ValueError("Format de flashcard invalide")

            return flashcards
        except (json.JSONDecodeError, ValueError) as e:
            logger.error("Error extracting JSON: %s", e)
            raise ValueError(f"Erreur lors de l'extraction du JSON : {str(e)}") from e

@app.route('/')
def index():
    """Serve the landing page by rendering the ``index.html`` template."""
    template_name = 'index.html'
    return render_template(template_name)

@app.route('/generate', methods=['POST'])
def generate_flashcards():
    """Handle flashcard generation requests.

    Reads the ``topic`` form field and an optional ``file`` upload (PDF),
    runs the FlashcardGenerator and returns the flashcards as JSON.

    Returns:
        ``{'success': True, 'flashcards': [...]}`` on success;
        ``{'error': ...}`` with status 400 for a missing/blank topic, an
        unsupported file type or an oversized file; ``{'error': ...}`` with
        status 500 for any other failure.

    HTTP errors raised by the framework (e.g. 413 while parsing an oversized
    request) are re-raised so the registered error handlers answer them
    instead of being reported as 500.
    """
    try:
        # Validate topic (a whitespace-only value counts as missing)
        topic = (request.form.get('topic') or '').strip()
        if not topic:
            return jsonify({'error': 'Veuillez entrer un sujet.'}), 400

        # Validate the optional upload and decide where it will be stored
        pdf_path = None
        upload = request.files.get('file')
        if upload and upload.filename:
            if not allowed_file(upload.filename):
                return jsonify({'error': 'Format de fichier non supporté. Veuillez utiliser un PDF.'}), 400

            if upload.content_length and upload.content_length > MAX_FILE_SIZE:
                return jsonify({'error': 'Le fichier est trop volumineux. Maximum 16MB.'}), 400

            safe_filename = generate_safe_filename(upload.filename)
            pdf_path = os.path.join(app.config['UPLOAD_FOLDER'], safe_filename)

        try:
            # Save inside the try so a partially written file is cleaned up too
            if pdf_path:
                upload.save(pdf_path)
                logger.info(f"File saved: {pdf_path}")

            # Generate flashcards
            generator = FlashcardGenerator(topic, pdf_path)
            flashcards = generator.generate()

            return jsonify({
                'success': True,
                'flashcards': flashcards
            })

        finally:
            # Clean up PDF file; a cleanup failure must not replace the
            # response (or the original exception) of the request.
            if pdf_path:
                try:
                    os.remove(pdf_path)
                except FileNotFoundError:
                    pass
                except OSError as cleanup_error:
                    logger.warning(f"Could not remove {pdf_path}: {cleanup_error}")
                else:
                    logger.info(f"File cleaned up: {pdf_path}")

    except HTTPException:
        # Let Flask's error handlers (e.g. the 413 handler) deal with these.
        raise
    except Exception as e:
        logger.exception(f"Error generating flashcards: {str(e)}")
        # NOTE(review): the raw exception text is returned to the client as
        # before; consider a generic message if internals must not leak.
        return jsonify({'error': str(e)}), 500

@app.errorhandler(413)
def request_entity_too_large(error):
    """Answer HTTP 413 errors with a JSON "file too large" message."""
    payload = {'error': 'Le fichier est trop volumineux. Maximum 16MB.'}
    return jsonify(payload), 413

if __name__ == '__main__':
    # Debug mode turns on Werkzeug's interactive debugger, which lets anyone
    # who can reach the server execute arbitrary code. Keep it off unless it
    # is explicitly requested through the FLASK_DEBUG environment variable.
    debug_enabled = os.environ.get('FLASK_DEBUG', '').strip().lower() in {'1', 'true', 'yes', 'on'}
    app.run(debug=debug_enabled)