VishwaTechnologiesPvtLtd
committed on
Commit a2ff264
Parent(s): 8a80df2
new one
This view is limited to 50 files because it contains too many changes.
- Space.yaml +3 -0
- __init__.py +0 -0
- app.py +26 -0
- app1.py +7 -0
- backend/__pycache__/__init__.cpython-312.pyc +0 -0
- backend/__pycache__/main.cpython-312.pyc +0 -0
- backend/api/__init__.py +0 -0
- backend/api/__pycache__/__init__.cpython-312.pyc +0 -0
- backend/api/__pycache__/items.cpython-312.pyc +0 -0
- backend/api/__pycache__/pdfreader.cpython-312.pyc +0 -0
- backend/api/__pycache__/textreader.cpython-312.pyc +0 -0
- backend/api/items.py +18 -0
- backend/api/pdfreader.py +13 -0
- backend/api/textreader.py +17 -0
- backend/models/AIParamModel.py +10 -0
- backend/models/AIResponseModel.py +19 -0
- backend/models/__init__.py +0 -0
- backend/models/__pycache__/AIParamModel.cpython-312.pyc +0 -0
- backend/models/__pycache__/AIResponseModel.cpython-312.pyc +0 -0
- backend/models/__pycache__/__init__.cpython-312.pyc +0 -0
- backend/models/__pycache__/item.cpython-312.pyc +0 -0
- backend/models/item.py +6 -0
- backend/repositories/__init__.py +0 -0
- backend/repositories/__pycache__/__init__.cpython-312.pyc +0 -0
- backend/repositories/__pycache__/item_repo.cpython-312.pyc +0 -0
- backend/repositories/item_repo.py +15 -0
- backend/services/ChunkGenerator.py +21 -0
- backend/services/DataReader.py +54 -0
- backend/services/IChunkGenerator.py +7 -0
- backend/services/IDataReader.py +12 -0
- backend/services/IQuestionGenerator.py +13 -0
- backend/services/ISentenceCheck.py +7 -0
- backend/services/PDFQuestionService.py +146 -0
- backend/services/QuestionGenerator.py +60 -0
- backend/services/SentenceCheck.py +54 -0
- backend/services/TextReaderQuestionGenerator.py +34 -0
- backend/services/__init__.py +0 -0
- backend/services/__pycache__/ChunkGenerator.cpython-312.pyc +0 -0
- backend/services/__pycache__/DataReader.cpython-312.pyc +0 -0
- backend/services/__pycache__/IChunkGenerator.cpython-312.pyc +0 -0
- backend/services/__pycache__/IDataReader.cpython-312.pyc +0 -0
- backend/services/__pycache__/IQuestionGenerator.cpython-312.pyc +0 -0
- backend/services/__pycache__/ISentenceCheck.cpython-312.pyc +0 -0
- backend/services/__pycache__/PDFQuestionService.cpython-312.pyc +0 -0
- backend/services/__pycache__/QuestionGenerator.cpython-312.pyc +0 -0
- backend/services/__pycache__/SentenceCheck.cpython-312.pyc +0 -0
- backend/services/__pycache__/TextReaderQuestionGenerator.cpython-312.pyc +0 -0
- backend/services/__pycache__/__init__.cpython-312.pyc +0 -0
- backend/services/__pycache__/item_service.cpython-312.pyc +0 -0
- backend/services/__pycache__/pdfreader_service.cpython-312.pyc +0 -0
Space.yaml
ADDED
@@ -0,0 +1,3 @@
+sdk: fastapi
+app_file: main.py
+python_version: "3.10"
__init__.py
ADDED
File without changes
app.py
ADDED
@@ -0,0 +1,26 @@
+from fastapi import FastAPI
+from backend.api import items
+from backend.api import pdfreader
+from backend.api import textreader
+from fastapi.middleware.cors import CORSMiddleware
+
+app = FastAPI(title="Multi-layered FastAPI Example")
+
+# Allow requests from your frontend origin
+origins = [
+    "http://localhost:3000",  # React frontend
+    # Add more origins if needed
+]
+
+app.add_middleware(
+    CORSMiddleware,
+    allow_origins=origins,  # List of allowed origins
+    allow_credentials=True,
+    allow_methods=["*"],  # Allow all HTTP methods (GET, POST, etc.)
+    allow_headers=["*"],  # Allow all headers
+)
+
+
+app.include_router(items.router)
+app.include_router(pdfreader.router)
+app.include_router(textreader.router)
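For local testing, a minimal launcher sketch (assuming uvicorn is installed; "app:app" refers to this app.py at the repository root):

# dev_server.py -- hypothetical helper, not part of this commit
import uvicorn

if __name__ == "__main__":
    uvicorn.run("app:app", host="0.0.0.0", port=8000, reload=True)

Note that Space.yaml above points app_file at main.py while this FastAPI instance lives in app.py; a backend/main.py is suggested by the listed .pyc files but is not among the 50 files shown, so the wiring may be resolved there.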
app1.py
ADDED
@@ -0,0 +1,7 @@
+import gradio as gr
+
+def greet(name):
+    return "Hello " + name + "!!"
+
+demo = gr.Interface(fn=greet, inputs="text", outputs="text")
+demo.launch(ssr_mode=False)
backend/__pycache__/__init__.cpython-312.pyc
ADDED
Binary file (153 Bytes).
backend/__pycache__/main.cpython-312.pyc
ADDED
Binary file (903 Bytes).
backend/api/__init__.py
ADDED
File without changes
backend/api/__pycache__/__init__.cpython-312.pyc
ADDED
Binary file (157 Bytes).
backend/api/__pycache__/items.cpython-312.pyc
ADDED
Binary file (1.14 kB).
backend/api/__pycache__/pdfreader.cpython-312.pyc
ADDED
Binary file (1.02 kB).
backend/api/__pycache__/textreader.cpython-312.pyc
ADDED
Binary file (1.21 kB).
backend/api/items.py
ADDED
@@ -0,0 +1,18 @@
+from fastapi import APIRouter, HTTPException
+from typing import List
+from backend.models.item import Item
+from backend.services.item_service import ItemService
+
+router = APIRouter(prefix="/items", tags=["items"])
+service = ItemService()
+
+@router.get("/", response_model=List[Item])
+def get_items():
+    return service.list_items()
+
+@router.get("/{item_id}", response_model=Item)
+def get_item(item_id: int):
+    item = service.get_item(item_id)
+    if item is None:
+        raise HTTPException(status_code=404, detail="Item not found")
+    return item
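A quick way to exercise this router is FastAPI's TestClient. A sketch, assuming app.py is importable and backend/services/item_service.py (listed only as a .pyc in this truncated view) exposes list_items and get_item:

# hypothetical smoke test for the items router
from fastapi.testclient import TestClient
from app import app

client = TestClient(app)
print(client.get("/items/").json())         # all items
print(client.get("/items/1").json())        # one item
print(client.get("/items/99").status_code)  # 404 for an unknown id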
backend/api/pdfreader.py
ADDED
@@ -0,0 +1,13 @@
+from fastapi import APIRouter, UploadFile, File, HTTPException
+from backend.services.pdfreader_service import PDFService
+
+router = APIRouter(prefix="/pdfreader", tags=["items"])
+service = PDFService()
+
+@router.post("/upload")
+async def upload_pdf(file: UploadFile = File(...)):
+    if not file.filename.endswith(".pdf"):
+        raise HTTPException(status_code=400, detail="Only PDF files are allowed")
+
+    result = await service.process_uploaded_pdf(file)
+    return result
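The endpoint expects a multipart upload, and the ".pdf" check is by filename only, not content. A request sketch (hypothetical sample.pdf; the response shape depends on PDFService, which is not among the files shown):

# hypothetical upload against /pdfreader/upload
from fastapi.testclient import TestClient
from app import app

client = TestClient(app)
with open("sample.pdf", "rb") as f:
    resp = client.post(
        "/pdfreader/upload",
        files={"file": ("sample.pdf", f, "application/pdf")},
    )
print(resp.status_code, resp.json())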
backend/api/textreader.py
ADDED
@@ -0,0 +1,17 @@
+from fastapi import APIRouter, HTTPException
+from backend.services.TextReaderQuestionGenerator import TextReaderQuestionGenerator
+from pydantic import BaseModel
+
+router = APIRouter(prefix="/txt", tags=["items"])
+service = TextReaderQuestionGenerator()
+
+# Define the request model
+class TextRequest(BaseModel):
+    txt: str
+
+@router.post("/read_text")
+async def read_text(request: TextRequest):
+    if not request.txt:
+        raise HTTPException(status_code=400, detail="No text provided")
+    result = await service.textreader_question_generator(request.txt)
+    return result
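A request sketch for this endpoint; the JSON body must match TextRequest:

# hypothetical call to /txt/read_text
from fastapi.testclient import TestClient
from app import app

client = TestClient(app)
resp = client.post(
    "/txt/read_text",
    json={"txt": "The sun is a star at the center of the solar system."},
)
print(resp.json())  # a list of {"questions": [...]} entries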
backend/models/AIParamModel.py
ADDED
@@ -0,0 +1,10 @@
+from dataclasses import dataclass
+
+@dataclass
+class AIParam:
+    max_length: int = 64
+    num_return_sequences: int = 10
+    do_sample: bool = True
+    top_k: int = 50
+    top_p: float = 0.95
+    temperature: float = 0.8
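Because AIParam is a dataclass with defaults, call sites can override individual generation settings, for example:

from backend.models.AIParamModel import AIParam

# fewer, more deterministic questions than the defaults
param = AIParam(num_return_sequences=3, do_sample=False)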
backend/models/AIResponseModel.py
ADDED
@@ -0,0 +1,19 @@
+from dataclasses import dataclass
+
+
+@dataclass
+class AIResult:
+    Chunks: list
+    TotalChunks: int
+    EstimatedTotalTimeSeconds: float
+    EstimatedMinutes: float
+
+@dataclass
+class AIResponseModel:
+    OriginalFileName: str
+    StoredFileName: str
+    SavedTo: str
+    AIResult: AIResult
+    ContentSize: int = 0
backend/models/__init__.py
ADDED
File without changes
backend/models/__pycache__/AIParamModel.cpython-312.pyc
ADDED
Binary file (721 Bytes).
backend/models/__pycache__/AIResponseModel.cpython-312.pyc
ADDED
Binary file (937 Bytes).
backend/models/__pycache__/__init__.cpython-312.pyc
ADDED
Binary file (160 Bytes).
backend/models/__pycache__/item.cpython-312.pyc
ADDED
Binary file (469 Bytes).
backend/models/item.py
ADDED
@@ -0,0 +1,6 @@
+from pydantic import BaseModel
+
+class Item(BaseModel):
+    id: int
+    name: str
+    description: str
backend/repositories/__init__.py
ADDED
File without changes
backend/repositories/__pycache__/__init__.cpython-312.pyc
ADDED
Binary file (166 Bytes).
backend/repositories/__pycache__/item_repo.cpython-312.pyc
ADDED
Binary file (1.28 kB).
backend/repositories/item_repo.py
ADDED
@@ -0,0 +1,15 @@
+from typing import List
+from backend.models.item import Item
+
+# Simulate a database with an in-memory list
+_items_db = [
+    Item(id=1, name="Item 1", description="The first item"),
+    Item(id=2, name="Item 2", description="The second item"),
+]
+
+class ItemRepository:
+    def get_all(self) -> List[Item]:
+        return _items_db
+
+    def get_by_id(self, item_id: int) -> Item | None:
+        return next((item for item in _items_db if item.id == item_id), None)
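Usage is immediate, since the "database" is a seeded module-level list:

from backend.repositories.item_repo import ItemRepository

repo = ItemRepository()
repo.get_all()      # both seeded items
repo.get_by_id(1)   # Item(id=1, ...)
repo.get_by_id(42)  # None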
backend/services/ChunkGenerator.py
ADDED
@@ -0,0 +1,21 @@
+from .IChunkGenerator import IChunkGenerator
+import nltk
+from nltk.tokenize import sent_tokenize
+class ChunkGenerator(IChunkGenerator):
+    def chunk_text(self, text: str, max_words: int = 100) -> list:
+        sentences = sent_tokenize(text)
+        chunks, chunk = [], []
+        word_count = 0
+
+        for sentence in sentences:
+            word_count += len(sentence.split())
+            chunk.append(sentence)
+            if word_count >= max_words:
+                chunks.append(" ".join(chunk))
+                chunk = []
+                word_count = 0
+
+        if chunk:
+            chunks.append(" ".join(chunk))
+
+        return chunks
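chunk_text packs whole sentences greedily, so a chunk can overshoot max_words by up to one sentence (the sentence is appended before the budget check). A worked example, assuming nltk's punkt data is available:

from backend.services.ChunkGenerator import ChunkGenerator

chunker = ChunkGenerator()
chunks = chunker.chunk_text("First sentence here. Second one. Third one.", max_words=5)
# -> ['First sentence here. Second one.', 'Third one.']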
backend/services/DataReader.py
ADDED
@@ -0,0 +1,54 @@
+from .IDataReader import IDataReader
+from PyPDF2 import PdfReader
+from docx import Document
+
+class DataReader(IDataReader):
+    def read_pdf(self, file_path: str) -> str:
+        """
+        Reads a PDF file and returns its text content.
+
+        :param file_path: Path to the PDF file.
+        :return: Text content of the PDF file.
+        """
+        try:
+            text = ""
+            with open(file_path, "rb") as f:
+                reader = PdfReader(f)
+                for page in reader.pages:
+                    page_text = page.extract_text()
+                    if page_text:
+                        text += page_text + "\n"
+            return text
+        except Exception as e:
+            print(f"Error reading PDF file: {e}")
+            return ""
+
+    def read_docx(self, file_path: str) -> str:
+        """
+        Reads a DOCX file and returns its text content.
+
+        :param file_path: Path to the DOCX file.
+        :return: Text content of the DOCX file.
+        """
+        try:
+            doc = Document(file_path)
+            text = "\n".join([para.text for para in doc.paragraphs])
+            return text
+        except Exception as e:
+            print(f"Error reading DOCX file: {e}")
+            return ""
+
+    def read_txt(self, file_path: str) -> str:
+        """
+        Reads a TXT file and returns its text content.
+
+        :param file_path: Path to the TXT file.
+        :return: Text content of the TXT file.
+        """
+        try:
+            with open(file_path, "r", encoding="utf-8") as f:
+                text = f.read()
+            return text
+        except Exception as e:
+            print(f"Error reading TXT file: {e}")
+            return ""
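All three readers swallow exceptions and return an empty string, so callers cannot distinguish an unreadable file from an empty one. A usage sketch with a hypothetical path:

from backend.services.DataReader import DataReader

reader = DataReader()
text = reader.read_pdf("docs/sample.pdf")  # hypothetical path
if not text:
    print("PDF unreadable or contained no extractable text")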
backend/services/IChunkGenerator.py
ADDED
@@ -0,0 +1,7 @@
+from abc import ABC, abstractmethod
+
+class IChunkGenerator(ABC):
+    @abstractmethod
+    def chunk_text(self, text: str, max_words: int = 100) -> list:
+        """Splits the text into smaller chunks."""
+        pass
backend/services/IDataReader.py
ADDED
@@ -0,0 +1,12 @@
+from abc import ABC, abstractmethod
+
+class IDataReader(ABC):
+    @abstractmethod
+    def read_pdf(self, file_path: str) -> str:
+        pass
+    @abstractmethod
+    def read_docx(self, file_path: str) -> str:
+        pass
+    @abstractmethod
+    def read_txt(self, file_path: str) -> str:
+        pass
backend/services/IQuestionGenerator.py
ADDED
@@ -0,0 +1,13 @@
+from abc import ABC, abstractmethod
+from backend.models.AIParamModel import AIParam
+
+class IQuestionGenerator(ABC):
+    @abstractmethod
+    def generate_questions_advance(self, text: str, aIParam: AIParam) -> list:
+        """Generates questions from the given text."""
+        pass
+
+    @abstractmethod
+    def generate_questions_simple(self, text: str, aIParam: AIParam) -> list:
+        """Generates questions from the given text."""
+        pass
backend/services/ISentenceCheck.py
ADDED
@@ -0,0 +1,7 @@
+from abc import ABC, abstractmethod
+
+class ISentenceCheck(ABC):
+
+    @abstractmethod
+    def IsSentenceCorrect(self, sentence: str) -> bool:
+        pass
backend/services/PDFQuestionService.py
ADDED
@@ -0,0 +1,146 @@
+from backend.services.DataReader import DataReader
+from backend.services.ChunkGenerator import ChunkGenerator
+from backend.services.QuestionGenerator import QuestionGenerator
+from backend.models.AIParamModel import AIParam
+from backend.models.AIResponseModel import AIResult
+from pathlib import Path
+import time
+
+class PDFQuestionService:
+    def __init__(self):
+        self.reader = DataReader()
+        self.chunker = ChunkGenerator()
+        self.qgen = QuestionGenerator()
+
+    def read_file(self, filename: str) -> str:
+        ext = Path(filename).suffix.lower()
+        if ext == ".txt":
+            return self.reader.read_txt(filename)
+        elif ext == ".pdf":
+            return self.reader.read_pdf(filename)
+        elif ext == ".docx":
+            return self.reader.read_docx(filename)
+        else:
+            raise ValueError("Unsupported file format")
+
+    def generate_questions(self, filepath: str) -> dict:
+        ai_param = AIParam()
+        text = self.read_file(filepath)
+
+        if len(text) <= 100:
+            # Short input: treat the whole text as a single chunk
+            chunks = [text]
+            total_chunks = len(chunks)
+
+            sample_size = min(2, total_chunks)
+            sample_chunks = chunks[:sample_size]
+
+            # Time a small sample to estimate the total processing time
+            start_time = time.time()
+            for chunk in sample_chunks:
+                self.qgen.generate_questions_advance(chunk, ai_param)
+            elapsed = time.time() - start_time
+            avg_time = elapsed / sample_size
+            est_total_time = avg_time * total_chunks
+
+            all_questions = []
+            for idx, chunk in enumerate(chunks):
+                questions = self.qgen.generate_questions_advance(chunk, ai_param)
+                all_questions.append({
+                    "chunk": idx + 1,
+                    "questions": questions
+                })
+
+            return {
+                "estimated_total_time_seconds": round(est_total_time, 2),
+                "estimated_minutes": round(est_total_time / 60, 2),
+                "total_chunks": total_chunks,
+                "chunks": all_questions
+            }
+
+        chunks = self.chunker.chunk_text(text, 100)
+        total_chunks = len(chunks)
+
+        sample_size = min(2, total_chunks)
+        sample_chunks = chunks[:sample_size]
+
+        # Time a small sample to estimate the total processing time
+        start_time = time.time()
+        for chunk in sample_chunks:
+            self.qgen.generate_questions_advance(chunk, ai_param)
+        elapsed = time.time() - start_time
+        avg_time = elapsed / sample_size
+        est_total_time = avg_time * total_chunks
+
+        all_questions = []
+        for idx, chunk in enumerate(chunks):
+            questions = self.qgen.generate_questions_advance(chunk, ai_param)
+            all_questions.append({
+                "chunk": idx + 1,
+                "questions": questions
+            })
+
+        return {
+            "estimated_total_time_seconds": round(est_total_time, 2),
+            "estimated_minutes": round(est_total_time / 60, 2),
+            "total_chunks": total_chunks,
+            "chunks": all_questions
+        }
+
+    def react_generate_questions(self, filepath: str) -> AIResult:
+        ai_param = AIParam()
+        text = self.read_file(filepath)
+
+        if len(text) <= 100:
+            # Short input: treat the whole text as a single chunk
+            chunks = [text]
+            total_chunks = len(chunks)
+
+            sample_size = min(2, total_chunks)
+            sample_chunks = chunks[:sample_size]
+
+            start_time = time.time()
+            for chunk in sample_chunks:
+                self.qgen.generate_questions_advance(chunk, ai_param)
+            elapsed = time.time() - start_time
+            avg_time = elapsed / sample_size
+            est_total_time = avg_time * total_chunks
+
+            all_questions = []
+            for chunk in chunks:
+                questions = self.qgen.generate_questions_advance(chunk, ai_param)
+                all_questions.append({
+                    "questions": questions
+                })
+
+            return AIResult(
+                EstimatedTotalTimeSeconds=round(est_total_time, 2),
+                EstimatedMinutes=round(est_total_time / 60, 2),
+                TotalChunks=total_chunks,
+                Chunks=all_questions
+            )
+
+        chunks = self.chunker.chunk_text(text, 100)
+        total_chunks = len(chunks)
+
+        sample_size = min(2, total_chunks)
+        sample_chunks = chunks[:sample_size]
+
+        start_time = time.time()
+        for chunk in sample_chunks:
+            self.qgen.generate_questions_advance(chunk, ai_param)
+        elapsed = time.time() - start_time
+        avg_time = elapsed / sample_size
+        est_total_time = avg_time * total_chunks
+
+        all_questions = []
+        for chunk in chunks:
+            questions = self.qgen.generate_questions_advance(chunk, ai_param)
+            # Skip chunks for which no valid questions survived filtering
+            if questions:
+                all_questions.append({
+                    "questions": questions
+                })
+
+        return AIResult(
+            EstimatedTotalTimeSeconds=round(est_total_time, 2),
+            EstimatedMinutes=round(est_total_time / 60, 2),
+            TotalChunks=total_chunks,
+            Chunks=all_questions
+        )
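An end-to-end sketch (hypothetical path). Note that the sampled chunks are generated twice, once for the timing estimate and again in the main loop, so actual runtime exceeds the estimate:

from backend.services.PDFQuestionService import PDFQuestionService

service = PDFQuestionService()
result = service.generate_questions("docs/sample.pdf")  # hypothetical path
print(result["total_chunks"], result["estimated_minutes"])
for entry in result["chunks"]:
    print(entry["chunk"], entry["questions"])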
backend/services/QuestionGenerator.py
ADDED
@@ -0,0 +1,60 @@
+from transformers import pipeline, AutoTokenizer, AutoModelForSeq2SeqLM
+from .IQuestionGenerator import IQuestionGenerator
+from backend.services.SentenceCheck import SentenceCheck
+from backend.models.AIParamModel import AIParam
+import torch
+
+device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+print(f"[QuestionGenerator] Using device: {device}")
+
+# valhalla model with slow tokenizer
+tokenizer_qg_simple = AutoTokenizer.from_pretrained("valhalla/t5-small-qg-hl", use_fast=False)
+model_qg_simple = AutoModelForSeq2SeqLM.from_pretrained("valhalla/t5-small-qg-hl")
+
+qg_simple = pipeline(
+    "text2text-generation",
+    model=model_qg_simple,
+    tokenizer=tokenizer_qg_simple,
+    device=0 if torch.cuda.is_available() else -1
+)
+
+# iarfmoose model with slow tokenizer
+tokenizer_qg_advanced = AutoTokenizer.from_pretrained("iarfmoose/t5-base-question-generator", use_fast=False)
+model_qg_advanced = AutoModelForSeq2SeqLM.from_pretrained("iarfmoose/t5-base-question-generator")
+
+qg_advanced = pipeline(
+    "text2text-generation",
+    model=model_qg_advanced,
+    tokenizer=tokenizer_qg_advanced,
+    device=0 if torch.cuda.is_available() else -1
+)
+sentenceCheck = SentenceCheck()
+
+class QuestionGenerator(IQuestionGenerator):
+    def generate_questions_advance(self, text: str, aIParam: AIParam) -> list:
+        input_text = f"generate questions: {text}"
+        outputs = qg_advanced(
+            input_text,
+            max_length=aIParam.max_length,
+            num_return_sequences=aIParam.num_return_sequences,
+            do_sample=aIParam.do_sample,
+            top_k=aIParam.top_k,
+            top_p=aIParam.top_p,
+            temperature=aIParam.temperature
+        )
+        raw_sentences = [o["generated_text"] for o in outputs]
+        filtered = [s for s in raw_sentences if sentenceCheck.IsSentenceCorrect(s)]
+        return filtered
+
+    def generate_questions_simple(self, text: str, aIParam: AIParam) -> list:
+        input_text = f"generate questions: {text}"
+        outputs = qg_simple(
+            input_text,
+            max_length=aIParam.max_length,
+            num_return_sequences=aIParam.num_return_sequences,
+            do_sample=aIParam.do_sample,
+            top_k=aIParam.top_k,
+            top_p=aIParam.top_p,
+            temperature=aIParam.temperature
+        )
+        return [o["generated_text"] for o in outputs]
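Both T5 checkpoints are loaded at module import, so the first import is slow and memory-heavy. A usage sketch:

from backend.services.QuestionGenerator import QuestionGenerator
from backend.models.AIParamModel import AIParam

qgen = QuestionGenerator()
questions = qgen.generate_questions_advance(
    "Photosynthesis converts light energy into chemical energy in plants.",
    AIParam(num_return_sequences=5),
)
print(questions)  # only questions that pass SentenceCheck survive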
backend/services/SentenceCheck.py
ADDED
@@ -0,0 +1,54 @@
+from .ISentenceCheck import ISentenceCheck
+from transformers import GPT2LMHeadModel, GPT2TokenizerFast
+import language_tool_python
+import torch
+import nltk
+
+nltk.download('punkt')
+
+class SentenceCheck(ISentenceCheck):
+    def __init__(self):
+        self.tool = language_tool_python.LanguageTool('en-US')
+        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+        print(f"[SentenceCheck] Using device: {self.device}")
+        self.model = GPT2LMHeadModel.from_pretrained("gpt2").to(self.device)
+        self.tokenizer = GPT2TokenizerFast.from_pretrained("gpt2")
+
+    def is_grammatically_correct(self, text):
+        matches = self.tool.check(text)
+        return len(matches) == 0
+
+    def is_single_word_sentence(self, text):
+        return "nosentence" if len(text.split()) <= 1 else text
+
+    def looks_meaningful(self, text):
+        words = nltk.word_tokenize(text)
+        if not words:
+            return False
+        english_words = [word for word in words if word.isalpha()]
+        return len(english_words) / len(words) > 0.5
+
+    def get_perplexity(self, sentence):
+        inputs = self.tokenizer(sentence, return_tensors="pt").to(self.device)
+        with torch.no_grad():
+            outputs = self.model(**inputs, labels=inputs["input_ids"])
+        loss = outputs.loss
+        return torch.exp(loss).item()
+
+    def IsSentenceCorrect(self, question: str) -> bool:
+        if self.is_single_word_sentence(question) == "nosentence":
+            return False
+        if not self.looks_meaningful(question):
+            return False
+        if not self.is_grammatically_correct(question):
+            return False
+        if self.get_perplexity(question) > 80:
+            return False
+        if len(question.split()) < 4 or len(question.split()) > 20:
+            return False
+        if not question.strip().endswith("?"):
+            return False
+        if question.split()[0].lower() not in [
+            "what", "how", "why", "when", "where", "is", "are", "can",
+            "should", "could", "who", "does", "do"
+        ]:
+            return False
+        return True
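The predicate runs shape checks first (single word, alphabetic ratio), then LanguageTool grammar (which needs a local Java runtime), a GPT-2 perplexity threshold (reject above 80), and finally length, "?", and question-word rules. A sketch:

from backend.services.SentenceCheck import SentenceCheck

checker = SentenceCheck()  # loads GPT-2 and starts a LanguageTool instance
checker.IsSentenceCorrect("What is the role of photosynthesis?")  # True if every check passes
checker.IsSentenceCorrect("banana")                               # False: single word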
backend/services/TextReaderQuestionGenerator.py
ADDED
@@ -0,0 +1,34 @@
+from backend.services.DataReader import DataReader
+from backend.services.ChunkGenerator import ChunkGenerator
+from backend.services.QuestionGenerator import QuestionGenerator
+from backend.models.AIParamModel import AIParam
+from backend.models.AIResponseModel import AIResult
+
+
+class TextReaderQuestionGenerator:
+    def __init__(self):
+        self.reader = DataReader()
+        self.chunker = ChunkGenerator()
+        self.qgen = QuestionGenerator()
+
+
+    async def textreader_question_generator(self, text: str) -> dict:
+        ai_param = AIParam()
+        if len(text) <= 100:
+            print("Text length is less than 100 characters.")
+            all_questions = []
+            questions = self.qgen.generate_questions_advance(text, ai_param)
+            all_questions.append({
+                "questions": questions
+            })
+
+            return all_questions
+        else:
+            print("Text length is more than 100 characters.")
+            # Longer input: split into ~100-word chunks before generating
+            chunks = self.chunker.chunk_text(text, 100)
+            all_questions = []
+            for chunk in chunks:
+                questions = self.qgen.generate_questions_advance(chunk, ai_param)
+                all_questions.append({
+                    "questions": questions
+                })
+
+            return all_questions
backend/services/__init__.py
ADDED
File without changes
backend/services/__pycache__/ChunkGenerator.cpython-312.pyc
ADDED
Binary file (1.21 kB).
backend/services/__pycache__/DataReader.cpython-312.pyc
ADDED
Binary file (2.88 kB).
backend/services/__pycache__/IChunkGenerator.cpython-312.pyc
ADDED
Binary file (702 Bytes).
backend/services/__pycache__/IDataReader.cpython-312.pyc
ADDED
Binary file (941 Bytes).
backend/services/__pycache__/IQuestionGenerator.cpython-312.pyc
ADDED
Binary file (1.02 kB).
backend/services/__pycache__/ISentenceCheck.cpython-312.pyc
ADDED
Binary file (639 Bytes).
backend/services/__pycache__/PDFQuestionService.cpython-312.pyc
ADDED
Binary file (5.73 kB).
backend/services/__pycache__/QuestionGenerator.cpython-312.pyc
ADDED
Binary file (2.77 kB).
backend/services/__pycache__/SentenceCheck.cpython-312.pyc
ADDED
Binary file (4.09 kB).
backend/services/__pycache__/TextReaderQuestionGenerator.cpython-312.pyc
ADDED
Binary file (1.82 kB).
backend/services/__pycache__/__init__.cpython-312.pyc
ADDED
Binary file (162 Bytes).
backend/services/__pycache__/item_service.cpython-312.pyc
ADDED
Binary file (1.16 kB).
backend/services/__pycache__/pdfreader_service.cpython-312.pyc
ADDED
Binary file (2.73 kB).