diff --git a/Space.yaml b/Space.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..5c333792efc08086781d78631614940f49d96d96
--- /dev/null
+++ b/Space.yaml
@@ -0,0 +1,3 @@
+sdk: fastapi
+app_file: main.py
+python_version: "3.10"
diff --git a/__init__.py b/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/app.py b/app.py
new file mode 100644
index 0000000000000000000000000000000000000000..fa66ca5fdf5a37ff46ec7ffaa690d36f1ca57447
--- /dev/null
+++ b/app.py
@@ -0,0 +1,26 @@
+from fastapi import FastAPI
+from backend.api import items
+from backend.api import pdfreader
+from backend.api import textreader
+from fastapi.middleware.cors import CORSMiddleware
+
+app = FastAPI(title="Multi-layered FastAPI Example")
+
+# Allow requests from your frontend origin
+origins = [
+    "http://localhost:3000",  # React frontend
+    # Add more origins if needed
+]
+
+app.add_middleware(
+    CORSMiddleware,
+    allow_origins=origins,  # List of allowed origins
+    allow_credentials=True,
+    allow_methods=["*"],  # Allow all HTTP methods (GET, POST, etc.)
+    allow_headers=["*"],  # Allow all headers
+)
+
+
+app.include_router(items.router)
+app.include_router(pdfreader.router)
+app.include_router(textreader.router)
\ No newline at end of file
diff --git a/app1.py b/app1.py
new file mode 100644
index 0000000000000000000000000000000000000000..4bc659e4cf0ea0394fb181c121157e5e8b727520
--- /dev/null
+++ b/app1.py
@@ -0,0 +1,7 @@
+import gradio as gr
+
+def greet(name):
+    return "Hello " + name + "!!"
+
+demo = gr.Interface(fn=greet, inputs="text", outputs="text")
+demo.launch(ssr_mode=False)
\ No newline at end of file
diff --git a/backend/__pycache__/__init__.cpython-312.pyc b/backend/__pycache__/__init__.cpython-312.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..3e23120552b067f604b712368f822de25e90b461
Binary files /dev/null and b/backend/__pycache__/__init__.cpython-312.pyc differ
diff --git a/backend/__pycache__/main.cpython-312.pyc b/backend/__pycache__/main.cpython-312.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..e08ad04e5301e8203bf06aef2ad505519f8163e8
Binary files /dev/null and b/backend/__pycache__/main.cpython-312.pyc differ
diff --git a/backend/api/__init__.py b/backend/api/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/backend/api/__pycache__/__init__.cpython-312.pyc b/backend/api/__pycache__/__init__.cpython-312.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..46b37c7f4902eeb567fd6773697c13403ff195d9
Binary files /dev/null and b/backend/api/__pycache__/__init__.cpython-312.pyc differ
diff --git a/backend/api/__pycache__/items.cpython-312.pyc b/backend/api/__pycache__/items.cpython-312.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..92e1c95214e03b19dcefa8a0f8f4ed0fc3f8beea
Binary files /dev/null and b/backend/api/__pycache__/items.cpython-312.pyc differ
diff --git a/backend/api/__pycache__/pdfreader.cpython-312.pyc b/backend/api/__pycache__/pdfreader.cpython-312.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..b161d36ba2f6f8f537709dba4ae2f1f30eae1d75
Binary files /dev/null and b/backend/api/__pycache__/pdfreader.cpython-312.pyc differ
diff --git a/backend/api/__pycache__/textreader.cpython-312.pyc b/backend/api/__pycache__/textreader.cpython-312.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..9a6a4eff2c2d03afe6b290e558878bd451cb4d50
Binary files /dev/null and b/backend/api/__pycache__/textreader.cpython-312.pyc differ
diff --git a/backend/api/items.py b/backend/api/items.py
new file mode 100644
index 0000000000000000000000000000000000000000..d05f80476bd4d170689e28be834965cf78b18782
--- /dev/null
+++ b/backend/api/items.py
@@ -0,0 +1,18 @@
+from fastapi import APIRouter, HTTPException
+from typing import List
+from backend.models.item import Item
+from backend.services.item_service import ItemService
+
+router = APIRouter(prefix="/items", tags=["items"])
+service = ItemService()
+
+@router.get("/", response_model=List[Item])
+def get_items():
+    return service.list_items()
+
+@router.get("/{item_id}", response_model=Item)
+def get_item(item_id: int):
+    item = service.get_item(item_id)
+    if item is None:
+        raise HTTPException(status_code=404, detail="Item not found")
+    return item
\ No newline at end of file
diff --git a/backend/api/pdfreader.py b/backend/api/pdfreader.py
new file mode 100644
index 0000000000000000000000000000000000000000..b056ed44f6e4c11198e0ff42a22877054442e7a0
--- /dev/null
+++ b/backend/api/pdfreader.py
@@ -0,0 +1,13 @@
+from fastapi import APIRouter, UploadFile, File, HTTPException
+from backend.services.pdfreader_service import PDFService
+
+router = APIRouter(prefix="/pdfreader", tags=["pdfreader"])
+service = PDFService()
+
+@router.post("/upload")
+async def upload_pdf(file: UploadFile = File(...)):
+    if not file.filename.endswith(".pdf"):
+        raise HTTPException(status_code=400, detail="Only PDF files are allowed")
+
+    result = await service.process_uploaded_pdf(file)
+    return result
diff --git a/backend/api/textreader.py b/backend/api/textreader.py
new file mode 100644
index 0000000000000000000000000000000000000000..6c4cd5c1a343e91ac86c6b8ed63c857eab12d263
--- /dev/null
+++ b/backend/api/textreader.py
@@ -0,0 +1,17 @@
+from fastapi import APIRouter, HTTPException
+from backend.services.TextReaderQuestionGenerator import TextReaderQuestionGenerator
+from pydantic import BaseModel
+
+router = APIRouter(prefix="/txt", tags=["textreader"])
+service = TextReaderQuestionGenerator()
+
+# Define the request model
+class TextRequest(BaseModel):
+    txt: str
+
+@router.post("/read_text")
+async def read_text(request: TextRequest):
+    if not request.txt:
+        raise HTTPException(status_code=400, detail="No text provided")
+    result = await service.textreader_question_generator(request.txt)
+    return result
diff --git a/backend/models/AIParamModel.py b/backend/models/AIParamModel.py
new file mode 100644
index 0000000000000000000000000000000000000000..ad80a775013fcb9b9badf25a4aabcd11b73e324f
--- /dev/null
+++ b/backend/models/AIParamModel.py
@@ -0,0 +1,10 @@
+from dataclasses import dataclass
+
+@dataclass
+class AIParam:
+    max_length: int = 64
+    num_return_sequences: int = 10
+    do_sample: bool = True
+    top_k: int = 50
+    top_p: float = 0.95
+    temperature: float = 0.8
\ No newline at end of file
diff --git a/backend/models/AIResponseModel.py b/backend/models/AIResponseModel.py
new file mode 100644
index 0000000000000000000000000000000000000000..5ef203fbce3cd485462cdd0ba0b40b33e0b3e0bf
--- /dev/null
+++ b/backend/models/AIResponseModel.py
@@ -0,0 +1,19 @@
+from dataclasses import dataclass
+
+
+@dataclass
+class AIResult:
+    Chunks: list
+    TotalChunks: int
+    EstimatedTotalTimeSeconds: float
+    EstimatedMinutes: float
+
+@dataclass
+class AIResponseModel:
+    OriginalFileName: str
+    StoredFileName: str
+    SavedTo: str
+    AIResult: AIResult
+    ContentSize: int = 0
+
+
diff --git a/backend/models/__init__.py b/backend/models/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/backend/models/__pycache__/AIParamModel.cpython-312.pyc b/backend/models/__pycache__/AIParamModel.cpython-312.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..8b1c51de1bcac3092c4445c78db4e45d827e1574
Binary files /dev/null and b/backend/models/__pycache__/AIParamModel.cpython-312.pyc differ
diff --git a/backend/models/__pycache__/AIResponseModel.cpython-312.pyc b/backend/models/__pycache__/AIResponseModel.cpython-312.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..22ec0214497e7354e5a2d53b643e4a3c9d3ce5e3
Binary files /dev/null and b/backend/models/__pycache__/AIResponseModel.cpython-312.pyc differ
diff --git a/backend/models/__pycache__/__init__.cpython-312.pyc b/backend/models/__pycache__/__init__.cpython-312.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..9870dc93d08a04bda1df5f8d71c787f461ede046
Binary files /dev/null and b/backend/models/__pycache__/__init__.cpython-312.pyc differ
diff --git a/backend/models/__pycache__/item.cpython-312.pyc b/backend/models/__pycache__/item.cpython-312.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..c3822c0a71e74a566ab6d83708d34a18489ebbf2
Binary files /dev/null and b/backend/models/__pycache__/item.cpython-312.pyc differ
diff --git a/backend/models/item.py b/backend/models/item.py
new file mode 100644
index 0000000000000000000000000000000000000000..2503b1b6f31303e4b9adb7ac968c43b144e4e0a8
--- /dev/null
+++ b/backend/models/item.py
@@ -0,0 +1,6 @@
+from pydantic import BaseModel
+
+class Item(BaseModel):
+    id: int
+    name: str
+    description: str
\ No newline at end of file
diff --git a/backend/repositories/__init__.py b/backend/repositories/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/backend/repositories/__pycache__/__init__.cpython-312.pyc b/backend/repositories/__pycache__/__init__.cpython-312.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..b4c1cd3eaa7c1aca122f1d31ed42015e06ccefca
Binary files /dev/null and b/backend/repositories/__pycache__/__init__.cpython-312.pyc differ
diff --git a/backend/repositories/__pycache__/item_repo.cpython-312.pyc b/backend/repositories/__pycache__/item_repo.cpython-312.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..7a4608428266d0d83e319f714b8ff1b89154c603
Binary files /dev/null and b/backend/repositories/__pycache__/item_repo.cpython-312.pyc differ
diff --git a/backend/repositories/item_repo.py b/backend/repositories/item_repo.py
new file mode 100644
index 0000000000000000000000000000000000000000..eefdccc93059099b9920c65664c51ccb3b06dc86
--- /dev/null
+++ b/backend/repositories/item_repo.py
@@ -0,0 +1,15 @@
+from typing import List
+from backend.models.item import Item
+
+# Simulate a database with an in-memory list
+_items_db = [
+    Item(id=1, name="Item 1", description="The first item"),
+    Item(id=2, name="Item 2", description="The second item"),
+]
+
+class ItemRepository:
+    def get_all(self) -> List[Item]:
+        return _items_db
+
+    def get_by_id(self, item_id: int) -> Item | None:
+        return next((item for item in _items_db if item.id == item_id), None)
diff --git a/backend/services/ChunkGenerator.py b/backend/services/ChunkGenerator.py
new file mode 100644
index 0000000000000000000000000000000000000000..0d5d1f5e154f11cade4accd02e211e35fc0d993a
--- /dev/null
+++ b/backend/services/ChunkGenerator.py
@@ -0,0 +1,21 @@
+from .IChunkGenerator import IChunkGenerator
+import nltk
+from nltk.tokenize import sent_tokenize
+class ChunkGenerator(IChunkGenerator):
+    def chunk_text(self, text: str, max_words: int = 100) -> list:
+        sentences = sent_tokenize(text)
+        chunks, chunk = [], []
+        word_count = 0
+
+        for sentence in sentences:
+            word_count += len(sentence.split())
+            chunk.append(sentence)
+            if word_count >= max_words:
+                chunks.append(" ".join(chunk))
+                chunk = []
+                word_count = 0
+
+        if chunk:
+            chunks.append(" ".join(chunk))
+
+        return chunks
\ No newline at end of file
diff --git a/backend/services/DataReader.py b/backend/services/DataReader.py
new file mode 100644
index 0000000000000000000000000000000000000000..774e9064edfe8b928464d8171c1d456a8a143b04
--- /dev/null
+++ b/backend/services/DataReader.py
@@ -0,0 +1,54 @@
+from .IDataReader import IDataReader
+from PyPDF2 import PdfReader
+from docx import Document
+
+class DataReader(IDataReader):
+    def read_pdf(self, file_path: str) -> str:
+        """
+        Reads a PDF file and returns its text content.
+
+        :param file_path: Path to the PDF file.
+        :return: Text content of the PDF file.
+        """
+        try:
+            text = ""
+            with open(file_path, "rb") as f:
+                reader = PdfReader(f)
+                for page in reader.pages:
+                    page_text = page.extract_text()
+                    if page_text:
+                        text += page_text + "\n"
+            return text
+        except Exception as e:
+            print(f"Error reading PDF file: {e}")
+            return ""
+
+    def read_docx(self, file_path: str) -> str:
+        """
+        Reads a DOCX file and returns its text content.
+
+        :param file_path: Path to the DOCX file.
+        :return: Text content of the DOCX file.
+        """
+        try:
+            doc = Document(file_path)
+            text = "\n".join([para.text for para in doc.paragraphs])
+            return text
+        except Exception as e:
+            print(f"Error reading DOCX file: {e}")
+            return ""
+
+    def read_txt(self, file_path: str) -> str:
+        """
+        Reads a TXT file and returns its text content.
+
+        :param file_path: Path to the TXT file.
+        :return: Text content of the TXT file.
+ """ + try: + with open(file_path, "r", encoding="utf-8") as f: + text = f.read() + return text + except Exception as e: + print(f"Error reading TXT file: {e}") + return "" diff --git a/backend/services/IChunkGenerator.py b/backend/services/IChunkGenerator.py new file mode 100644 index 0000000000000000000000000000000000000000..a16259849b1f9074d24bf7311fd997eba85e4a5b --- /dev/null +++ b/backend/services/IChunkGenerator.py @@ -0,0 +1,7 @@ +from abc import ABC, abstractmethod + +class IChunkGenerator(ABC): + @abstractmethod + def chunk_text(self, text: str,words: int=100) -> list: + """Splits the text into smaller chunks.""" + pass \ No newline at end of file diff --git a/backend/services/IDataReader.py b/backend/services/IDataReader.py new file mode 100644 index 0000000000000000000000000000000000000000..7a638dee6784b0f622f3fe323bac146002a8e035 --- /dev/null +++ b/backend/services/IDataReader.py @@ -0,0 +1,12 @@ +from abc import ABC, abstractmethod + +class IDataReader(ABC): + @abstractmethod + def read_pdf(self, file_path: str) -> str: + pass + @abstractmethod + def read_docx(self, file_path: str) -> str: + pass + @abstractmethod + def read_txt(self, file_path: str) -> str: + pass \ No newline at end of file diff --git a/backend/services/IQuestionGenerator.py b/backend/services/IQuestionGenerator.py new file mode 100644 index 0000000000000000000000000000000000000000..de4b9047d87ce2e43e1f92dd1da28fea1d5701e8 --- /dev/null +++ b/backend/services/IQuestionGenerator.py @@ -0,0 +1,13 @@ +from abc import ABC, abstractmethod +from backend.models.AIParamModel import AIParam + +class IQuestionGenerator(ABC): + @abstractmethod + def generate_questions_advance(self, text: str, aIParam:AIParam) -> list: + """Generates questions from the given text.""" + pass + + @abstractmethod + def generate_questions_simple(self, text: str,aIParam:AIParam) -> list: + """Generates questions from the given text.""" + pass \ No newline at end of file diff --git a/backend/services/ISentenceCheck.py b/backend/services/ISentenceCheck.py new file mode 100644 index 0000000000000000000000000000000000000000..ed5edfdd98437f7773c2492041a56ed01e304b38 --- /dev/null +++ b/backend/services/ISentenceCheck.py @@ -0,0 +1,7 @@ +from abc import ABC, abstractmethod + +class ISentenceCheck(ABC): + + @abstractmethod + def IsSentenceCorrect(self, sentence: str) -> bool: + pass \ No newline at end of file diff --git a/backend/services/PDFQuestionService.py b/backend/services/PDFQuestionService.py new file mode 100644 index 0000000000000000000000000000000000000000..37028758f85f086dfd9ded743590235835cc0a45 --- /dev/null +++ b/backend/services/PDFQuestionService.py @@ -0,0 +1,146 @@ +from backend.services.DataReader import DataReader +from backend.services.ChunkGenerator import ChunkGenerator +from backend.services.QuestionGenerator import QuestionGenerator +from backend.models.AIParamModel import AIParam +from backend.models.AIResponseModel import AIResult +from pathlib import Path +import time + +class PDFQuestionService: + def __init__(self): + self.reader = DataReader() + self.chunker = ChunkGenerator() + self.qgen = QuestionGenerator() + + def read_file(self, filename: str) -> str: + ext = Path(filename).suffix.lower() + if ext == ".txt": + return self.reader.read_txt(filename) + elif ext == ".pdf": + return self.reader.read_pdf(filename) + elif ext == ".docx": + return self.reader.read_docx(filename) + else: + raise ValueError("Unsupported file format") + + def generate_questions(self, filepath: str) -> dict: + ai_param = AIParam() + 
+        text = self.read_file(filepath)
+
+        if len(text) <= 100:
+            # Short text: treat the whole text as a single chunk
+            chunks = [text]
+            total_chunks = len(chunks)
+            sample_size = min(2, total_chunks)
+            sample_chunks = chunks[:sample_size]
+
+            start_time = time.time()
+            for chunk in sample_chunks:
+                self.qgen.generate_questions_advance(chunk, ai_param)
+            elapsed = time.time() - start_time
+            avg_time = elapsed / sample_size
+            est_total_time = avg_time * total_chunks
+
+            all_questions = []
+            for idx, chunk in enumerate(chunks):
+                questions = self.qgen.generate_questions_advance(chunk, ai_param)
+                all_questions.append({
+                    "chunk": idx + 1,
+                    "questions": questions
+                })
+
+            return {
+                "estimated_total_time_seconds": round(est_total_time, 2),
+                "estimated_minutes": round(est_total_time / 60, 2),
+                "total_chunks": total_chunks,
+                "chunks": all_questions
+            }
+
+        chunks = self.chunker.chunk_text(text, 100)
+        total_chunks = len(chunks)
+
+        sample_size = min(2, total_chunks)
+        sample_chunks = chunks[:sample_size]
+
+        start_time = time.time()
+        for chunk in sample_chunks:
+            self.qgen.generate_questions_advance(chunk, ai_param)
+        elapsed = time.time() - start_time
+        avg_time = elapsed / sample_size
+        est_total_time = avg_time * total_chunks
+
+        all_questions = []
+        for idx, chunk in enumerate(chunks):
+            questions = self.qgen.generate_questions_advance(chunk, ai_param)
+            all_questions.append({
+                "chunk": idx + 1,
+                "questions": questions
+            })
+
+        return {
+            "estimated_total_time_seconds": round(est_total_time, 2),
+            "estimated_minutes": round(est_total_time / 60, 2),
+            "total_chunks": total_chunks,
+            "chunks": all_questions
+        }
+
+    def react_generate_questions(self, filepath: str) -> AIResult:
+        ai_param = AIParam()
+        text = self.read_file(filepath)
+
+        if len(text) <= 100:
+            # Short text: treat the whole text as a single chunk
+            chunks = [text]
+            total_chunks = len(chunks)
+            sample_size = min(2, total_chunks)
+            sample_chunks = chunks[:sample_size]
+
+            start_time = time.time()
+            for chunk in sample_chunks:
+                self.qgen.generate_questions_advance(chunk, ai_param)
+            elapsed = time.time() - start_time
+            avg_time = elapsed / sample_size
+            est_total_time = avg_time * total_chunks
+
+            all_questions = []
+            for idx, chunk in enumerate(chunks):
+                questions = self.qgen.generate_questions_advance(chunk, ai_param)
+                all_questions.append({
+                    "questions": questions
+                })
+
+            return AIResult(
+                EstimatedTotalTimeSeconds=round(est_total_time, 2),
+                EstimatedMinutes=round(est_total_time / 60, 2),
+                TotalChunks=total_chunks,
+                Chunks=all_questions
+            )
+
+        chunks = self.chunker.chunk_text(text, 100)
+        total_chunks = len(chunks)
+
+        sample_size = min(2, total_chunks)
+        sample_chunks = chunks[:sample_size]
+
+        start_time = time.time()
+        for chunk in sample_chunks:
+            self.qgen.generate_questions_advance(chunk, ai_param)
+        elapsed = time.time() - start_time
+        avg_time = elapsed / sample_size
+        est_total_time = avg_time * total_chunks
+
+        all_questions = []
+        for idx, chunk in enumerate(chunks):
+            questions = self.qgen.generate_questions_advance(chunk, ai_param)
+            if questions:
+                all_questions.append({
+                    "questions": questions
+                })
+
+        return AIResult(
+            EstimatedTotalTimeSeconds=round(est_total_time, 2),
+            EstimatedMinutes=round(est_total_time / 60, 2),
+            TotalChunks=total_chunks,
+            Chunks=all_questions
+        )
+
diff --git a/backend/services/QuestionGenerator.py b/backend/services/QuestionGenerator.py
new file mode 100644
index 0000000000000000000000000000000000000000..7c429f84adfd2e9907a4663c57177f86be0bdc64
--- /dev/null
+++ b/backend/services/QuestionGenerator.py
@@ -0,0 +1,60 @@
+from transformers import pipeline, AutoTokenizer, AutoModelForSeq2SeqLM
+from .IQuestionGenerator import IQuestionGenerator
+from backend.services.SentenceCheck import SentenceCheck
+from backend.models.AIParamModel import AIParam
+import torch
+
+device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+print(f"[QuestionGenerator] Using device: {device}")
+
+# valhalla model with slow tokenizer
+tokenizer_qg_simple = AutoTokenizer.from_pretrained("valhalla/t5-small-qg-hl", use_fast=False)
+model_qg_simple = AutoModelForSeq2SeqLM.from_pretrained("valhalla/t5-small-qg-hl")
+
+qg_simple = pipeline(
+    "text2text-generation",
+    model=model_qg_simple,
+    tokenizer=tokenizer_qg_simple,
+    device=0 if torch.cuda.is_available() else -1
+)
+
+# iarfmoose model with slow tokenizer
+tokenizer_qg_advanced = AutoTokenizer.from_pretrained("iarfmoose/t5-base-question-generator", use_fast=False)
+model_qg_advanced = AutoModelForSeq2SeqLM.from_pretrained("iarfmoose/t5-base-question-generator")
+
+qg_advanced = pipeline(
+    "text2text-generation",
+    model=model_qg_advanced,
+    tokenizer=tokenizer_qg_advanced,
+    device=0 if torch.cuda.is_available() else -1
+)
+sentenceCheck = SentenceCheck()
+
+class QuestionGenerator(IQuestionGenerator):
+    def generate_questions_advance(self, text: str, aIParam: AIParam) -> list:
+        input_text = f"generate questions: {text}"
+        outputs = qg_advanced(
+            input_text,
+            max_length=aIParam.max_length,
+            num_return_sequences=aIParam.num_return_sequences,
+            do_sample=aIParam.do_sample,
+            top_k=aIParam.top_k,
+            top_p=aIParam.top_p,
+            temperature=aIParam.temperature
+        )
+        raw_sentences = [o["generated_text"] for o in outputs]
+        filtered = [s for s in raw_sentences if sentenceCheck.IsSentenceCorrect(s)]
+        return filtered
+
+    def generate_questions_simple(self, text: str, aIParam: AIParam) -> list:
+        input_text = f"generate questions: {text}"
+        outputs = qg_simple(
+            input_text,
+            max_length=aIParam.max_length,
+            num_return_sequences=aIParam.num_return_sequences,
+            do_sample=aIParam.do_sample,
+            top_k=aIParam.top_k,
+            top_p=aIParam.top_p,
+            temperature=aIParam.temperature
+        )
+        return [o["generated_text"] for o in outputs]
diff --git a/backend/services/SentenceCheck.py b/backend/services/SentenceCheck.py
new file mode 100644
index 0000000000000000000000000000000000000000..1ededc5e369cc6e1202af2076344124a9c88dc25
--- /dev/null
+++ b/backend/services/SentenceCheck.py
@@ -0,0 +1,54 @@
+from .ISentenceCheck import ISentenceCheck
+from transformers import GPT2LMHeadModel, GPT2TokenizerFast
+import language_tool_python
+import torch
+import nltk
+
+nltk.download('punkt')
+
+class SentenceCheck(ISentenceCheck):
+    def __init__(self):
+        self.tool = language_tool_python.LanguageTool('en-US')
+        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+        print(f"[SentenceCheck] Using device: {self.device}")
+        self.model = GPT2LMHeadModel.from_pretrained("gpt2").to(self.device)
+        self.tokenizer = GPT2TokenizerFast.from_pretrained("gpt2")
+
+    def is_grammatically_correct(self, text):
+        matches = self.tool.check(text)
+        return len(matches) == 0
+
+    def is_single_word_sentence(self, text):
+        return "nosentence" if len(text.split()) <= 1 else text
+
+    def looks_meaningful(self, text):
+        words = nltk.word_tokenize(text)
+        english_words = [word for word in words if word.isalpha()]
+        return len(english_words) / len(words) > 0.5
+
+    def get_perplexity(self, sentence):
+        inputs = self.tokenizer(sentence, return_tensors="pt").to(self.device)
+        with torch.no_grad():
+            outputs = self.model(**inputs, labels=inputs["input_ids"])
+        loss = outputs.loss
+        return torch.exp(loss).item()
+
+    def IsSentenceCorrect(self, question: str) -> bool:
+        if self.is_single_word_sentence(question) == "nosentence":
+            return False
+        if not self.looks_meaningful(question):
+            return False
+        if not self.is_grammatically_correct(question):
+            return False
+        if self.get_perplexity(question) > 80:
+            return False
+        if len(question.split()) < 4 or len(question.split()) > 20:
+            return False
+        if not question.strip().endswith("?"):
+            return False
+        if question.split()[0].lower() not in [
+            "what", "how", "why", "when", "where", "is", "are", "can",
+            "should", "could", "who", "does", "do"
+        ]:
+            return False
+        return True
diff --git a/backend/services/TextReaderQuestionGenerator.py b/backend/services/TextReaderQuestionGenerator.py
new file mode 100644
index 0000000000000000000000000000000000000000..909f9e4ce8adb6f5de9d674f19485b7b5a107c67
--- /dev/null
+++ b/backend/services/TextReaderQuestionGenerator.py
@@ -0,0 +1,34 @@
+from backend.services.DataReader import DataReader
+from backend.services.ChunkGenerator import ChunkGenerator
+from backend.services.QuestionGenerator import QuestionGenerator
+from backend.models.AIParamModel import AIParam
+from backend.models.AIResponseModel import AIResult
+
+
+class TextReaderQuestionGenerator:
+    def __init__(self):
+        self.reader = DataReader()
+        self.chunker = ChunkGenerator()
+        self.qgen = QuestionGenerator()
+
+
+    async def textreader_question_generator(self, text: str) -> dict:
+        ai_param = AIParam()
+        if len(text) <= 100:
+            print("Text length is less than 100 characters.")
+            all_questions = []
+            questions = self.qgen.generate_questions_advance(text, ai_param)
+            all_questions.append({
+                "questions": questions
+            })
+
+            return all_questions
+        else:
+            print("Text length exceeds 100 characters.")
+            all_questions = []
+            questions = self.qgen.generate_questions_advance(text, ai_param)
+            all_questions.append({
+                "questions": questions
+            })
+
+            return all_questions
\ No newline at end of file
diff --git a/backend/services/__init__.py b/backend/services/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/backend/services/__pycache__/ChunkGenerator.cpython-312.pyc b/backend/services/__pycache__/ChunkGenerator.cpython-312.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..f3dd7ce1514ccef4f1cb4ba99c841c032c9f6b7e
Binary files /dev/null and b/backend/services/__pycache__/ChunkGenerator.cpython-312.pyc differ
diff --git a/backend/services/__pycache__/DataReader.cpython-312.pyc b/backend/services/__pycache__/DataReader.cpython-312.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..ea26672bbd5de28d2396cf8036a84b7e72803443
Binary files /dev/null and b/backend/services/__pycache__/DataReader.cpython-312.pyc differ
diff --git a/backend/services/__pycache__/IChunkGenerator.cpython-312.pyc b/backend/services/__pycache__/IChunkGenerator.cpython-312.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..b468c8e886f3fbc796927d0e1d16ea96f821cfce
Binary files /dev/null and b/backend/services/__pycache__/IChunkGenerator.cpython-312.pyc differ
diff --git a/backend/services/__pycache__/IDataReader.cpython-312.pyc b/backend/services/__pycache__/IDataReader.cpython-312.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..8a47eac11e3d3f5731931b2c0333ceef712cdd80
Binary files /dev/null and b/backend/services/__pycache__/IDataReader.cpython-312.pyc differ
diff --git a/backend/services/__pycache__/IQuestionGenerator.cpython-312.pyc b/backend/services/__pycache__/IQuestionGenerator.cpython-312.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..d85d0d6b7e0e660b8643eba46d2b22c5f0abdab9
Binary files /dev/null and b/backend/services/__pycache__/IQuestionGenerator.cpython-312.pyc differ
diff --git a/backend/services/__pycache__/ISentenceCheck.cpython-312.pyc b/backend/services/__pycache__/ISentenceCheck.cpython-312.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..247c5a68938079f764d170194315a3648268d3c9
Binary files /dev/null and b/backend/services/__pycache__/ISentenceCheck.cpython-312.pyc differ
diff --git a/backend/services/__pycache__/PDFQuestionService.cpython-312.pyc b/backend/services/__pycache__/PDFQuestionService.cpython-312.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..d996f779fa055541c9dfe0a0acf62a18cfc71701
Binary files /dev/null and b/backend/services/__pycache__/PDFQuestionService.cpython-312.pyc differ
diff --git a/backend/services/__pycache__/QuestionGenerator.cpython-312.pyc b/backend/services/__pycache__/QuestionGenerator.cpython-312.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..8c6c5276a87c173753cc22c67170b2a02b28a907
Binary files /dev/null and b/backend/services/__pycache__/QuestionGenerator.cpython-312.pyc differ
diff --git a/backend/services/__pycache__/SentenceCheck.cpython-312.pyc b/backend/services/__pycache__/SentenceCheck.cpython-312.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..bf2eb73993a0633a0a9e8efd3cc5876866c142df
Binary files /dev/null and b/backend/services/__pycache__/SentenceCheck.cpython-312.pyc differ
diff --git a/backend/services/__pycache__/TextReaderQuestionGenerator.cpython-312.pyc b/backend/services/__pycache__/TextReaderQuestionGenerator.cpython-312.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..d7f9dc8b7ff67e104875ea6a96f5719b50e1bef3
Binary files /dev/null and b/backend/services/__pycache__/TextReaderQuestionGenerator.cpython-312.pyc differ
diff --git a/backend/services/__pycache__/__init__.cpython-312.pyc b/backend/services/__pycache__/__init__.cpython-312.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..e223ba04c226d034e71126145fd20bcd3e26ad6f
Binary files /dev/null and b/backend/services/__pycache__/__init__.cpython-312.pyc differ
diff --git a/backend/services/__pycache__/item_service.cpython-312.pyc b/backend/services/__pycache__/item_service.cpython-312.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..51761dcd052375df83579fcbaec6b8c65c6a0fef
Binary files /dev/null and b/backend/services/__pycache__/item_service.cpython-312.pyc differ
diff --git a/backend/services/__pycache__/pdfreader_service.cpython-312.pyc b/backend/services/__pycache__/pdfreader_service.cpython-312.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..5804eeb55071bf72d5a26b25d1e594aebf66c468
Binary files /dev/null and b/backend/services/__pycache__/pdfreader_service.cpython-312.pyc differ
diff --git a/backend/services/item_service.py b/backend/services/item_service.py
new file mode 100644
index 0000000000000000000000000000000000000000..3ff662d9975ab90e2f9ef8f31a64c718b11c299a
--- /dev/null
+++ b/backend/services/item_service.py
@@ -0,0 +1,13 @@
+from typing import List
+from backend.models.item import Item
+from backend.repositories.item_repo import ItemRepository
+
+class ItemService:
+    def __init__(self):
+        self.repo = ItemRepository()
+
+    def list_items(self) -> List[Item]:
+        return self.repo.get_all()
+
+    def get_item(self, item_id: int) -> Item | None:
+        return self.repo.get_by_id(item_id)
diff --git a/backend/services/pdfreader_service.py b/backend/services/pdfreader_service.py
new file mode 100644
index 0000000000000000000000000000000000000000..f411dd4200971f97cbc8dbb17fd8b08793121899
--- /dev/null
+++ b/backend/services/pdfreader_service.py
@@ -0,0 +1,44 @@
+import os
+import uuid
+from fastapi import UploadFile, HTTPException
+from typing import Dict, Any
+from backend.services.PDFQuestionService import PDFQuestionService  # AI logic class
+from backend.models.AIResponseModel import AIResponseModel  # AI response model
+class PDFService:
+    def __init__(self):
+        self.upload_dir = "uploaded_files"
+        os.makedirs(self.upload_dir, exist_ok=True)
+        self.question_service = PDFQuestionService()
+
+    def list_items(self):
+        # Your existing list logic
+        return []
+
+    async def process_uploaded_pdf(self, file: UploadFile) -> AIResponseModel:
+        ext = os.path.splitext(file.filename)[1].lower()
+        if ext not in [".pdf", ".txt", ".docx"]:
+            raise HTTPException(status_code=400, detail="Unsupported file format")
+
+        contents = await file.read()
+
+        # Generate unique filename with original extension
+        unique_filename = f"{uuid.uuid4()}{ext}"
+        file_path = os.path.join(self.upload_dir, unique_filename)
+
+        with open(file_path, "wb") as f:
+            f.write(contents)
+
+        # Call the AI processing logic
+        try:
+            result = self.question_service.react_generate_questions(file_path)
+            responsemodel = AIResponseModel(
+                OriginalFileName=file.filename,
+                StoredFileName=unique_filename,
+                ContentSize=len(contents),
+                SavedTo=file_path,
+                AIResult=result
+            )
+        except Exception as e:
+            raise HTTPException(status_code=500, detail=f"Error during AI processing: {str(e)}")
+
+        return responsemodel
diff --git a/requirements.txt b/requirements.txt
new file mode 100644
index 0000000000000000000000000000000000000000..e02a94d0ecb0666fdc2a4b1a830ec7b01d13b0ee
--- /dev/null
+++ b/requirements.txt
@@ -0,0 +1,11 @@
+gradio
+fastapi
+uvicorn
+PyPDF2
+python-docx
+nltk
+blobfile
+tiktoken
+transformers
+language-tool-python
+torch
\ No newline at end of file
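
A minimal local smoke test for the new /txt/read_text endpoint, using only the Python standard library. It assumes the API is served locally with uvicorn app:app --port 8000; the module name and port are assumptions, since this diff does not pin how the Space launches the app (Space.yaml points at main.py):

import json
import urllib.request

# JSON body matching the TextRequest model in backend/api/textreader.py
payload = json.dumps({"txt": "The mitochondria is the powerhouse of the cell."}).encode("utf-8")
req = urllib.request.Request(
    "http://localhost:8000/txt/read_text",
    data=payload,
    headers={"Content-Type": "application/json"},
    method="POST",
)
with urllib.request.urlopen(req) as resp:
    # The service responds with a list of {"questions": [...]} objects
    print(json.load(resp))

Note that the transformer models in QuestionGenerator.py and SentenceCheck.py are downloaded and loaded at import time, so the first server startup can take several minutes.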