|
from backend.services.DataReader import DataReader |
|
from backend.services.ChunkGenerator import ChunkGenerator |
|
from backend.services.QuestionGenerator import QuestionGenerator |
|
from backend.models.AIParamModel import AIParam |
|
from backend.models.AIResponseModel import AIResult |
|
from pathlib import Path |
|
import time |
|
|
|
class PDFQuestionService:
    """Generate questions from the text of a .txt, .pdf or .docx document.

    The document is read through ``DataReader``, split into chunks through
    ``ChunkGenerator`` and every chunk is handed to
    ``QuestionGenerator.generate_questions_advance``.
    """

    # Value passed as the second argument of ``ChunkGenerator.chunk_text`` and
    # also the character-length threshold (``len(text)``) at or below which the
    # text is processed as one single chunk.
    # NOTE(review): the unit chunk_text expects (characters, words, tokens) is
    # not visible from this file -- confirm against ChunkGenerator.
    CHUNK_SIZE = 100

    # Number of leading chunks whose generation time is measured in order to
    # extrapolate the time figure reported for the whole document.
    SAMPLE_SIZE = 2

    def __init__(self):
        """Create the reader, chunker and question-generator collaborators."""
        self.reader = DataReader()
        self.chunker = ChunkGenerator()
        self.qgen = QuestionGenerator()

    def read_file(self, filename: str) -> str:
        """Return the text of ``filename``, dispatching on its extension.

        Args:
            filename: Path of the document; the extension is matched
                case-insensitively.

        Returns:
            Whatever the matching ``DataReader`` method returns for the file.

        Raises:
            ValueError: If the extension is not ``.txt``, ``.pdf`` or ``.docx``.
        """
        ext = Path(filename).suffix.lower()
        if ext == ".txt":
            return self.reader.read_txt(filename)
        if ext == ".pdf":
            return self.reader.read_pdf(filename)
        if ext == ".docx":
            return self.reader.read_docx(filename)
        raise ValueError("Unsupported file format")

    def _split_into_chunks(self, text: str) -> list:
        """Return the chunks to process for ``text``.

        Empty text yields no chunks, text of at most ``CHUNK_SIZE`` characters
        is kept as one chunk, anything longer is delegated to the chunker.
        """
        if not text:
            return []
        if len(text) <= self.CHUNK_SIZE:
            # Short documents are handled as a single chunk without involving
            # the chunker at all.
            return [text]
        return self.chunker.chunk_text(text, self.CHUNK_SIZE)

    def _generate_for_chunks(self, chunks, ai_param) -> tuple:
        """Generate questions for every chunk and estimate the total duration.

        The first ``SAMPLE_SIZE`` generations are timed and their average is
        multiplied by the number of chunks. The timed calls are the real
        generations (their results are kept), so each chunk is sent to the
        question generator exactly once.

        Args:
            chunks: Sequence of chunks (must support ``len`` and slicing).
            ai_param: Parameter object forwarded to the question generator.

        Returns:
            A ``(results, estimated_seconds)`` tuple where ``results`` holds
            one generator return value per chunk, in chunk order, and
            ``estimated_seconds`` is ``0.0`` when there are no chunks.
        """
        generate = self.qgen.generate_questions_advance
        total_chunks = len(chunks)
        sample_size = min(self.SAMPLE_SIZE, total_chunks)

        # perf_counter is monotonic, so the measurement cannot be skewed by
        # wall-clock adjustments while the sample is running.
        started = time.perf_counter()
        results = [generate(chunk, ai_param) for chunk in chunks[:sample_size]]
        sample_elapsed = time.perf_counter() - started

        results.extend(generate(chunk, ai_param) for chunk in chunks[sample_size:])

        if sample_size == 0:
            # Nothing was processed; avoid dividing by zero.
            return results, 0.0
        return results, (sample_elapsed / sample_size) * total_chunks

    def generate_questions(self, filepath: str) -> dict:
        """Read ``filepath`` and generate questions for each chunk of its text.

        Args:
            filepath: Path of a ``.txt``, ``.pdf`` or ``.docx`` document.

        Returns:
            A dict with the keys ``estimated_total_time_seconds``,
            ``estimated_minutes`` (both rounded to two decimals),
            ``total_chunks`` and ``chunks``. ``chunks`` is a list of
            ``{"chunk": <1-based index>, "questions": <generator result>}``
            entries, one per chunk.

        Raises:
            ValueError: If the file extension is not supported.
        """
        ai_param = AIParam()
        text = self.read_file(filepath)
        chunks = self._split_into_chunks(text)
        results, est_total_time = self._generate_for_chunks(chunks, ai_param)

        return {
            "estimated_total_time_seconds": round(est_total_time, 2),
            "estimated_minutes": round(est_total_time / 60, 2),
            "total_chunks": len(chunks),
            "chunks": [
                {"chunk": number, "questions": questions}
                for number, questions in enumerate(results, start=1)
            ],
        }

    def react_generate_questions(self, filepath: str) -> "AIResult":
        """Read ``filepath`` and return the generated questions as an ``AIResult``.

        Same pipeline as ``generate_questions``, but chunks whose generator
        result equals ``[]`` are left out of ``Chunks`` and the entries carry
        no chunk number. ``TotalChunks`` still counts every processed chunk,
        including the ones that produced no questions.

        Args:
            filepath: Path of a ``.txt``, ``.pdf`` or ``.docx`` document.

        Returns:
            An ``AIResult`` built with ``EstimatedTotalTimeSeconds``,
            ``EstimatedMinutes`` (both rounded to two decimals),
            ``TotalChunks`` and ``Chunks``.

        Raises:
            ValueError: If the file extension is not supported.
        """
        ai_param = AIParam()
        text = self.read_file(filepath)
        chunks = self._split_into_chunks(text)
        results, est_total_time = self._generate_for_chunks(chunks, ai_param)

        return AIResult(
            EstimatedTotalTimeSeconds=round(est_total_time, 2),
            EstimatedMinutes=round(est_total_time / 60, 2),
            TotalChunks=len(chunks),
            # Compared against [] explicitly (not by truthiness) so that only
            # an empty list is dropped, exactly as the chunked path always did.
            Chunks=[
                {"questions": questions}
                for questions in results
                if questions != []
            ],
        )
|
|
|
|