Spaces:
Sleeping
Sleeping
from backend.services.DataReader import DataReader | |
from backend.services.ChunkGenerator import ChunkGenerator | |
from backend.services.QuestionGenerator import QuestionGenerator | |
from backend.models.AIParamModel import AIParam | |
from backend.models.AIResponseModel import AIResult | |
from pathlib import Path | |
import time | |
class PDFQuestionService: | |
def __init__(self): | |
self.reader = DataReader() | |
self.chunker = ChunkGenerator() | |
self.qgen = QuestionGenerator() | |
def read_file(self, filename: str) -> str: | |
ext = Path(filename).suffix.lower() | |
if ext == ".txt": | |
return self.reader.read_txt(filename) | |
elif ext == ".pdf": | |
return self.reader.read_pdf(filename) | |
elif ext == ".docx": | |
return self.reader.read_docx(filename) | |
else: | |
raise ValueError("Unsupported file format") | |
def generate_questions(self, filepath: str) -> dict: | |
ai_param = AIParam() | |
text = self.read_file(filepath) | |
if len(text) <= 100: | |
total_chunks = len(text) | |
sample_size = min(2, total_chunks) | |
sample_chunks = chunks[:sample_size] | |
start_time = time.time() | |
for chunk in sample_chunks: | |
self.qgen.generate_questions_advance(text, ai_param) | |
elapsed = time.time() - start_time | |
avg_time = elapsed / sample_size | |
est_total_time = avg_time * total_chunks | |
all_questions = [] | |
for idx, chunk in enumerate(chunks): | |
questions = self.qgen.generate_questions_advance(chunk, ai_param) | |
all_questions.append({ | |
"chunk": idx + 1, | |
"questions": questions | |
}) | |
return { | |
"estimated_total_time_seconds": round(est_total_time, 2), | |
"estimated_minutes": round(est_total_time / 60, 2), | |
"total_chunks": total_chunks, | |
"chunks": all_questions | |
} | |
chunks = self.chunker.chunk_text(text, 100) | |
total_chunks = len(chunks) | |
sample_size = min(2, total_chunks) | |
sample_chunks = chunks[:sample_size] | |
start_time = time.time() | |
for chunk in sample_chunks: | |
self.qgen.generate_questions_advance(chunk, ai_param) | |
elapsed = time.time() - start_time | |
avg_time = elapsed / sample_size | |
est_total_time = avg_time * total_chunks | |
all_questions = [] | |
for idx, chunk in enumerate(chunks): | |
questions = self.qgen.generate_questions_advance(chunk, ai_param) | |
all_questions.append({ | |
"chunk": idx + 1, | |
"questions": questions | |
}) | |
return { | |
"estimated_total_time_seconds": round(est_total_time, 2), | |
"estimated_minutes": round(est_total_time / 60, 2), | |
"total_chunks": total_chunks, | |
"chunks": all_questions | |
} | |
def react_generate_questions(self, filepath: str) -> AIResult: | |
ai_param = AIParam() | |
text = self.read_file(filepath) | |
if len(text) <= 100: | |
total_chunks = len(text) | |
sample_size = min(2, total_chunks) | |
sample_chunks = chunks[:sample_size] | |
start_time = time.time() | |
for chunk in sample_chunks: | |
self.qgen.generate_questions_advance(text, ai_param) | |
elapsed = time.time() - start_time | |
avg_time = elapsed / sample_size | |
est_total_time = avg_time * total_chunks | |
all_questions = [] | |
for idx, chunk in enumerate(chunks): | |
questions = self.qgen.generate_questions_advance(chunk, ai_param) | |
all_questions.append({ | |
"questions": questions | |
}) | |
return AIResult( | |
EstimatedTotalTimeSeconds=round(est_total_time, 2), | |
EstimatedMinutes=round(est_total_time / 60, 2), | |
TotalChunks=total_chunks, | |
Chunks=all_questions | |
) | |
chunks = self.chunker.chunk_text(text, 100) | |
total_chunks = len(chunks) | |
sample_size = min(2, total_chunks) | |
sample_chunks = chunks[:sample_size] | |
start_time = time.time() | |
for chunk in sample_chunks: | |
self.qgen.generate_questions_advance(chunk, ai_param) | |
elapsed = time.time() - start_time | |
avg_time = elapsed / sample_size | |
est_total_time = avg_time * total_chunks | |
all_questions = [] | |
for idx, chunk in enumerate(chunks): | |
questions = self.qgen.generate_questions_advance(chunk, ai_param) | |
if (questions !=[]): | |
all_questions.append({ | |
"questions": questions | |
}) | |
return AIResult( | |
EstimatedTotalTimeSeconds=round(est_total_time, 2), | |
EstimatedMinutes=round(est_total_time / 60, 2), | |
TotalChunks=total_chunks, | |
Chunks=all_questions | |
) | |