File size: 5,161 Bytes
a2ff264 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 |
from backend.services.DataReader import DataReader
from backend.services.ChunkGenerator import ChunkGenerator
from backend.services.QuestionGenerator import QuestionGenerator
from backend.models.AIParamModel import AIParam
from backend.models.AIResponseModel import AIResult
from pathlib import Path
import time
class PDFQuestionService:
    """Generate questions from the text of a .txt, .pdf or .docx file.

    The file is read through ``DataReader``, split into chunks through
    ``ChunkGenerator`` and every chunk is handed to
    ``QuestionGenerator.generate_questions_advance``.
    """

    # Size argument passed to ``ChunkGenerator.chunk_text``. Text whose
    # length does not exceed it is processed as one chunk without chunking.
    _CHUNK_SIZE = 100
    # Number of leading chunks whose generation time is extrapolated into
    # the reported time estimate.
    _SAMPLE_SIZE = 2

    def __init__(self):
        """Create the reader, chunker and question generator collaborators."""
        self.reader = DataReader()
        self.chunker = ChunkGenerator()
        self.qgen = QuestionGenerator()

    def read_file(self, filename: str) -> str:
        """Return the text of *filename*, choosing the reader by extension.

        The extension is compared case-insensitively; ``.txt``, ``.pdf`` and
        ``.docx`` are supported.

        Raises:
            ValueError: If the extension is not one of the supported ones.
        """
        ext = Path(filename).suffix.lower()
        if ext == ".txt":
            return self.reader.read_txt(filename)
        if ext == ".pdf":
            return self.reader.read_pdf(filename)
        if ext == ".docx":
            return self.reader.read_docx(filename)
        raise ValueError("Unsupported file format")

    def _split_text(self, text):
        """Return the list of chunks to generate questions for."""
        if not text:
            # Nothing to ask about; avoids a pointless generator call and a
            # division by zero in the time estimate.
            return []
        if len(text) <= self._CHUNK_SIZE:
            # Short text is a single chunk; no need to involve the chunker.
            return [text]
        return self.chunker.chunk_text(text, self._CHUNK_SIZE)

    def _generate_per_chunk(self, filepath):
        """Read *filepath* and generate questions for each of its chunks.

        Returns:
            A ``(per_chunk, estimate)`` tuple: ``per_chunk`` holds the
            generator's result for every chunk, in chunk order, and
            ``estimate`` is the average time of the first ``_SAMPLE_SIZE``
            chunks multiplied by the number of chunks, in seconds
            (``0.0`` when there are no chunks).
        """
        ai_param = AIParam()
        chunks = self._split_text(self.read_file(filepath))
        sample_size = min(self._SAMPLE_SIZE, len(chunks))

        def ask(chunk):
            return self.qgen.generate_questions_advance(chunk, ai_param)

        # The sample chunks are timed while producing their real results, so
        # no chunk is sent to the generator twice.
        started = time.perf_counter()
        per_chunk = [ask(chunk) for chunk in chunks[:sample_size]]
        sample_elapsed = time.perf_counter() - started
        per_chunk.extend(ask(chunk) for chunk in chunks[sample_size:])

        if sample_size:
            estimate = sample_elapsed / sample_size * len(chunks)
        else:
            estimate = 0.0
        return per_chunk, estimate

    def generate_questions(self, filepath: str) -> dict:
        """Generate questions for every chunk of the file at *filepath*.

        Returns:
            A dict with ``estimated_total_time_seconds`` and
            ``estimated_minutes`` (both rounded to two decimals),
            ``total_chunks`` and ``chunks``, a list of
            ``{"chunk": <1-based index>, "questions": <generator result>}``.

        Raises:
            ValueError: If the file extension is not supported.
        """
        per_chunk, estimate = self._generate_per_chunk(filepath)
        return {
            "estimated_total_time_seconds": round(estimate, 2),
            "estimated_minutes": round(estimate / 60, 2),
            "total_chunks": len(per_chunk),
            "chunks": [
                {"chunk": number, "questions": questions}
                for number, questions in enumerate(per_chunk, start=1)
            ],
        }

    def react_generate_questions(self, filepath: str) -> "AIResult":
        """Generate questions for the file at *filepath* as an ``AIResult``.

        Same pipeline as ``generate_questions``, but chunks for which the
        generator returned an empty list are left out of ``Chunks``;
        ``TotalChunks`` still counts every chunk that was processed.

        Raises:
            ValueError: If the file extension is not supported.
        """
        per_chunk, estimate = self._generate_per_chunk(filepath)
        return AIResult(
            EstimatedTotalTimeSeconds=round(estimate, 2),
            EstimatedMinutes=round(estimate / 60, 2),
            TotalChunks=len(per_chunk),
            Chunks=[
                {"questions": questions}
                for questions in per_chunk
                if questions != []
            ],
        )
|