File size: 5,161 Bytes
a2ff264
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
from backend.services.DataReader import DataReader
from backend.services.ChunkGenerator import ChunkGenerator
from backend.services.QuestionGenerator import QuestionGenerator
from backend.models.AIParamModel import AIParam
from backend.models.AIResponseModel import AIResult
from pathlib import Path
import time

class PDFQuestionService:
    """Generate questions from a document by reading, chunking and querying.

    The service reads a ``.txt``, ``.pdf`` or ``.docx`` file through
    ``DataReader``, splits the text with ``ChunkGenerator`` and calls
    ``QuestionGenerator.generate_questions_advance`` once per chunk.
    """

    # Size argument forwarded to ``ChunkGenerator.chunk_text``. A text whose
    # length (in characters) does not exceed it is handled as a single chunk.
    CHUNK_SIZE = 100

    # Number of leading chunks whose generation time is measured in order to
    # extrapolate the total-time estimate.
    SAMPLE_SIZE = 2

    def __init__(self):
        self.reader = DataReader()
        self.chunker = ChunkGenerator()
        self.qgen = QuestionGenerator()

    def read_file(self, filename: str) -> str:
        """Return the text of *filename*, dispatching on its extension.

        The extension check is case-insensitive.

        Raises:
            ValueError: If the extension is not ``.txt``, ``.pdf`` or ``.docx``.
        """
        ext = Path(filename).suffix.lower()
        if ext == ".txt":
            return self.reader.read_txt(filename)
        if ext == ".pdf":
            return self.reader.read_pdf(filename)
        if ext == ".docx":
            return self.reader.read_docx(filename)
        raise ValueError("Unsupported file format")

    def _split_into_chunks(self, text: str) -> list:
        """Return the chunks to process for *text*.

        Empty text yields no chunks, text no longer than ``CHUNK_SIZE`` is
        returned as one chunk, and anything longer goes to the chunker.
        """
        if not text:
            return []
        if len(text) <= self.CHUNK_SIZE:
            # Short documents are a single chunk; the chunker is not needed.
            return [text]
        return list(self.chunker.chunk_text(text, self.CHUNK_SIZE))

    def _generate_for_chunks(self, chunks: list, ai_param) -> tuple:
        """Generate questions for every chunk and estimate the total time.

        Each chunk is sent to the question generator exactly once. The time
        spent on the first ``SAMPLE_SIZE`` chunks is averaged and multiplied
        by the number of chunks to obtain the estimate, so no chunk is
        generated twice just for timing purposes.

        Returns:
            A ``(results, estimated_seconds)`` tuple where ``results`` holds
            the generator output for each chunk, in chunk order, and
            ``estimated_seconds`` is ``0.0`` when there are no chunks.
        """
        total_chunks = len(chunks)
        sample_count = min(self.SAMPLE_SIZE, total_chunks)

        results = []
        sampled_seconds = 0.0
        for position, chunk in enumerate(chunks):
            started = time.perf_counter()
            results.append(self.qgen.generate_questions_advance(chunk, ai_param))
            if position < sample_count:
                sampled_seconds += time.perf_counter() - started

        if sample_count == 0:
            # Nothing was processed; avoid dividing by zero.
            return results, 0.0
        return results, (sampled_seconds / sample_count) * total_chunks

    def generate_questions(self, filepath: str) -> dict:
        """Generate questions for the document at *filepath*.

        Returns:
            A dict with the keys ``estimated_total_time_seconds``,
            ``estimated_minutes``, ``total_chunks`` and ``chunks``. ``chunks``
            is a list of ``{"chunk": <1-based index>, "questions": ...}``
            entries, one per chunk.

        Raises:
            ValueError: If the file extension is not supported.
        """
        ai_param = AIParam()
        text = self.read_file(filepath)

        chunks = self._split_into_chunks(text)
        results, est_total_time = self._generate_for_chunks(chunks, ai_param)

        return {
            "estimated_total_time_seconds": round(est_total_time, 2),
            "estimated_minutes": round(est_total_time / 60, 2),
            "total_chunks": len(chunks),
            "chunks": [
                {"chunk": number, "questions": questions}
                for number, questions in enumerate(results, start=1)
            ],
        }

    def react_generate_questions(self, filepath: str) -> "AIResult":
        """Generate questions for *filepath* and wrap them in an ``AIResult``.

        Chunks for which the generator returned an empty list are left out of
        ``Chunks``; ``TotalChunks`` still counts every chunk that was
        processed.

        Raises:
            ValueError: If the file extension is not supported.
        """
        ai_param = AIParam()
        text = self.read_file(filepath)

        chunks = self._split_into_chunks(text)
        results, est_total_time = self._generate_for_chunks(chunks, ai_param)

        return AIResult(
            EstimatedTotalTimeSeconds=round(est_total_time, 2),
            EstimatedMinutes=round(est_total_time / 60, 2),
            TotalChunks=len(chunks),
            Chunks=[
                {"questions": questions}
                for questions in results
                if questions != []
            ],
        )