VishwaTechnologiesPvtLtd committed
Commit a2ff264
1 Parent(s): 8a80df2
This view is limited to 50 files because it contains too many changes. See raw diff
Files changed (50)
  1. Space.yaml +3 -0
  2. __init__.py +0 -0
  3. app.py +26 -0
  4. app1.py +7 -0
  5. backend/__pycache__/__init__.cpython-312.pyc +0 -0
  6. backend/__pycache__/main.cpython-312.pyc +0 -0
  7. backend/api/__init__.py +0 -0
  8. backend/api/__pycache__/__init__.cpython-312.pyc +0 -0
  9. backend/api/__pycache__/items.cpython-312.pyc +0 -0
  10. backend/api/__pycache__/pdfreader.cpython-312.pyc +0 -0
  11. backend/api/__pycache__/textreader.cpython-312.pyc +0 -0
  12. backend/api/items.py +18 -0
  13. backend/api/pdfreader.py +13 -0
  14. backend/api/textreader.py +17 -0
  15. backend/models/AIParamModel.py +10 -0
  16. backend/models/AIResponseModel.py +19 -0
  17. backend/models/__init__.py +0 -0
  18. backend/models/__pycache__/AIParamModel.cpython-312.pyc +0 -0
  19. backend/models/__pycache__/AIResponseModel.cpython-312.pyc +0 -0
  20. backend/models/__pycache__/__init__.cpython-312.pyc +0 -0
  21. backend/models/__pycache__/item.cpython-312.pyc +0 -0
  22. backend/models/item.py +6 -0
  23. backend/repositories/__init__.py +0 -0
  24. backend/repositories/__pycache__/__init__.cpython-312.pyc +0 -0
  25. backend/repositories/__pycache__/item_repo.cpython-312.pyc +0 -0
  26. backend/repositories/item_repo.py +15 -0
  27. backend/services/ChunkGenerator.py +21 -0
  28. backend/services/DataReader.py +54 -0
  29. backend/services/IChunkGenerator.py +7 -0
  30. backend/services/IDataReader.py +12 -0
  31. backend/services/IQuestionGenerator.py +13 -0
  32. backend/services/ISentenceCheck.py +7 -0
  33. backend/services/PDFQuestionService.py +146 -0
  34. backend/services/QuestionGenerator.py +60 -0
  35. backend/services/SentenceCheck.py +54 -0
  36. backend/services/TextReaderQuestionGenerator.py +34 -0
  37. backend/services/__init__.py +0 -0
  38. backend/services/__pycache__/ChunkGenerator.cpython-312.pyc +0 -0
  39. backend/services/__pycache__/DataReader.cpython-312.pyc +0 -0
  40. backend/services/__pycache__/IChunkGenerator.cpython-312.pyc +0 -0
  41. backend/services/__pycache__/IDataReader.cpython-312.pyc +0 -0
  42. backend/services/__pycache__/IQuestionGenerator.cpython-312.pyc +0 -0
  43. backend/services/__pycache__/ISentenceCheck.cpython-312.pyc +0 -0
  44. backend/services/__pycache__/PDFQuestionService.cpython-312.pyc +0 -0
  45. backend/services/__pycache__/QuestionGenerator.cpython-312.pyc +0 -0
  46. backend/services/__pycache__/SentenceCheck.cpython-312.pyc +0 -0
  47. backend/services/__pycache__/TextReaderQuestionGenerator.cpython-312.pyc +0 -0
  48. backend/services/__pycache__/__init__.cpython-312.pyc +0 -0
  49. backend/services/__pycache__/item_service.cpython-312.pyc +0 -0
  50. backend/services/__pycache__/pdfreader_service.cpython-312.pyc +0 -0
Space.yaml ADDED
@@ -0,0 +1,3 @@
+ sdk: fastapi
+ app_file: main.py
+ python_version: "3.10"
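Note: Space.yaml points the Space at main.py, which this commit does not add. If the FastAPI instance in app.py (next file) is the intended entry point, a hypothetical main.py shim would be a one-line re-export — a sketch under that assumption, not part of this commit:

# main.py (hypothetical) — satisfies Space.yaml's app_file while keeping app.py as the real module
from app import app

if __name__ == "__main__":
    import uvicorn
    uvicorn.run(app, host="0.0.0.0", port=7860)  # 7860 is the usual Hugging Face Spaces port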
__init__.py ADDED
File without changes
app.py ADDED
@@ -0,0 +1,26 @@
+ from fastapi import FastAPI
+ from backend.api import items
+ from backend.api import pdfreader
+ from backend.api import textreader
+ from fastapi.middleware.cors import CORSMiddleware
+
+ app = FastAPI(title="Multi-layered FastAPI Example")
+
+ # Allow requests from your frontend origin
+ origins = [
+     "http://localhost:3000",  # React frontend
+     # Add more origins if needed
+ ]
+
+ app.add_middleware(
+     CORSMiddleware,
+     allow_origins=origins,        # List of allowed origins
+     allow_credentials=True,
+     allow_methods=["*"],          # Allow all HTTP methods (GET, POST, etc.)
+     allow_headers=["*"],          # Allow all headers
+ )
+
+
+ app.include_router(items.router)
+ app.include_router(pdfreader.router)
+ app.include_router(textreader.router)
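As a quick sanity check, the assembled app can be exercised in-process with FastAPI's TestClient — a minimal sketch, assuming the file above is importable as app and its service dependencies load:

from fastapi.testclient import TestClient
from app import app

client = TestClient(app)

# The items router is mounted at /items (see backend/api/items.py below)
resp = client.get("/items/")
print(resp.status_code, resp.json())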
app1.py ADDED
@@ -0,0 +1,7 @@
+ import gradio as gr
+
+ def greet(name):
+     return "Hello " + name + "!!"
+
+ demo = gr.Interface(fn=greet, inputs="text", outputs="text")
+ demo.launch(ssr_mode=False)
backend/__pycache__/__init__.cpython-312.pyc ADDED
Binary file (153 Bytes)
backend/__pycache__/main.cpython-312.pyc ADDED
Binary file (903 Bytes)
backend/api/__init__.py ADDED
File without changes
backend/api/__pycache__/__init__.cpython-312.pyc ADDED
Binary file (157 Bytes)
backend/api/__pycache__/items.cpython-312.pyc ADDED
Binary file (1.14 kB)
backend/api/__pycache__/pdfreader.cpython-312.pyc ADDED
Binary file (1.02 kB)
backend/api/__pycache__/textreader.cpython-312.pyc ADDED
Binary file (1.21 kB)
backend/api/items.py ADDED
@@ -0,0 +1,18 @@
+ from fastapi import APIRouter, HTTPException
+ from typing import List
+ from backend.models.item import Item
+ from backend.services.item_service import ItemService
+
+ router = APIRouter(prefix="/items", tags=["items"])
+ service = ItemService()
+
+ @router.get("/", response_model=List[Item])
+ def get_items():
+     return service.list_items()
+
+ @router.get("/{item_id}", response_model=Item)
+ def get_item(item_id: int):
+     item = service.get_item(item_id)
+     if item is None:
+         raise HTTPException(status_code=404, detail="Item not found")
+     return item
backend/api/pdfreader.py ADDED
@@ -0,0 +1,13 @@
+ from fastapi import APIRouter, UploadFile, File, HTTPException
+ from backend.services.pdfreader_service import PDFService
+
+ router = APIRouter(prefix="/pdfreader", tags=["items"])
+ service = PDFService()
+
+ @router.post("/upload")
+ async def upload_pdf(file: UploadFile = File(...)):
+     if not file.filename.endswith(".pdf"):
+         raise HTTPException(status_code=400, detail="Only PDF files are allowed")
+
+     result = await service.process_uploaded_pdf(file)
+     return result
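For reference, the upload endpoint expects a multipart form field named file; a minimal in-process sketch (sample.pdf is a hypothetical local file, and PDFService must be importable for the app to start):

from fastapi.testclient import TestClient
from app import app

client = TestClient(app)
with open("sample.pdf", "rb") as f:
    resp = client.post(
        "/pdfreader/upload",
        files={"file": ("sample.pdf", f, "application/pdf")},
    )
print(resp.json())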
backend/api/textreader.py ADDED
@@ -0,0 +1,17 @@
+ from fastapi import APIRouter, HTTPException
+ from backend.services.TextReaderQuestionGenerator import TextReaderQuestionGenerator
+ from pydantic import BaseModel
+
+ router = APIRouter(prefix="/txt", tags=["items"])
+ service = TextReaderQuestionGenerator()
+
+ # Define the request model
+ class TextRequest(BaseModel):
+     txt: str
+
+ @router.post("/read_text")
+ async def read_text(request: TextRequest):
+     if not request.txt:
+         raise HTTPException(status_code=400, detail="No text provided")
+     result = await service.textreader_question_generator(request.txt)
+     return result
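Because the endpoint takes a Pydantic body, callers post JSON with a txt key — a sketch with the same in-process client (the sample sentence is illustrative):

from fastapi.testclient import TestClient
from app import app

client = TestClient(app)
resp = client.post(
    "/txt/read_text",
    json={"txt": "Photosynthesis converts light energy into chemical energy."},
)
print(resp.json())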
backend/models/AIParamModel.py ADDED
@@ -0,0 +1,10 @@
+ from dataclasses import dataclass
+
+ @dataclass
+ class AIParam:
+     max_length: int = 64
+     num_return_sequences: int = 10
+     do_sample: bool = True
+     top_k: int = 50
+     top_p: float = 0.95
+     temperature: float = 0.8
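Since AIParam is a plain dataclass with defaults, callers can override individual sampling knobs at construction; the values below are illustrative, not from this commit:

from backend.models.AIParamModel import AIParam

conservative = AIParam(num_return_sequences=3, temperature=0.5)
print(conservative)  # remaining fields keep their defaults (max_length=64, top_k=50, ...)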
backend/models/AIResponseModel.py ADDED
@@ -0,0 +1,19 @@
+ from dataclasses import dataclass
+
+
+ @dataclass
+ class AIResult:
+     Chunks: list
+     TotalChunks: int
+     EstimatedTotalTimeSeconds: float
+     EstimatedMinutes: float
+
+ @dataclass
+ class AIResponseModel:
+     OriginalFileName: str
+     StoredFileName: str
+     SavedTo: str
+     AIResult: AIResult
+     ContentSize: int = 0
+
+
backend/models/__init__.py ADDED
File without changes
backend/models/__pycache__/AIParamModel.cpython-312.pyc ADDED
Binary file (721 Bytes)
backend/models/__pycache__/AIResponseModel.cpython-312.pyc ADDED
Binary file (937 Bytes)
backend/models/__pycache__/__init__.cpython-312.pyc ADDED
Binary file (160 Bytes)
backend/models/__pycache__/item.cpython-312.pyc ADDED
Binary file (469 Bytes)
backend/models/item.py ADDED
@@ -0,0 +1,6 @@
+ from pydantic import BaseModel
+
+ class Item(BaseModel):
+     id: int
+     name: str
+     description: str
backend/repositories/__init__.py ADDED
File without changes
backend/repositories/__pycache__/__init__.cpython-312.pyc ADDED
Binary file (166 Bytes)
backend/repositories/__pycache__/item_repo.cpython-312.pyc ADDED
Binary file (1.28 kB)
backend/repositories/item_repo.py ADDED
@@ -0,0 +1,15 @@
+ from typing import List
+ from backend.models.item import Item
+
+ # Simulate a database with an in-memory list
+ _items_db = [
+     Item(id=1, name="Item 1", description="The first item"),
+     Item(id=2, name="Item 2", description="The second item"),
+ ]
+
+ class ItemRepository:
+     def get_all(self) -> List[Item]:
+         return _items_db
+
+     def get_by_id(self, item_id: int) -> Item | None:
+         return next((item for item in _items_db if item.id == item_id), None)
backend/services/ChunkGenerator.py ADDED
@@ -0,0 +1,21 @@
+ from .IChunkGenerator import IChunkGenerator
+ import nltk
+ from nltk.tokenize import sent_tokenize
+ class ChunkGenerator(IChunkGenerator):
+     def chunk_text(self, text: str, max_words: int = 100) -> list:
+         sentences = sent_tokenize(text)
+         chunks, chunk = [], []
+         word_count = 0
+
+         for sentence in sentences:
+             word_count += len(sentence.split())
+             chunk.append(sentence)
+             if word_count >= max_words:
+                 chunks.append(" ".join(chunk))
+                 chunk = []
+                 word_count = 0
+
+         if chunk:
+             chunks.append(" ".join(chunk))
+
+         return chunks
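The chunker accumulates whole sentences until the running word count reaches max_words, so chunks never split mid-sentence (the last chunk may be shorter, and a chunk may overshoot the limit by one sentence). A small illustrative run, assuming nltk's punkt data has been downloaded:

import nltk
nltk.download("punkt")

from backend.services.ChunkGenerator import ChunkGenerator

text = "First sentence here. Second one follows. A third closes it."
for i, c in enumerate(ChunkGenerator().chunk_text(text, max_words=6), start=1):
    print(i, c)
# Prints two chunks: the first two sentences together (6 words), then the remainder.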
backend/services/DataReader.py ADDED
@@ -0,0 +1,54 @@
+ from .IDataReader import IDataReader
+ from PyPDF2 import PdfReader
+ from docx import Document
+
+ class DataReader(IDataReader):
+     def read_pdf(self, file_path: str) -> str:
+         """
+         Reads a PDF file and returns its text content.
+
+         :param file_path: Path to the PDF file.
+         :return: Text content of the PDF file.
+         """
+         try:
+             text = ""
+             with open(file_path, "rb") as f:
+                 reader = PdfReader(f)
+                 for page in reader.pages:
+                     page_text = page.extract_text()
+                     if page_text:
+                         text += page_text + "\n"
+             return text
+         except Exception as e:
+             print(f"Error reading PDF file: {e}")
+             return ""
+
+     def read_docx(self, file_path: str) -> str:
+         """
+         Reads a DOCX file and returns its text content.
+
+         :param file_path: Path to the DOCX file.
+         :return: Text content of the DOCX file.
+         """
+         try:
+             doc = Document(file_path)
+             text = "\n".join([para.text for para in doc.paragraphs])
+             return text
+         except Exception as e:
+             print(f"Error reading DOCX file: {e}")
+             return ""
+
+     def read_txt(self, file_path: str) -> str:
+         """
+         Reads a TXT file and returns its text content.
+
+         :param file_path: Path to the TXT file.
+         :return: Text content of the TXT file.
+         """
+         try:
+             with open(file_path, "r", encoding="utf-8") as f:
+                 text = f.read()
+             return text
+         except Exception as e:
+             print(f"Error reading TXT file: {e}")
+             return ""
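All three readers share the same contract: path in, plain text out, with errors swallowed into an empty string rather than raised. A usage sketch (notes.txt is a hypothetical file):

from backend.services.DataReader import DataReader

reader = DataReader()
content = reader.read_txt("notes.txt")
if not content:
    print("Read failed or file was empty — errors are printed, not raised.")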
backend/services/IChunkGenerator.py ADDED
@@ -0,0 +1,7 @@
+ from abc import ABC, abstractmethod
+
+ class IChunkGenerator(ABC):
+     @abstractmethod
+     def chunk_text(self, text: str, max_words: int = 100) -> list:
+         """Splits the text into smaller chunks."""
+         pass
backend/services/IDataReader.py ADDED
@@ -0,0 +1,12 @@
+ from abc import ABC, abstractmethod
+
+ class IDataReader(ABC):
+     @abstractmethod
+     def read_pdf(self, file_path: str) -> str:
+         pass
+     @abstractmethod
+     def read_docx(self, file_path: str) -> str:
+         pass
+     @abstractmethod
+     def read_txt(self, file_path: str) -> str:
+         pass
backend/services/IQuestionGenerator.py ADDED
@@ -0,0 +1,13 @@
+ from abc import ABC, abstractmethod
+ from backend.models.AIParamModel import AIParam
+
+ class IQuestionGenerator(ABC):
+     @abstractmethod
+     def generate_questions_advance(self, text: str, aIParam: AIParam) -> list:
+         """Generates questions from the given text."""
+         pass
+
+     @abstractmethod
+     def generate_questions_simple(self, text: str, aIParam: AIParam) -> list:
+         """Generates questions from the given text."""
+         pass
backend/services/ISentenceCheck.py ADDED
@@ -0,0 +1,7 @@
+ from abc import ABC, abstractmethod
+
+ class ISentenceCheck(ABC):
+
+     @abstractmethod
+     def IsSentenceCorrect(self, sentence: str) -> bool:
+         pass
backend/services/PDFQuestionService.py ADDED
@@ -0,0 +1,146 @@
+ from backend.services.DataReader import DataReader
+ from backend.services.ChunkGenerator import ChunkGenerator
+ from backend.services.QuestionGenerator import QuestionGenerator
+ from backend.models.AIParamModel import AIParam
+ from backend.models.AIResponseModel import AIResult
+ from pathlib import Path
+ import time
+
+ class PDFQuestionService:
+     def __init__(self):
+         self.reader = DataReader()
+         self.chunker = ChunkGenerator()
+         self.qgen = QuestionGenerator()
+
+     def read_file(self, filename: str) -> str:
+         ext = Path(filename).suffix.lower()
+         if ext == ".txt":
+             return self.reader.read_txt(filename)
+         elif ext == ".pdf":
+             return self.reader.read_pdf(filename)
+         elif ext == ".docx":
+             return self.reader.read_docx(filename)
+         else:
+             raise ValueError("Unsupported file format")
+
+     def generate_questions(self, filepath: str) -> dict:
+         ai_param = AIParam()
+         text = self.read_file(filepath)
+
+         if len(text) <= 100:
+             chunks = [text]  # short input: treat the whole text as a single chunk
+             total_chunks = len(chunks)
+
+             sample_size = min(2, total_chunks)
+             sample_chunks = chunks[:sample_size]
+
+             start_time = time.time()
+             for chunk in sample_chunks:
+                 self.qgen.generate_questions_advance(chunk, ai_param)
+             elapsed = time.time() - start_time
+             avg_time = elapsed / sample_size
+             est_total_time = avg_time * total_chunks
+
+             all_questions = []
+             for idx, chunk in enumerate(chunks):
+                 questions = self.qgen.generate_questions_advance(chunk, ai_param)
+                 all_questions.append({
+                     "chunk": idx + 1,
+                     "questions": questions
+                 })
+
+             return {
+                 "estimated_total_time_seconds": round(est_total_time, 2),
+                 "estimated_minutes": round(est_total_time / 60, 2),
+                 "total_chunks": total_chunks,
+                 "chunks": all_questions
+             }
+
+         chunks = self.chunker.chunk_text(text, 100)
+         total_chunks = len(chunks)
+
+         sample_size = min(2, total_chunks)
+         sample_chunks = chunks[:sample_size]
+
+         start_time = time.time()
+         for chunk in sample_chunks:
+             self.qgen.generate_questions_advance(chunk, ai_param)
+         elapsed = time.time() - start_time
+         avg_time = elapsed / sample_size
+         est_total_time = avg_time * total_chunks
+
+         all_questions = []
+         for idx, chunk in enumerate(chunks):
+             questions = self.qgen.generate_questions_advance(chunk, ai_param)
+             all_questions.append({
+                 "chunk": idx + 1,
+                 "questions": questions
+             })
+
+         return {
+             "estimated_total_time_seconds": round(est_total_time, 2),
+             "estimated_minutes": round(est_total_time / 60, 2),
+             "total_chunks": total_chunks,
+             "chunks": all_questions
+         }
+
+     def react_generate_questions(self, filepath: str) -> AIResult:
+         ai_param = AIParam()
+         text = self.read_file(filepath)
+
+         if len(text) <= 100:
+             chunks = [text]  # short input: treat the whole text as a single chunk
+             total_chunks = len(chunks)
+
+             sample_size = min(2, total_chunks)
+             sample_chunks = chunks[:sample_size]
+
+             start_time = time.time()
+             for chunk in sample_chunks:
+                 self.qgen.generate_questions_advance(chunk, ai_param)
+             elapsed = time.time() - start_time
+             avg_time = elapsed / sample_size
+             est_total_time = avg_time * total_chunks
+
+             all_questions = []
+             for idx, chunk in enumerate(chunks):
+                 questions = self.qgen.generate_questions_advance(chunk, ai_param)
+                 all_questions.append({
+                     "questions": questions
+                 })
+
+             return AIResult(
+                 EstimatedTotalTimeSeconds=round(est_total_time, 2),
+                 EstimatedMinutes=round(est_total_time / 60, 2),
+                 TotalChunks=total_chunks,
+                 Chunks=all_questions
+             )
+
+         chunks = self.chunker.chunk_text(text, 100)
+         total_chunks = len(chunks)
+
+         sample_size = min(2, total_chunks)
+         sample_chunks = chunks[:sample_size]
+
+         start_time = time.time()
+         for chunk in sample_chunks:
+             self.qgen.generate_questions_advance(chunk, ai_param)
+         elapsed = time.time() - start_time
+         avg_time = elapsed / sample_size
+         est_total_time = avg_time * total_chunks
+
+         all_questions = []
+         for idx, chunk in enumerate(chunks):
+             questions = self.qgen.generate_questions_advance(chunk, ai_param)
+             if questions:
+                 all_questions.append({
+                     "questions": questions
+                 })
+
+         return AIResult(
+             EstimatedTotalTimeSeconds=round(est_total_time, 2),
+             EstimatedMinutes=round(est_total_time / 60, 2),
+             TotalChunks=total_chunks,
+             Chunks=all_questions
+         )
+
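Putting the pipeline together: read the file, chunk it, time a two-chunk sample to estimate total runtime, then generate and filter questions per chunk. A usage sketch (document.pdf is a hypothetical path; the transformer models download on first import):

from backend.services.PDFQuestionService import PDFQuestionService

service = PDFQuestionService()
report = service.generate_questions("document.pdf")
print(report["total_chunks"], report["estimated_minutes"])
for entry in report["chunks"]:
    print(entry["chunk"], entry["questions"])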
backend/services/QuestionGenerator.py ADDED
@@ -0,0 +1,60 @@
+ from transformers import pipeline, AutoTokenizer, AutoModelForSeq2SeqLM
+ from .IQuestionGenerator import IQuestionGenerator
+ from backend.services.SentenceCheck import SentenceCheck
+ from backend.models.AIParamModel import AIParam
+ import torch
+
+ device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+ print(f"[QuestionGenerator] Using device: {device}")
+
+ # valhalla model with slow tokenizer
+ tokenizer_qg_simple = AutoTokenizer.from_pretrained("valhalla/t5-small-qg-hl", use_fast=False)
+ model_qg_simple = AutoModelForSeq2SeqLM.from_pretrained("valhalla/t5-small-qg-hl")
+
+ qg_simple = pipeline(
+     "text2text-generation",
+     model=model_qg_simple,
+     tokenizer=tokenizer_qg_simple,
+     device=0 if torch.cuda.is_available() else -1
+ )
+
+ # iarfmoose model with slow tokenizer
+ tokenizer_qg_advanced = AutoTokenizer.from_pretrained("iarfmoose/t5-base-question-generator", use_fast=False)
+ model_qg_advanced = AutoModelForSeq2SeqLM.from_pretrained("iarfmoose/t5-base-question-generator")
+
+ qg_advanced = pipeline(
+     "text2text-generation",
+     model=model_qg_advanced,
+     tokenizer=tokenizer_qg_advanced,
+     device=0 if torch.cuda.is_available() else -1
+ )
+ sentenceCheck = SentenceCheck()
+
+ class QuestionGenerator(IQuestionGenerator):
+     def generate_questions_advance(self, text: str, aIParam: AIParam) -> list:
+         input_text = f"generate questions: {text}"
+         outputs = qg_advanced(
+             input_text,
+             max_length=aIParam.max_length,
+             num_return_sequences=aIParam.num_return_sequences,
+             do_sample=aIParam.do_sample,
+             top_k=aIParam.top_k,
+             top_p=aIParam.top_p,
+             temperature=aIParam.temperature
+         )
+         raw_sentences = [o["generated_text"] for o in outputs]
+         filtered = [s for s in raw_sentences if sentenceCheck.IsSentenceCorrect(s)]
+         return filtered
+
+     def generate_questions_simple(self, text: str, aIParam: AIParam) -> list:
+         input_text = f"generate questions: {text}"
+         outputs = qg_simple(
+             input_text,
+             max_length=aIParam.max_length,
+             num_return_sequences=aIParam.num_return_sequences,
+             do_sample=aIParam.do_sample,
+             top_k=aIParam.top_k,
+             top_p=aIParam.top_p,
+             temperature=aIParam.temperature
+         )
+         return [o["generated_text"] for o in outputs]
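Both pipelines (and the SentenceCheck models) load at module import time, so the first import is slow. The advance path filters generations through SentenceCheck; the simple path returns them raw. A usage sketch (the sample text is illustrative):

from backend.services.QuestionGenerator import QuestionGenerator
from backend.models.AIParamModel import AIParam

qg = QuestionGenerator()
params = AIParam(num_return_sequences=3)  # smaller sample for a quick test
text = "The Nile is the longest river in Africa."
print(qg.generate_questions_simple(text, params))   # raw generations
print(qg.generate_questions_advance(text, params))  # filtered by SentenceCheck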
backend/services/SentenceCheck.py ADDED
@@ -0,0 +1,54 @@
+ from .ISentenceCheck import ISentenceCheck
+ from transformers import GPT2LMHeadModel, GPT2TokenizerFast
+ import language_tool_python
+ import torch
+ import nltk
+
+ nltk.download('punkt')
+
+ class SentenceCheck(ISentenceCheck):
+     def __init__(self):
+         self.tool = language_tool_python.LanguageTool('en-US')
+         self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+         print(f"[SentenceCheck] Using device: {self.device}")
+         self.model = GPT2LMHeadModel.from_pretrained("gpt2").to(self.device)
+         self.tokenizer = GPT2TokenizerFast.from_pretrained("gpt2")
+
+     def is_grammatically_correct(self, text):
+         matches = self.tool.check(text)
+         return len(matches) == 0
+
+     def is_single_word_sentence(self, text):
+         return "nosentence" if len(text.split()) <= 1 else text
+
+     def looks_meaningful(self, text):
+         words = nltk.word_tokenize(text)
+         english_words = [word for word in words if word.isalpha()]
+         return len(english_words) / len(words) > 0.5
+
+     def get_perplexity(self, sentence):
+         inputs = self.tokenizer(sentence, return_tensors="pt").to(self.device)
+         with torch.no_grad():
+             outputs = self.model(**inputs, labels=inputs["input_ids"])
+         loss = outputs.loss
+         return torch.exp(loss).item()
+
+     def IsSentenceCorrect(self, question: str) -> bool:
+         if self.is_single_word_sentence(question) == "nosentence":
+             return False
+         if not self.looks_meaningful(question):
+             return False
+         if not self.is_grammatically_correct(question):
+             return False
+         if self.get_perplexity(question) > 80:
+             return False
+         if len(question.split()) < 4 or len(question.split()) > 20:
+             return False
+         if not question.strip().endswith("?"):
+             return False
+         if question.split()[0].lower() not in [
+             "what", "how", "why", "when", "where", "is", "are", "can",
+             "should", "could", "who", "does", "do"
+         ]:
+             return False
+         return True
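IsSentenceCorrect is a conjunction of cheap heuristics (4–20 words, trailing ?, interrogative first word) and two model-backed gates (zero LanguageTool matches, GPT-2 perplexity at most 80). A small illustrative filter run (the candidates are made up; LanguageTool requires Java at runtime):

from backend.services.SentenceCheck import SentenceCheck

checker = SentenceCheck()
candidates = [
    "What is the capital of France?",   # expected to pass every gate
    "capital France?",                  # too short, no interrogative opener
    "The capital of France is Paris.",  # no trailing question mark
]
kept = [c for c in candidates if checker.IsSentenceCorrect(c)]
print(kept)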
backend/services/TextReaderQuestionGenerator.py ADDED
@@ -0,0 +1,34 @@
+ from backend.services.DataReader import DataReader
+ from backend.services.ChunkGenerator import ChunkGenerator
+ from backend.services.QuestionGenerator import QuestionGenerator
+ from backend.models.AIParamModel import AIParam
+ from backend.models.AIResponseModel import AIResult
+
+
+ class TextReaderQuestionGenerator:
+     def __init__(self):
+         self.reader = DataReader()
+         self.chunker = ChunkGenerator()
+         self.qgen = QuestionGenerator()
+
+
+     async def textreader_question_generator(self, text: str) -> list:
+         ai_param = AIParam()
+         if len(text) <= 100:
+             print("Text length is at most 100 characters.")
+             all_questions = []
+             questions = self.qgen.generate_questions_advance(text, ai_param)
+             all_questions.append({
+                 "questions": questions
+             })
+
+             return all_questions
+         else:
+             print("Text length is greater than 100 characters.")
+             all_questions = []
+             questions = self.qgen.generate_questions_advance(text, ai_param)
+             all_questions.append({
+                 "questions": questions
+             })
+
+             return all_questions
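The service method is async, so synchronous callers outside FastAPI need an event loop — a minimal sketch (the sample sentence is illustrative):

import asyncio
from backend.services.TextReaderQuestionGenerator import TextReaderQuestionGenerator

service = TextReaderQuestionGenerator()
result = asyncio.run(
    service.textreader_question_generator("Water boils at 100 degrees Celsius at sea level.")
)
print(result)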
backend/services/__init__.py ADDED
File without changes
backend/services/__pycache__/ChunkGenerator.cpython-312.pyc ADDED
Binary file (1.21 kB)
backend/services/__pycache__/DataReader.cpython-312.pyc ADDED
Binary file (2.88 kB)
backend/services/__pycache__/IChunkGenerator.cpython-312.pyc ADDED
Binary file (702 Bytes)
backend/services/__pycache__/IDataReader.cpython-312.pyc ADDED
Binary file (941 Bytes)
backend/services/__pycache__/IQuestionGenerator.cpython-312.pyc ADDED
Binary file (1.02 kB)
backend/services/__pycache__/ISentenceCheck.cpython-312.pyc ADDED
Binary file (639 Bytes)
backend/services/__pycache__/PDFQuestionService.cpython-312.pyc ADDED
Binary file (5.73 kB)
backend/services/__pycache__/QuestionGenerator.cpython-312.pyc ADDED
Binary file (2.77 kB)
backend/services/__pycache__/SentenceCheck.cpython-312.pyc ADDED
Binary file (4.09 kB)
backend/services/__pycache__/TextReaderQuestionGenerator.cpython-312.pyc ADDED
Binary file (1.82 kB)
backend/services/__pycache__/__init__.cpython-312.pyc ADDED
Binary file (162 Bytes)
backend/services/__pycache__/item_service.cpython-312.pyc ADDED
Binary file (1.16 kB)
backend/services/__pycache__/pdfreader_service.cpython-312.pyc ADDED
Binary file (2.73 kB)