sachin committed on
Commit
224556e
·
1 Parent(s): a0887d0

-co-locate-translatio

Browse files
Files changed (3) hide show
  1. Dockerfile +5 -5
  2. requirements.txt +2 -1
  3. src/server/main.py +139 -67
Dockerfile CHANGED
@@ -3,19 +3,19 @@ WORKDIR /app
3
 
4
  RUN apt-get update && apt-get install -y \
5
  python3 \
6
- python3-pip \
7
  git \
8
  ffmpeg \
9
- sudo \
10
- wget libvips\
11
- build-essential \
12
- curl \
13
  && ln -s /usr/bin/python3 /usr/bin/python \
14
  && rm -rf /var/lib/apt/lists/*
15
 
16
  RUN curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y
17
  ENV PATH="/root/.cargo/bin:${PATH}"
18
 
 
 
 
19
  RUN pip install --upgrade pip setuptools setuptools-rust torch
20
  COPY requirements.txt .
21
  #RUN pip install --no-cache-dir torch==2.6.0 torchvision
 
3
 
4
  RUN apt-get update && apt-get install -y \
5
  python3 \
6
+ python3-pip python3-distutils python3-dev python3-venv\
7
  git \
8
  ffmpeg \
9
+ sudo wget curl software-properties-common build-essential gcc g++ \
 
 
 
10
  && ln -s /usr/bin/python3 /usr/bin/python \
11
  && rm -rf /var/lib/apt/lists/*
12
 
13
  RUN curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y
14
  ENV PATH="/root/.cargo/bin:${PATH}"
15
 
16
+ RUN export CC=/usr/bin/gcc
17
+ RUN export CXX=/usr/bin/g++
18
+
19
  RUN pip install --upgrade pip setuptools setuptools-rust torch
20
  COPY requirements.txt .
21
  #RUN pip install --no-cache-dir torch==2.6.0 torchvision
requirements.txt CHANGED
@@ -7,4 +7,5 @@ uvicorn
7
  fastapi
8
  pydantic_settings
9
  slowapi
10
- python-multipart
 
 
7
  fastapi
8
  pydantic_settings
9
  slowapi
10
+ python-multipart
11
+ IndicTransToolkit @ git+https://github.com/VarunGumma/IndicTransToolkit.git@399b3fec93d2ee85cb998cb7a4fb7a7d83afcbcf
src/server/main.py CHANGED
@@ -14,12 +14,14 @@ from pydantic import BaseModel, field_validator
14
  from pydantic_settings import BaseSettings
15
  from slowapi import Limiter
16
  from slowapi.util import get_remote_address
17
- import requests
 
 
18
 
19
  from logging_config import logger
20
  from tts_config import SPEED, ResponseFormat, config as tts_config
21
  from gemma_llm import LLMManager
22
- #from auth import get_api_key, settings as auth_settings
23
 
24
  # Supported language codes
25
  SUPPORTED_LANGUAGES = {
@@ -68,6 +70,73 @@ app.state.limiter = limiter
68
 
69
  llm_manager = LLMManager(settings.llm_model_name)
70
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
71
  class ChatRequest(BaseModel):
72
  prompt: str
73
  src_lang: str = "kan_Knda" # Default to Kannada
@@ -93,48 +162,61 @@ class TranslationRequest(BaseModel):
93
  src_lang: str
94
  tgt_lang: str
95
 
96
- @field_validator("src_lang", "tgt_lang")
97
- def validate_language(cls, v):
98
- if v not in SUPPORTED_LANGUAGES:
99
- raise ValueError(f"Unsupported language code: {v}. Supported codes: {', '.join(SUPPORTED_LANGUAGES)}")
100
- return v
101
-
102
  class TranslationResponse(BaseModel):
103
  translations: List[str]
104
 
105
- async def call_external_translation(sentences: List[str], src_lang: str, tgt_lang: str) -> List[str]:
106
- external_url = "https://gaganyatri-dhwani-server.hf.space/v1/translate"
107
- payload = {
108
- "sentences": sentences,
109
- "src_lang": src_lang,
110
- "tgt_lang": tgt_lang
111
- }
112
- try:
113
- response = requests.post(
114
- external_url,
115
- json=payload,
116
- headers={
117
- "accept": "application/json",
118
- "Content-Type": "application/json"
119
- },
120
- timeout=60
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
121
  )
122
- response.raise_for_status()
123
- translations = response.json().get("translations", [])
124
- if not translations or len(translations) != len(sentences):
125
- logger.warning(f"Unexpected response format: {response.json()}")
126
- raise ValueError("Invalid response from translation service")
127
- return translations
128
- except requests.Timeout:
129
- logger.error("Translation request timed out")
130
- raise HTTPException(status_code=504, detail="Translation service timeout")
131
- except requests.RequestException as e:
132
- logger.error(f"Error during translation: {str(e)}")
133
- raise HTTPException(status_code=500, detail=f"Translation failed: {str(e)}")
134
- except ValueError as e:
135
- logger.error(f"Invalid response: {str(e)}")
136
- raise HTTPException(status_code=500, detail=str(e))
137
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
138
  @app.get("/v1/health")
139
  async def health_check():
140
  return {"status": "healthy", "model": settings.llm_model_name}
@@ -144,9 +226,7 @@ async def home():
144
  return RedirectResponse(url="/docs")
145
 
146
  @app.post("/v1/unload_all_models")
147
- async def unload_all_models(
148
- #api_key: str = Depends(get_api_key)
149
- ):
150
  try:
151
  logger.info("Starting to unload all models...")
152
  llm_manager.unload()
@@ -157,9 +237,7 @@ async def unload_all_models(
157
  raise HTTPException(status_code=500, detail=f"Failed to unload models: {str(e)}")
158
 
159
  @app.post("/v1/load_all_models")
160
- async def load_all_models(
161
- #api_key: str = Depends(get_api_key)
162
- ):
163
  try:
164
  logger.info("Starting to load all models...")
165
  llm_manager.load()
@@ -170,34 +248,30 @@ async def load_all_models(
170
  raise HTTPException(status_code=500, detail=f"Failed to load models: {str(e)}")
171
 
172
  @app.post("/v1/translate", response_model=TranslationResponse)
173
- async def translate(request: TranslationRequest):
174
  logger.info(f"Received translation request: {request.dict()}")
175
  try:
176
- translations = await call_external_translation(
177
  sentences=request.sentences,
178
  src_lang=request.src_lang,
179
  tgt_lang=request.tgt_lang
180
  )
181
  logger.info(f"Translation successful: {translations}")
182
  return TranslationResponse(translations=translations)
183
- except HTTPException as e:
184
- raise e
185
  except Exception as e:
186
  logger.error(f"Unexpected error during translation: {str(e)}")
187
  raise HTTPException(status_code=500, detail=f"Translation failed: {str(e)}")
188
 
189
  @app.post("/v1/chat", response_model=ChatResponse)
190
  @limiter.limit(settings.chat_rate_limit)
191
- async def chat(request: Request, chat_request: ChatRequest,
192
- #api_key: str = Depends(get_api_key)
193
- ):
194
  if not chat_request.prompt:
195
  raise HTTPException(status_code=400, detail="Prompt cannot be empty")
196
  logger.info(f"Received prompt: {chat_request.prompt}, src_lang: {chat_request.src_lang}, tgt_lang: {chat_request.tgt_lang}")
197
  try:
198
  # Translate prompt to English if src_lang is not English
199
  if chat_request.src_lang != "eng_Latn":
200
- translated_prompt = await call_external_translation(
201
  sentences=[chat_request.prompt],
202
  src_lang=chat_request.src_lang,
203
  tgt_lang="eng_Latn"
@@ -214,7 +288,7 @@ async def chat(request: Request, chat_request: ChatRequest,
214
 
215
  # Translate response to target language if tgt_lang is not English
216
  if chat_request.tgt_lang != "eng_Latn":
217
- translated_response = await call_external_translation(
218
  sentences=[response],
219
  src_lang="eng_Latn",
220
  tgt_lang=chat_request.tgt_lang
@@ -236,16 +310,15 @@ async def visual_query(
236
  query: str = Body(...),
237
  src_lang: str = Query("kan_Knda", enum=list(SUPPORTED_LANGUAGES)),
238
  tgt_lang: str = Query("kan_Knda", enum=list(SUPPORTED_LANGUAGES)),
239
- #api_key: str = Depends(get_api_key)
240
  ):
241
  try:
242
  image = Image.open(file.file)
243
  if image.size == (0, 0):
244
  raise HTTPException(status_code=400, detail="Uploaded image is empty or invalid")
245
-
246
  # Translate query to English if src_lang is not English
247
  if src_lang != "eng_Latn":
248
- translated_query = await call_external_translation(
249
  sentences=[query],
250
  src_lang=src_lang,
251
  tgt_lang="eng_Latn"
@@ -262,7 +335,7 @@ async def visual_query(
262
 
263
  # Translate answer to target language if tgt_lang is not English
264
  if tgt_lang != "eng_Latn":
265
- translated_answer = await call_external_translation(
266
  sentences=[answer],
267
  src_lang="eng_Latn",
268
  tgt_lang=tgt_lang
@@ -286,13 +359,12 @@ async def chat_v2(
286
  image: UploadFile = File(default=None),
287
  src_lang: str = Form("kan_Knda"),
288
  tgt_lang: str = Form("kan_Knda"),
289
- #api_key: str = Depends(get_api_key)
290
  ):
291
  if not prompt:
292
  raise HTTPException(status_code=400, detail="Prompt cannot be empty")
293
  if src_lang not in SUPPORTED_LANGUAGES or tgt_lang not in SUPPORTED_LANGUAGES:
294
  raise HTTPException(status_code=400, detail=f"Unsupported language code. Supported codes: {', '.join(SUPPORTED_LANGUAGES)}")
295
-
296
  logger.info(f"Received prompt: {prompt}, src_lang: {src_lang}, tgt_lang: {tgt_lang}, Image provided: {image is not None}")
297
 
298
  try:
@@ -301,10 +373,10 @@ async def chat_v2(
301
  if not image_data:
302
  raise HTTPException(status_code=400, detail="Uploaded image is empty")
303
  img = Image.open(io.BytesIO(image_data))
304
-
305
  # Translate prompt to English if src_lang is not English
306
  if src_lang != "eng_Latn":
307
- translated_prompt = await call_external_translation(
308
  sentences=[prompt],
309
  src_lang=src_lang,
310
  tgt_lang="eng_Latn"
@@ -320,7 +392,7 @@ async def chat_v2(
320
 
321
  # Translate response to target language if tgt_lang is not English
322
  if tgt_lang != "eng_Latn":
323
- translated_response = await call_external_translation(
324
  sentences=[decoded],
325
  src_lang="eng_Latn",
326
  tgt_lang=tgt_lang
@@ -333,7 +405,7 @@ async def chat_v2(
333
  else:
334
  # Translate prompt to English if src_lang is not English
335
  if src_lang != "eng_Latn":
336
- translated_prompt = await call_external_translation(
337
  sentences=[prompt],
338
  src_lang=src_lang,
339
  tgt_lang="eng_Latn"
@@ -343,13 +415,13 @@ async def chat_v2(
343
  else:
344
  prompt_to_process = prompt
345
  logger.info("Prompt already in English, no translation needed")
346
-
347
  decoded = await llm_manager.generate(prompt_to_process, settings.max_tokens)
348
  logger.info(f"Generated English response: {decoded}")
349
-
350
  # Translate response to target language if tgt_lang is not English
351
  if tgt_lang != "eng_Latn":
352
- translated_response = await call_external_translation(
353
  sentences=[decoded],
354
  src_lang="eng_Latn",
355
  tgt_lang=tgt_lang
 
14
  from pydantic_settings import BaseSettings
15
  from slowapi import Limiter
16
  from slowapi.util import get_remote_address
17
+ import torch
18
+ from transformers import AutoModelForSeq2SeqLM, AutoTokenizer
19
+ from IndicTransToolkit import IndicProcessor
20
 
21
  from logging_config import logger
22
  from tts_config import SPEED, ResponseFormat, config as tts_config
23
  from gemma_llm import LLMManager
24
+ # from auth import get_api_key, settings as auth_settings
25
 
26
  # Supported language codes
27
  SUPPORTED_LANGUAGES = {
 
70
 
71
  llm_manager = LLMManager(settings.llm_model_name)
72
 
73
+ # Translation Manager and Model Manager
74
+ DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
75
+
76
+ class TranslateManager:
77
+ def __init__(self, src_lang, tgt_lang, device_type=DEVICE, use_distilled=True):
78
+ self.device_type = device_type
79
+ self.tokenizer, self.model = self.initialize_model(src_lang, tgt_lang, use_distilled)
80
+
81
+ def initialize_model(self, src_lang, tgt_lang, use_distilled):
82
+ if src_lang.startswith("eng") and not tgt_lang.startswith("eng"):
83
+ model_name = "ai4bharat/indictrans2-en-indic-dist-200M" if use_distilled else "ai4bharat/indictrans2-en-indic-1B"
84
+ elif not src_lang.startswith("eng") and tgt_lang.startswith("eng"):
85
+ model_name = "ai4bharat/indictrans2-indic-en-dist-200M" if use_distilled else "ai4bharat/indictrans2-indic-en-1B"
86
+ elif not src_lang.startswith("eng") and not tgt_lang.startswith("eng"):
87
+ model_name = "ai4bharat/indictrans2-indic-indic-dist-320M" if use_distilled else "ai4bharat/indictrans2-indic-indic-1B"
88
+ else:
89
+ raise ValueError("Invalid language combination: English to English translation is not supported.")
90
+
91
+ tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
92
+ model = AutoModelForSeq2SeqLM.from_pretrained(
93
+ model_name,
94
+ trust_remote_code=True,
95
+ torch_dtype=torch.float16,
96
+ attn_implementation="flash_attention_2"
97
+ ).to(self.device_type)
98
+ return tokenizer, model
99
+
100
+ class ModelManager:
101
+ def __init__(self, device_type=DEVICE, use_distilled=True, is_lazy_loading=False):
102
+ self.models: dict[str, TranslateManager] = {}
103
+ self.device_type = device_type
104
+ self.use_distilled = use_distilled
105
+ self.is_lazy_loading = is_lazy_loading
106
+ if not is_lazy_loading:
107
+ self.preload_models()
108
+
109
+ def preload_models(self):
110
+ self.models['eng_indic'] = TranslateManager('eng_Latn', 'kan_Knda', self.device_type, self.use_distilled)
111
+ self.models['indic_eng'] = TranslateManager('kan_Knda', 'eng_Latn', self.device_type, self.use_distilled)
112
+ self.models['indic_indic'] = TranslateManager('kan_Knda', 'hin_Deva', self.device_type, self.use_distilled)
113
+
114
+ def get_model(self, src_lang, tgt_lang) -> TranslateManager:
115
+ if src_lang.startswith("eng") and not tgt_lang.startswith("eng"):
116
+ key = 'eng_indic'
117
+ elif not src_lang.startswith("eng") and tgt_lang.startswith("eng"):
118
+ key = 'indic_eng'
119
+ elif not src_lang.startswith("eng") and not tgt_lang.startswith("eng"):
120
+ key = 'indic_indic'
121
+ else:
122
+ raise ValueError("Invalid language combination: English to English translation is not supported.")
123
+
124
+ if key not in self.models:
125
+ if self.is_lazy_loading:
126
+ if key == 'eng_indic':
127
+ self.models[key] = TranslateManager('eng_Latn', 'kan_Knda', self.device_type, self.use_distilled)
128
+ elif key == 'indic_eng':
129
+ self.models[key] = TranslateManager('kan_Knda', 'eng_Latn', self.device_type, self.use_distilled)
130
+ elif key == 'indic_indic':
131
+ self.models[key] = TranslateManager('kan_Knda', 'hin_Deva', self.device_type, self.use_distilled)
132
+ else:
133
+ raise ValueError(f"Model for {key} is not preloaded and lazy loading is disabled.")
134
+ return self.models[key]
135
+
136
+ ip = IndicProcessor(inference=True)
137
+ model_manager = ModelManager()
138
+
139
+ # Pydantic Models
140
  class ChatRequest(BaseModel):
141
  prompt: str
142
  src_lang: str = "kan_Knda" # Default to Kannada
 
162
  src_lang: str
163
  tgt_lang: str
164
 
 
 
 
 
 
 
165
  class TranslationResponse(BaseModel):
166
  translations: List[str]
167
 
168
+ # Dependency to get TranslateManager
169
+ def get_translate_manager(src_lang: str, tgt_lang: str) -> TranslateManager:
170
+ return model_manager.get_model(src_lang, tgt_lang)
171
+
172
+ # Internal Translation Endpoint
173
+ @app.post("/translate", response_model=TranslationResponse)
174
+ async def translate(request: TranslationRequest, translate_manager: TranslateManager = Depends(get_translate_manager)):
175
+ input_sentences = request.sentences
176
+ src_lang = request.src_lang
177
+ tgt_lang = request.tgt_lang
178
+
179
+ if not input_sentences:
180
+ raise HTTPException(status_code=400, detail="Input sentences are required")
181
+
182
+ batch = ip.preprocess_batch(input_sentences, src_lang=src_lang, tgt_lang=tgt_lang)
183
+
184
+ inputs = translate_manager.tokenizer(
185
+ batch,
186
+ truncation=True,
187
+ padding="longest",
188
+ return_tensors="pt",
189
+ return_attention_mask=True,
190
+ ).to(translate_manager.device_type)
191
+
192
+ with torch.no_grad():
193
+ generated_tokens = translate_manager.model.generate(
194
+ **inputs,
195
+ use_cache=True,
196
+ min_length=0,
197
+ max_length=256,
198
+ num_beams=5,
199
+ num_return_sequences=1,
200
  )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
201
 
202
+ with translate_manager.tokenizer.as_target_tokenizer():
203
+ generated_tokens = translate_manager.tokenizer.batch_decode(
204
+ generated_tokens.detach().cpu().tolist(),
205
+ skip_special_tokens=True,
206
+ clean_up_tokenization_spaces=True,
207
+ )
208
+
209
+ translations = ip.postprocess_batch(generated_tokens, lang=tgt_lang)
210
+ return TranslationResponse(translations=translations)
211
+
212
+ # Helper function to perform internal translation
213
+ async def perform_internal_translation(sentences: List[str], src_lang: str, tgt_lang: str) -> List[str]:
214
+ translate_manager = model_manager.get_model(src_lang, tgt_lang)
215
+ request = TranslationRequest(sentences=sentences, src_lang=src_lang, tgt_lang=tgt_lang)
216
+ response = await translate(request, translate_manager)
217
+ return response.translations
218
+
219
+ # API Endpoints
220
  @app.get("/v1/health")
221
  async def health_check():
222
  return {"status": "healthy", "model": settings.llm_model_name}
 
226
  return RedirectResponse(url="/docs")
227
 
228
  @app.post("/v1/unload_all_models")
229
+ async def unload_all_models():
 
 
230
  try:
231
  logger.info("Starting to unload all models...")
232
  llm_manager.unload()
 
237
  raise HTTPException(status_code=500, detail=f"Failed to unload models: {str(e)}")
238
 
239
  @app.post("/v1/load_all_models")
240
+ async def load_all_models():
 
 
241
  try:
242
  logger.info("Starting to load all models...")
243
  llm_manager.load()
 
248
  raise HTTPException(status_code=500, detail=f"Failed to load models: {str(e)}")
249
 
250
  @app.post("/v1/translate", response_model=TranslationResponse)
251
+ async def translate_endpoint(request: TranslationRequest):
252
  logger.info(f"Received translation request: {request.dict()}")
253
  try:
254
+ translations = await perform_internal_translation(
255
  sentences=request.sentences,
256
  src_lang=request.src_lang,
257
  tgt_lang=request.tgt_lang
258
  )
259
  logger.info(f"Translation successful: {translations}")
260
  return TranslationResponse(translations=translations)
 
 
261
  except Exception as e:
262
  logger.error(f"Unexpected error during translation: {str(e)}")
263
  raise HTTPException(status_code=500, detail=f"Translation failed: {str(e)}")
264
 
265
  @app.post("/v1/chat", response_model=ChatResponse)
266
  @limiter.limit(settings.chat_rate_limit)
267
+ async def chat(request: Request, chat_request: ChatRequest):
 
 
268
  if not chat_request.prompt:
269
  raise HTTPException(status_code=400, detail="Prompt cannot be empty")
270
  logger.info(f"Received prompt: {chat_request.prompt}, src_lang: {chat_request.src_lang}, tgt_lang: {chat_request.tgt_lang}")
271
  try:
272
  # Translate prompt to English if src_lang is not English
273
  if chat_request.src_lang != "eng_Latn":
274
+ translated_prompt = await perform_internal_translation(
275
  sentences=[chat_request.prompt],
276
  src_lang=chat_request.src_lang,
277
  tgt_lang="eng_Latn"
 
288
 
289
  # Translate response to target language if tgt_lang is not English
290
  if chat_request.tgt_lang != "eng_Latn":
291
+ translated_response = await perform_internal_translation(
292
  sentences=[response],
293
  src_lang="eng_Latn",
294
  tgt_lang=chat_request.tgt_lang
 
310
  query: str = Body(...),
311
  src_lang: str = Query("kan_Knda", enum=list(SUPPORTED_LANGUAGES)),
312
  tgt_lang: str = Query("kan_Knda", enum=list(SUPPORTED_LANGUAGES)),
 
313
  ):
314
  try:
315
  image = Image.open(file.file)
316
  if image.size == (0, 0):
317
  raise HTTPException(status_code=400, detail="Uploaded image is empty or invalid")
318
+
319
  # Translate query to English if src_lang is not English
320
  if src_lang != "eng_Latn":
321
+ translated_query = await perform_internal_translation(
322
  sentences=[query],
323
  src_lang=src_lang,
324
  tgt_lang="eng_Latn"
 
335
 
336
  # Translate answer to target language if tgt_lang is not English
337
  if tgt_lang != "eng_Latn":
338
+ translated_answer = await perform_internal_translation(
339
  sentences=[answer],
340
  src_lang="eng_Latn",
341
  tgt_lang=tgt_lang
 
359
  image: UploadFile = File(default=None),
360
  src_lang: str = Form("kan_Knda"),
361
  tgt_lang: str = Form("kan_Knda"),
 
362
  ):
363
  if not prompt:
364
  raise HTTPException(status_code=400, detail="Prompt cannot be empty")
365
  if src_lang not in SUPPORTED_LANGUAGES or tgt_lang not in SUPPORTED_LANGUAGES:
366
  raise HTTPException(status_code=400, detail=f"Unsupported language code. Supported codes: {', '.join(SUPPORTED_LANGUAGES)}")
367
+
368
  logger.info(f"Received prompt: {prompt}, src_lang: {src_lang}, tgt_lang: {tgt_lang}, Image provided: {image is not None}")
369
 
370
  try:
 
373
  if not image_data:
374
  raise HTTPException(status_code=400, detail="Uploaded image is empty")
375
  img = Image.open(io.BytesIO(image_data))
376
+
377
  # Translate prompt to English if src_lang is not English
378
  if src_lang != "eng_Latn":
379
+ translated_prompt = await perform_internal_translation(
380
  sentences=[prompt],
381
  src_lang=src_lang,
382
  tgt_lang="eng_Latn"
 
392
 
393
  # Translate response to target language if tgt_lang is not English
394
  if tgt_lang != "eng_Latn":
395
+ translated_response = await perform_internal_translation(
396
  sentences=[decoded],
397
  src_lang="eng_Latn",
398
  tgt_lang=tgt_lang
 
405
  else:
406
  # Translate prompt to English if src_lang is not English
407
  if src_lang != "eng_Latn":
408
+ translated_prompt = await perform_internal_translation(
409
  sentences=[prompt],
410
  src_lang=src_lang,
411
  tgt_lang="eng_Latn"
 
415
  else:
416
  prompt_to_process = prompt
417
  logger.info("Prompt already in English, no translation needed")
418
+
419
  decoded = await llm_manager.generate(prompt_to_process, settings.max_tokens)
420
  logger.info(f"Generated English response: {decoded}")
421
+
422
  # Translate response to target language if tgt_lang is not English
423
  if tgt_lang != "eng_Latn":
424
+ translated_response = await perform_internal_translation(
425
  sentences=[decoded],
426
  src_lang="eng_Latn",
427
  tgt_lang=tgt_lang