Serhiy Stetskovych committed
Commit d5c312e · 1 Parent(s): 194e915

Don't use ONNX because ZeroGPU doesn't support it.

Files changed (3):
  1. app.py +2 -1
  2. requirements.txt +1 -2
  3. verbalizer.py +26 -94
app.py CHANGED
@@ -65,7 +65,8 @@ def verbalize(text):
     parts = split_to_parts(text)
     verbalized = ''
     for part in parts:
-        verbalized += verbalizer.generate_text(part)
+        if part.strip():
+            verbalized += verbalizer.generate_text(part) + ' '
     return verbalized
 
 description = f'''
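
Read as a whole, the updated function becomes the following (reconstructed from the hunk above; split_to_parts and the module-level verbalizer are defined elsewhere in app.py and are not shown in this commit):

def verbalize(text):
    parts = split_to_parts(text)
    verbalized = ''
    for part in parts:
        if part.strip():  # skip empty or whitespace-only segments
            verbalized += verbalizer.generate_text(part) + ' '  # space-join the verbalized parts
    return verbalized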
requirements.txt CHANGED
@@ -13,5 +13,4 @@ git+https://github.com/patriotyk/ipa-uk.git
 git+https://github.com/patriotyk/styletts2-inference@105aed29fa1a7698d08d920986890e9bbd03447c
 spaces
 numpy<2
-huggingface_hub
-onnxruntime
+accelerate>=0.26.0
 
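The dependency swap presumably follows from the rewritten verbalizer.py: onnxruntime and the explicit huggingface_hub downloads are no longer needed, while transformers requires the accelerate package once from_pretrained is called with device_map / low_cpu_mem_usage. A minimal sketch of that assumption:

from transformers import MBartForConditionalGeneration

# Loading with device_map / low_cpu_mem_usage is delegated to accelerate;
# without `accelerate` installed, from_pretrained raises an ImportError.
model = MBartForConditionalGeneration.from_pretrained(
    "skypro1111/mbart-large-50-verbalization",
    low_cpu_mem_usage=True,
    device_map="cpu",  # verbalizer.py passes the device it was constructed with
)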
verbalizer.py CHANGED
@@ -1,107 +1,39 @@
-import onnxruntime
-import numpy as np
-from transformers import AutoTokenizer
-from huggingface_hub import hf_hub_download
+from transformers import MBartForConditionalGeneration, AutoTokenizer
 
-verbalizer_model_name = "skypro1111/mbart-large-50-verbalization"
-
-def cache_model_from_hf(repo_id, model_dir="./"):
-    """Download ONNX models from HuggingFace Hub."""
-    files = ["onnx/encoder_model.onnx", "onnx/decoder_model.onnx", "onnx/decoder_model.onnx_data"]
-
-    for file in files:
-        hf_hub_download(
-            repo_id=repo_id,
-            filename=file,
-            local_dir=model_dir,
-        )
+verbalizer_model_name = "skypro1111/mbart-large-50-verbalization"
 
 
 class Verbalizer():
     def __init__(self, device):
-        cache_model_from_hf(verbalizer_model_name)
-
-        print("Loading tokenizer...")
-        self.tokenizer = AutoTokenizer.from_pretrained(verbalizer_model_name)
-        self.tokenizer.src_lang = "uk_UA"
-        self.tokenizer.tgt_lang = "uk_UA"
-
-        print("Creating ONNX sessions...")
-        self.encoder_session = self.create_onnx_session("onnx/encoder_model.onnx", device=='cuda')
-        self.decoder_session = self.create_onnx_session("onnx/decoder_model.onnx", device=='cuda')
-
-
-    def create_onnx_session(self, model_path, use_gpu=True):
-        """Create an ONNX inference session."""
-        session_options = onnxruntime.SessionOptions()
-        session_options.graph_optimization_level = onnxruntime.GraphOptimizationLevel.ORT_ENABLE_ALL
-        session_options.enable_mem_pattern = True
-        session_options.enable_mem_reuse = True
-        session_options.intra_op_num_threads = 8
-        #session_options.log_severity_level = 1
-
-        cuda_provider_options = {
-            'device_id': 0,
-            'arena_extend_strategy': 'kSameAsRequested',
-            'gpu_mem_limit': 0,  # 0 means no limit
-            'cudnn_conv_algo_search': 'DEFAULT',
-            'do_copy_in_default_stream': True,
-        }
-
-        if use_gpu and 'CUDAExecutionProvider' in onnxruntime.get_available_providers():
-            providers = [('CUDAExecutionProvider', cuda_provider_options)]
-        else:
-            providers = ['CPUExecutionProvider']
-
-        session = onnxruntime.InferenceSession(
-            model_path,
-            providers=providers,
-            sess_options=session_options
-        )
-
-        return session
+        self.device = device
+
+        self.model = MBartForConditionalGeneration.from_pretrained(verbalizer_model_name,
+                                                                    low_cpu_mem_usage=True,
+                                                                    device_map=device,
+        )
+        self.model.eval()
 
+        self.tokenizer = AutoTokenizer.from_pretrained(verbalizer_model_name)
+        self.tokenizer.src_lang = "uk_XX"
+        self.tokenizer.tgt_lang = "uk_XX"
 
     def generate_text(self, text):
         """Generate text for a single input."""
         # Prepare input
-        inputs = self.tokenizer(text, return_tensors="np", padding=True, truncation=True, max_length=512)
-        input_ids = inputs["input_ids"].astype(np.int64)
-        attention_mask = inputs["attention_mask"].astype(np.int64)
-
-        # Run encoder
-        encoder_outputs = self.encoder_session.run(
-            output_names=["last_hidden_state"],
-            input_feed={
-                "input_ids": input_ids,
-                "attention_mask": attention_mask,
-            }
-        )[0]
-
-        # Initialize decoder input
-        decoder_input_ids = np.array([[self.tokenizer.pad_token_id]], dtype=np.int64)
-
-        # Generate sequence
-        for _ in range(512):
-            # Run decoder
-            decoder_outputs = self.decoder_session.run(
-                output_names=["logits"],
-                input_feed={
-                    "input_ids": decoder_input_ids,
-                    "encoder_hidden_states": encoder_outputs,
-                    "encoder_attention_mask": attention_mask,
-                }
-            )[0]
-
-            # Get next token
-            next_token = decoder_outputs[:, -1:].argmax(axis=-1)
-            decoder_input_ids = np.concatenate([decoder_input_ids, next_token], axis=-1)
-
-            # Check if sequence is complete
-            if self.tokenizer.eos_token_id in decoder_input_ids[0]:
-                break
-
-        # Decode sequence
-        output_text = self.tokenizer.decode(decoder_input_ids[0], skip_special_tokens=True)
-        return output_text
+        input_text = "<verbalization>:" + text
+
+        encoded_input = self.tokenizer(
+            input_text,
+            return_tensors="pt",
+            padding=True,
+            truncation=True,
+            max_length=1024,
+        ).to(self.device)
+        output_ids = self.model.generate(
+            **encoded_input, max_length=1024, num_beams=5, early_stopping=True
+        )
+        normalized_text = self.tokenizer.decode(output_ids[0], skip_special_tokens=True)
+
+        return normalized_text.strip()
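
For reference, a minimal usage sketch of the rewritten class (the device selection and the sample sentence are illustrative assumptions, not part of the commit):

import torch
from verbalizer import Verbalizer

# Pick a device and load the mBART verbalization model onto it.
device = 'cuda' if torch.cuda.is_available() else 'cpu'
verbalizer = Verbalizer(device)

# Rewrites digits, dates, and similar tokens into spoken-form Ukrainian words.
print(verbalizer.generate_text('Зустріч відбудеться о 10:30.'))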