Upload TFBilma
- config.json +1 -0
- configuration_bilma.py +4 -1
- modeling_bilma.py +8 -4
- tf_model.h5 +1 -1
config.json
CHANGED
@@ -9,6 +9,7 @@
   },
   "hidden_dropout_prob": 0.1,
   "hidden_size": 512,
+  "include_top": true,
   "model_type": "bilma",
   "num_attention_heads": 4,
   "num_hidden_layers": 2,
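
The new "include_top" entry in config.json is the serialized form of the constructor argument added to BilmaConfig below. A minimal sketch of reading the flag, assuming the updated config.json sits in the current directory:

import json

# Load the model configuration written by this commit.
with open("config.json") as f:
    cfg = json.load(f)

# "include_top" decides whether the final vocabulary projection layer is built.
print(cfg["include_top"])  # True in this commit
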
configuration_bilma.py
CHANGED
@@ -6,6 +6,7 @@ class BilmaConfig(PretrainedConfig):
     def __init__(
         self,
         weights="spanish",
+        include_top=True,
         num_attention_heads: int = 4,
         num_hidden_layers: int = 2,
         seq_max_length: int = 280,
@@ -15,9 +16,10 @@ class BilmaConfig(PretrainedConfig):
         **kwargs,
     ):
         if weights not in ["spanish", ""]:
-            raise ValueError(f"`weights` must be 'spanish'
+            raise ValueError(f"`weights` must be 'spanish', got {weights}.")
         if weights == "spanish":
             self.weights = weights
+            self.include_top = include_top
             self.num_attention_heads = 4
             self.num_hidden_layers = 2
             self.seq_max_length = 280
@@ -28,6 +30,7 @@ class BilmaConfig(PretrainedConfig):
             return
 
         self.weights = weights
+        self.include_top = include_top
         self.num_attention_heads = num_attention_heads
         self.num_hidden_layers = num_hidden_layers
         self.seq_max_length = seq_max_length
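
A minimal usage sketch of the updated constructor, assuming configuration_bilma.py is importable from the repository root. With weights="spanish" the architecture fields are forced to the pretrained defaults while the new include_top flag is still honored; with weights="" the caller-supplied values are kept:

from configuration_bilma import BilmaConfig

# Pretrained Spanish preset: include_top passes through, architecture is fixed.
config = BilmaConfig(weights="spanish", include_top=False)
print(config.include_top)        # False
print(config.num_hidden_layers)  # 2, forced by the "spanish" preset

# Empty weights string: the remaining arguments are taken as given.
custom = BilmaConfig(weights="", include_top=True, num_hidden_layers=4)
print(custom.num_hidden_layers)  # 4
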
modeling_bilma.py
CHANGED
@@ -9,7 +9,7 @@ from typing import Dict
 import re
 import unicodedata
 
-from
+from configuration_bilma import BilmaConfig
 
 # copied from preprocessing.py
 BLANK = ' '
@@ -49,7 +49,8 @@ class TFBilma(TFPreTrainedModel):
             num_heads=config.num_attention_heads,
             ff_dim=config.hidden_size,
             vocab_size=config.vocab_size,
-            rate=config.hidden_dropout_prob)
+            rate=config.hidden_dropout_prob,
+            include_top = config.include_top)
 
     @property
     def dummy_inputs(self) -> Dict[str, tf.Tensor]:
@@ -486,14 +487,17 @@ def accuracy_function(ignore_id=0):
         return tf.math.divide_no_nan(tf.reduce_sum(accuracies), tf.reduce_sum(mask))
     return acc_mlm
 
-def bilma(num_enc=6, embed_dim=300, max_length=50, num_heads=6, ff_dim=512, vocab_size=9739, rate=0.1):
+def bilma(num_enc=6, embed_dim=300, max_length=50, num_heads=6, ff_dim=512, vocab_size=9739, rate=0.1, include_top=True):
     capt_inputs_ids = Input(shape=(max_length, ), name='input_ids')
     capt_embedding = Embedding(vocab_size, embed_dim, mask_zero=False, name="bilma/embedding")
     capt_inputs = capt_embedding(capt_inputs_ids)
 
     enc = Encoder(num_enc, embed_dim, max_length, num_heads, ff_dim, rate=rate, name="bilma/encoder")
     enc_output = enc(capt_inputs)
-    fin_output = Dense(vocab_size, use_bias=True, name="bilma/dense_final")(enc_output)
+    if include_top:
+        fin_output = Dense(vocab_size, use_bias=True, name="bilma/dense_final")(enc_output)
+    else:
+        fin_output = enc_output
 
     caption_model = Model(inputs=capt_inputs_ids, outputs=[fin_output], name="bilma_model")
     return caption_model
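
A small sketch of what the include_top switch changes in the graph built by bilma(), assuming modeling_bilma.py and its TensorFlow/Keras dependencies are available. With the top enabled the model ends in the bilma/dense_final projection and emits per-token vocabulary logits; without it the raw encoder states are returned, which is the shape a downstream head would consume:

from modeling_bilma import bilma

# Default arguments with the language-model head: per-token vocabulary logits.
lm_model = bilma(include_top=True)
print(lm_model.output_shape)       # expected (None, 50, 9739) with the defaults

# Same encoder without the final Dense layer: hidden states of size embed_dim.
encoder_only = bilma(include_top=False)
print(encoder_only.output_shape)   # expected (None, 50, 300) with the defaults
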
tf_model.h5
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:6d31e357973be9bf86a3676237280b3ffe852ac994efd62d6eb67e06e36cd039
 size 156564220