Upload Bilma
- README.md +1 -1
- config.json +4 -2
- modeling_bilma.py +13 -13
- tf_model.h5 +2 -2
README.md
CHANGED
@@ -11,7 +11,7 @@ probably proofread and complete it, then remove this comment. -->
 
 # bilma
 
-This model
+This model was trained from scratch on an unknown dataset.
 It achieves the following results on the evaluation set:
 
 
config.json
CHANGED
@@ -1,14 +1,16 @@
 {
+  "_name_or_path": "w",
   "architectures": [
-    "
+    "lma"
   ],
   "auto_map": {
     "AutoConfig": "configuration_bilma.BilmaConfig",
-    "
+    "TFAutoModelForMaskedLM": "modeling_bilma.Bilma"
   },
   "drop_rate": 0.1,
   "embedding_dim": 512,
   "model_type": "bilma",
+  "name": "xxx",
   "num_attention_heads": 4,
   "num_encoders": 2,
   "seq_max_length": 280,
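Note: the new "TFAutoModelForMaskedLM" entry in auto_map is what lets the Transformers Auto classes resolve the custom class in modeling_bilma.py. A minimal loading sketch, where the repo id is an assumption (substitute the actual repo id or a local path containing config.json and modeling_bilma.py):

# Hedged sketch: load Bilma through the auto_map added in this config.json.
# The repo id below is an assumption; point it at the actual repo or a local clone.
from transformers import TFAutoModelForMaskedLM

model = TFAutoModelForMaskedLM.from_pretrained(
    "guillermoruiz/bilma",   # hypothetical id; any path with config.json + modeling_bilma.py works
    trust_remote_code=True,  # needed so modeling_bilma.Bilma (mapped above) is actually used
)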
modeling_bilma.py
CHANGED
@@ -9,7 +9,7 @@ from typing import Dict
 import re
 import unicodedata
 
-from
+from configuration_bilma import BilmaConfig
 
 # copied from preprocessing.py
 BLANK = ' '
@@ -33,6 +33,7 @@ SYMBOLS = set(";:,.@\\-\"/" + SYMBOLS_)
 class Bilma(TFPreTrainedModel):
     config_class = BilmaConfig
     main_input_name = "capt_input"
+    base_model_prefix = "bilma"
 
     def __init__(self, config):
         self.seq_max_length = config.seq_max_length
@@ -49,9 +50,7 @@ class Bilma(TFPreTrainedModel):
                           ff_dim=config.embedding_dim,
                           vocab_size=config.vocab_size,
                           rate=config.drop_rate)
-
-        #self.call(np.zeros((1, config.seq_max_length)))
-
+
     @property
     def dummy_inputs(self) -> Dict[str, tf.Tensor]:
 
@@ -83,8 +82,9 @@ class Bilma(TFPreTrainedModel):
 # -------------------------------
 
 class EncoderBlock(Layer):
-    def __init__(self, patch_dim, num_heads, ff_dim, rate=0.1, **kwargs):
+    def __init__(self, layer_num, patch_dim, num_heads, ff_dim, rate=0.1, **kwargs):
         super(EncoderBlock, self).__init__(**kwargs)
+        self.ln = layer_num
         self.p_d = patch_dim
         self.n_h = num_heads
         self.f_d = ff_dim
@@ -94,8 +94,8 @@ class EncoderBlock(Layer):
         self.ffn = Sequential(
             #[Conv1D(ff_dim, kernel_size=1, activation=tf.nn.gelu),
             # Conv1D(patch_dim, kernel_size=1),]
-            [Dense(ff_dim, activation=tf.nn.gelu),
-             Dense(patch_dim)
+            [Dense(ff_dim, activation=tf.nn.gelu, name=f"bilma/dense1_{layer_num}"),
+             Dense(patch_dim, name=f"bilma/dense2_{layer_num}")]
         )
         #self.layernorm0 = LayerNormalization(epsilon=1e-6)
         self.layernorm1 = LayerNormalization(epsilon=1e-6)
@@ -105,7 +105,7 @@ class EncoderBlock(Layer):
 
     def get_config(self):
         config = super(EncoderBlock, self).get_config()
-        config.update({"patch_dim":self.p_d, "num_heads":self.n_h, "ff_dim":self.f_d, "rate":self.rate})
+        config.update({"layer_num":self.ln, "patch_dim":self.p_d, "num_heads":self.n_h, "ff_dim":self.f_d, "rate":self.rate})
         return config
 
     def call(self, inputs, training=False):
@@ -172,7 +172,7 @@ class Encoder(Layer):
         self.n_h = num_heads
         self.f_d = ff_dim
         self.rate = rate
-        self._layers = [EncoderBlock(embed_dim, num_heads, ff_dim, rate=0.1) for
+        self._layers = [EncoderBlock(i, embed_dim, num_heads, ff_dim, rate=0.1) for i in range(n)]
         self.pe = positional_encoding(self.max_length, self.embed_dim)
 
     def get_config(self):
@@ -485,14 +485,14 @@ def accuracy_function(ignore_id=0):
 
 def bilma(num_enc=6, embed_dim=300, max_length=50, num_heads=6, ff_dim=512, vocab_size=9739, rate=0.1):
     capt_inputs_ids = Input(shape=(max_length, ), name='capt_input')
-    capt_embedding = Embedding(vocab_size, embed_dim, mask_zero=False)
+    capt_embedding = Embedding(vocab_size, embed_dim, mask_zero=False, name="bilma/embedding")
     capt_inputs = capt_embedding(capt_inputs_ids)
 
-    enc = Encoder(num_enc, embed_dim, max_length, num_heads, ff_dim, rate=rate)
+    enc = Encoder(num_enc, embed_dim, max_length, num_heads, ff_dim, rate=rate, name="bilma/encoder")
     enc_output = enc(capt_inputs)
-    fin_output = Dense(vocab_size, use_bias=True)(enc_output)
+    fin_output = Dense(vocab_size, use_bias=True, name="bilma/dense_final")(enc_output)
 
-    caption_model = Model(inputs=capt_inputs_ids, outputs=[fin_output])
+    caption_model = Model(inputs=capt_inputs_ids, outputs=[fin_output], name="bilma_model")
     return caption_model
 
 def load(model_file):
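Note on the EncoderBlock changes above: passing layer_num into the constructor, echoing it in get_config(), and fixing the Dense sublayer names (bilma/dense1_{layer_num}, bilma/dense2_{layer_num}) makes each block reconstructible from its config and gives every weight a deterministic path inside tf_model.h5. A minimal round-trip sketch, assuming modeling_bilma.py is importable from the working directory:

# Hedged sketch: config round-trip for the updated EncoderBlock.
# Keras rebuilds custom layers via from_config(get_config()) when a saved model
# is loaded, so every constructor argument (now including layer_num) must survive.
from modeling_bilma import EncoderBlock

blk = EncoderBlock(layer_num=0, patch_dim=512, num_heads=4, ff_dim=512, rate=0.1)
cfg = blk.get_config()                   # now contains "layer_num" as well
rebuilt = EncoderBlock.from_config(cfg)  # would raise TypeError without layer_num in cfg
assert rebuilt.ln == blk.ln and rebuilt.p_d == blk.p_d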
tf_model.h5
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
-size
+oid sha256:f8e658b722954addfd4fe9af9d4daaa2386fd98f7838d3c763bd6e7f03c1ed79
+size 156562964
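The tf_model.h5 entry is a Git LFS pointer, so the oid and size above describe the actual weight file rather than its contents. A small sketch for verifying a downloaded copy against the pointer (the local path is an assumption):

# Hedged sketch: check a downloaded tf_model.h5 against the LFS pointer above.
import hashlib, os

path = "tf_model.h5"  # assumed local path to the fetched weights
h = hashlib.sha256()
with open(path, "rb") as f:
    for chunk in iter(lambda: f.read(1 << 20), b""):
        h.update(chunk)

assert os.path.getsize(path) == 156562964
assert h.hexdigest() == "f8e658b722954addfd4fe9af9d4daaa2386fd98f7838d3c763bd6e7f03c1ed79"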