Update modeling_quiet.py
modeling_quiet.py (+8 -8)
@@ -55,7 +55,7 @@ from transformers.utils import (
     logging,
     replace_return_docstrings,
 )
-from
+from .configuration_quiet import QuietConfig
 
 
 if is_flash_attn_2_available():
@@ -67,7 +67,7 @@ if is_flash_attn_2_available():
 
 logger = logging.get_logger(__name__)
 
-_CONFIG_FOR_DOC = "
+_CONFIG_FOR_DOC = "QuietConfig"
 
 from reportlab.pdfgen import canvas
 from reportlab.lib.pagesizes import letter
@@ -270,7 +270,7 @@ class QuietAttention(nn.Module):
     and "Generating Long Sequences with Sparse Transformers".
     """
 
-    def __init__(self, config:
+    def __init__(self, config: QuietConfig, layer_idx: Optional[int] = None):
         super().__init__()
         self.config = config
         self.layer_idx = layer_idx
@@ -818,7 +818,7 @@ QUIET_ATTENTION_CLASSES = {
 
 
 class QuietDecoderLayer(nn.Module):
-    def __init__(self, config:
+    def __init__(self, config: QuietConfig, layer_idx: int):
         super().__init__()
         self.hidden_size = config.hidden_size
 
@@ -896,7 +896,7 @@ QUIET_START_DOCSTRING = r"""
     Use it as a regular PyTorch Module and refer to the PyTorch documentation for all matter related to general usage
     and behavior.
     Parameters:
-        config ([`
+        config ([`QuietConfig`]):
            Model configuration class with all the parameters of the model. Initializing with a config file does not
            load the weights associated with the model, only the configuration. Check out the
            [`~PreTrainedModel.from_pretrained`] method to load the model weights.
@@ -908,7 +908,7 @@ QUIET_START_DOCSTRING = r"""
     QUIET_START_DOCSTRING,
 )
 class QuietPreTrainedModel(PreTrainedModel):
-    config_class =
+    config_class = QuietConfig
     base_model_prefix = "model"
     supports_gradient_checkpointing = True
     _no_split_modules = ["QuietDecoderLayer"]
@@ -995,10 +995,10 @@ class QuietModel(QuietPreTrainedModel):
     """
     Transformer decoder consisting of *config.num_hidden_layers* layers. Each layer is a [`QuietDecoderLayer`]
     Args:
-        config:
+        config: QuietConfig
    """
 
-    def __init__(self, config:
+    def __init__(self, config: QuietConfig):
         super().__init__(config)
         self.padding_idx = config.pad_token_id
         self.vocab_size = config.vocab_size
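The new relative import expects a sibling configuration_quiet.py in the same package; that file is not part of this commit, so the sketch below is only a plausible reconstruction of it, not the actual code. Beyond the class name, every default is an assumption; the attributes mirror the ones this diff's context lines actually read (hidden_size, num_hidden_layers, vocab_size, pad_token_id), following the standard transformers custom-configuration pattern.

# Hypothetical sketch of configuration_quiet.py; not part of this commit.
# All defaults below are assumptions for illustration only.
from transformers import PretrainedConfig


class QuietConfig(PretrainedConfig):
    model_type = "quiet"  # assumed registry key

    def __init__(
        self,
        vocab_size=32000,        # assumed default
        hidden_size=4096,        # assumed default
        num_hidden_layers=32,    # assumed default
        pad_token_id=None,
        **kwargs,
    ):
        # Attributes read by modeling_quiet.py per the diff's context lines
        self.vocab_size = vocab_size
        self.hidden_size = hidden_size
        self.num_hidden_layers = num_hidden_layers
        super().__init__(pad_token_id=pad_token_id, **kwargs)

Setting config_class = QuietConfig on QuietPreTrainedModel is what lets from_pretrained instantiate and validate the correct configuration for every model subclass, and _CONFIG_FOR_DOC = "QuietConfig" supplies the class name that utilities such as the replace_return_docstrings import at the top of the file splice into the generated docstrings.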