Spaces:
				
			
			
	
			
			
		Running
		
			on 
			
			Zero
	
	
	
			
			
	
	
	
	
		
		
		Running
		
			on 
			
			Zero
	Sync from GitHub repo
Browse files
This Space is synced from the GitHub repo: https://github.com/SWivid/F5-TTS. Please submit contributions to the Space there.
- README_REPO.md +15 -3
- pyproject.toml +1 -1
- src/f5_tts/model/cfm.py +1 -1
- src/f5_tts/runtime/triton_trtllm/README.md +6 -5
- src/f5_tts/runtime/triton_trtllm/patch/__init__.py +137 -135
    	
        README_REPO.md
    CHANGED
    
    | @@ -110,6 +110,9 @@ docker container run --rm -it --gpus=all --mount 'type=volume,source=f5-tts,targ | |
| 110 |  | 
| 111 | 
             
            ## Inference
         | 
| 112 |  | 
|  | |
|  | |
|  | |
| 113 | 
             
            ### 1. Gradio App
         | 
| 114 |  | 
| 115 | 
             
            Currently supported features:
         | 
| @@ -176,10 +179,18 @@ f5-tts_infer-cli -c custom.toml | |
| 176 | 
             
            f5-tts_infer-cli -c src/f5_tts/infer/examples/multi/story.toml
         | 
| 177 | 
             
            ```
         | 
| 178 |  | 
| 179 | 
            -
            ### 3.  | 
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
| 180 |  | 
| 181 | 
            -
             | 
| 182 | 
            -
            - The [Issues](https://github.com/SWivid/F5-TTS/issues?q=is%3Aissue) are very useful, please try to find the solution by properly searching the keywords of problem encountered. If no answer found, then feel free to open an issue.
         | 
| 183 |  | 
| 184 |  | 
| 185 | 
             
            ## Training
         | 
| @@ -231,6 +242,7 @@ Note: Some model components have linting exceptions for E722 to accommodate tens | |
| 231 | 
             
            - [mrfakename](https://x.com/realmrfakename) huggingface space demo ~
         | 
| 232 | 
             
            - [f5-tts-mlx](https://github.com/lucasnewman/f5-tts-mlx/tree/main) Implementation with MLX framework by [Lucas Newman](https://github.com/lucasnewman)
         | 
| 233 | 
             
            - [F5-TTS-ONNX](https://github.com/DakeQQ/F5-TTS-ONNX) ONNX Runtime version by [DakeQQ](https://github.com/DakeQQ)
         | 
|  | |
| 234 |  | 
| 235 | 
             
            ## Citation
         | 
| 236 | 
             
            If our work and codebase is useful for you, please cite as:
         | 
|  | |
| 110 |  | 
| 111 | 
             
            ## Inference
         | 
| 112 |  | 
| 113 | 
            +
            - In order to achieve desired performance, take a moment to read [detailed guidance](src/f5_tts/infer).
         | 
| 114 | 
            +
            - Searching the [issues](https://github.com/SWivid/F5-TTS/issues?q=is%3Aissue) for keywords related to the problem you encountered is often very helpful.
         | 
| 115 | 
            +
             | 
| 116 | 
             
            ### 1. Gradio App
         | 
| 117 |  | 
| 118 | 
             
            Currently supported features:
         | 
|  | |
| 179 | 
             
            f5-tts_infer-cli -c src/f5_tts/infer/examples/multi/story.toml
         | 
| 180 | 
             
            ```
         | 
| 181 |  | 
| 182 | 
            +
            ### 3. Runtime
         | 
| 183 | 
            +
             | 
| 184 | 
            +
            Deployment solution with Triton and TensorRT-LLM.
         | 
| 185 | 
            +
             | 
| 186 | 
            +
            #### Benchmark Results
         | 
| 187 | 
            +
            Decoding on a single L20 GPU, using 26 different prompt_audio & target_text pairs.
         | 
| 188 | 
            +
             | 
| 189 | 
            +
            | Model | Concurrency | Avg Latency    | RTF   | 
         | 
| 190 | 
            +
            |-------|-------------|----------------|-------|
         | 
| 191 | 
            +
            | F5-TTS Base (Vocos) | 1     | 253 ms | 0.0394|
         | 
| 192 |  | 
| 193 | 
            +
            See [detailed instructions](src/f5_tts/runtime/triton_trtllm/README.md) for more information.
         | 
|  | |
| 194 |  | 
| 195 |  | 
| 196 | 
             
            ## Training
         | 
|  | |
| 242 | 
             
            - [mrfakename](https://x.com/realmrfakename) huggingface space demo ~
         | 
| 243 | 
             
            - [f5-tts-mlx](https://github.com/lucasnewman/f5-tts-mlx/tree/main) Implementation with MLX framework by [Lucas Newman](https://github.com/lucasnewman)
         | 
| 244 | 
             
            - [F5-TTS-ONNX](https://github.com/DakeQQ/F5-TTS-ONNX) ONNX Runtime version by [DakeQQ](https://github.com/DakeQQ)
         | 
| 245 | 
            +
            - [Yuekai Zhang](https://github.com/yuekaizhang) Triton and TensorRT-LLM support ~
         | 
| 246 |  | 
| 247 | 
             
            ## Citation
         | 
| 248 | 
             
            If our work and codebase is useful for you, please cite as:
         | 
    	
        pyproject.toml
    CHANGED
    
    | @@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta" | |
| 4 |  | 
| 5 | 
             
            [project]
         | 
| 6 | 
             
            name = "f5-tts"
         | 
| 7 | 
            -
            version = "1.0 | 
| 8 | 
             
            description = "F5-TTS: A Fairytaler that Fakes Fluent and Faithful Speech with Flow Matching"
         | 
| 9 | 
             
            readme = "README.md"
         | 
| 10 | 
             
            license = {text = "MIT License"}
         | 
|  | |
| 4 |  | 
| 5 | 
             
            [project]
         | 
| 6 | 
             
            name = "f5-tts"
         | 
| 7 | 
            +
            version = "1.1.0"
         | 
| 8 | 
             
            description = "F5-TTS: A Fairytaler that Fakes Fluent and Faithful Speech with Flow Matching"
         | 
| 9 | 
             
            readme = "README.md"
         | 
| 10 | 
             
            license = {text = "MIT License"}
         | 
    	
        src/f5_tts/model/cfm.py
    CHANGED
    
    | @@ -270,7 +270,7 @@ class CFM(nn.Module): | |
| 270 | 
             
                    else:
         | 
| 271 | 
             
                        drop_text = False
         | 
| 272 |  | 
| 273 | 
            -
                    # if want  | 
| 274 | 
             
                    # adding mask will use more memory, thus also need to adjust batchsampler with scaled down threshold for long sequences
         | 
| 275 | 
             
                    pred = self.transformer(
         | 
| 276 | 
             
                        x=φ, cond=cond, text=text, time=time, drop_audio_cond=drop_audio_cond, drop_text=drop_text
         | 
|  | |
| 270 | 
             
                    else:
         | 
| 271 | 
             
                        drop_text = False
         | 
| 272 |  | 
| 273 | 
            +
                    # if you want to rigorously mask out padding, record it in collate_fn in dataset.py and pass it in here
         | 
| 274 | 
             
                    # adding mask will use more memory, thus also need to adjust batchsampler with scaled down threshold for long sequences
         | 
| 275 | 
             
                    pred = self.transformer(
         | 
| 276 | 
             
                        x=φ, cond=cond, text=text, time=time, drop_audio_cond=drop_audio_cond, drop_text=drop_text
         | 
    	
        src/f5_tts/runtime/triton_trtllm/README.md
    CHANGED
    
    | @@ -1,4 +1,4 @@ | |
| 1 | 
            -
            ## Triton Inference Serving Best Practice for F5 | 
| 2 |  | 
| 3 | 
             
            ### Quick Start
         | 
| 4 | 
             
            Directly launch the service using docker compose.
         | 
| @@ -21,14 +21,15 @@ docker run -it --name "f5-server" --gpus all --net host -v $your_mount_dir --shm | |
| 21 |  | 
| 22 | 
             
            ### Export Models to TensorRT-LLM and Launch Server
         | 
| 23 | 
             
            Inside docker container, we would follow the official guide of TensorRT-LLM to build qwen and whisper TensorRT-LLM engines. See [here](https://github.com/NVIDIA/TensorRT-LLM/tree/main/examples/whisper).
         | 
| 24 | 
            -
             | 
| 25 | 
             
            ```sh
         | 
| 26 | 
             
            bash run.sh 0 4 F5TTS_Base
         | 
| 27 | 
             
            ```
         | 
|  | |
| 28 | 
             
            ### HTTP Client
         | 
| 29 | 
             
            ```sh
         | 
| 30 | 
             
            python3 client_http.py
         | 
| 31 | 
             
            ```
         | 
|  | |
| 32 | 
             
            ### Benchmark using Dataset
         | 
| 33 | 
             
            ```sh
         | 
| 34 | 
             
            num_task=2
         | 
| @@ -38,9 +39,9 @@ python3 client_grpc.py --num-tasks $num_task --huggingface-dataset yuekai/seed_t | |
| 38 | 
             
            ### Benchmark Results
         | 
| 39 | 
             
            Decoding on a single L20 GPU, using 26 different prompt_audio/target_text pairs.
         | 
| 40 |  | 
| 41 | 
            -
            | Model | Concurrency | Avg Latency | 
| 42 | 
            -
             | 
| 43 | 
             
            | F5-TTS Base (Vocos) | 1     | 253 ms | 0.0394|
         | 
| 44 |  | 
| 45 | 
             
            ### Credits
         | 
| 46 | 
            -
            1. [F5-TTS-TRTLLM](https://github.com/Bigfishering/f5-tts-trtllm)
         | 
|  | |
| 1 | 
            +
            ## Triton Inference Serving Best Practice for F5-TTS
         | 
| 2 |  | 
| 3 | 
             
            ### Quick Start
         | 
| 4 | 
             
            Directly launch the service using docker compose.
         | 
|  | |
| 21 |  | 
| 22 | 
             
            ### Export Models to TensorRT-LLM and Launch Server
         | 
| 23 | 
             
            Inside docker container, we would follow the official guide of TensorRT-LLM to build qwen and whisper TensorRT-LLM engines. See [here](https://github.com/NVIDIA/TensorRT-LLM/tree/main/examples/whisper).
         | 
|  | |
| 24 | 
             
            ```sh
         | 
| 25 | 
             
            bash run.sh 0 4 F5TTS_Base
         | 
| 26 | 
             
            ```
         | 
| 27 | 
            +
             | 
| 28 | 
             
            ### HTTP Client
         | 
| 29 | 
             
            ```sh
         | 
| 30 | 
             
            python3 client_http.py
         | 
| 31 | 
             
            ```
         | 
| 32 | 
            +
             | 
| 33 | 
             
            ### Benchmark using Dataset
         | 
| 34 | 
             
            ```sh
         | 
| 35 | 
             
            num_task=2
         | 
|  | |
| 39 | 
             
            ### Benchmark Results
         | 
| 40 | 
             
            Decoding on a single L20 GPU, using 26 different prompt_audio/target_text pairs.
         | 
| 41 |  | 
| 42 | 
            +
            | Model | Concurrency | Avg Latency    | RTF   | 
         | 
| 43 | 
            +
            |-------|-------------|----------------|-------|
         | 
| 44 | 
             
            | F5-TTS Base (Vocos) | 1     | 253 ms | 0.0394|
         | 
| 45 |  | 
| 46 | 
             
            ### Credits
         | 
| 47 | 
            +
            1. [F5-TTS-TRTLLM](https://github.com/Bigfishering/f5-tts-trtllm)
         | 
    	
        src/f5_tts/runtime/triton_trtllm/patch/__init__.py
    CHANGED
    
    | @@ -13,10 +13,14 @@ | |
| 13 | 
             
            # See the License for the specific language governing permissions and
         | 
| 14 | 
             
            # limitations under the License.
         | 
| 15 | 
             
            from .baichuan.model import BaichuanForCausalLM
         | 
| 16 | 
            -
            from .bert.model import ( | 
| 17 | 
            -
             | 
| 18 | 
            -
             | 
| 19 | 
            -
             | 
|  | |
|  | |
|  | |
|  | |
| 20 | 
             
            from .bloom.model import BloomForCausalLM, BloomModel
         | 
| 21 | 
             
            from .chatglm.config import ChatGLMConfig
         | 
| 22 | 
             
            from .chatglm.model import ChatGLMForCausalLM, ChatGLMModel
         | 
| @@ -46,8 +50,7 @@ from .mamba.model import MambaForCausalLM | |
| 46 | 
             
            from .medusa.config import MedusaConfig
         | 
| 47 | 
             
            from .medusa.model import MedusaForCausalLm
         | 
| 48 | 
             
            from .mllama.model import MLLaMAModel
         | 
| 49 | 
            -
            from .modeling_utils import  | 
| 50 | 
            -
                                         SpeculativeDecodingMode)
         | 
| 51 | 
             
            from .mpt.model import MPTForCausalLM, MPTModel
         | 
| 52 | 
             
            from .nemotron_nas.model import DeciLMForCausalLM
         | 
| 53 | 
             
            from .opt.model import OPTForCausalLM, OPTModel
         | 
| @@ -59,138 +62,137 @@ from .redrafter.model import ReDrafterForCausalLM | |
| 59 | 
             
            from .f5tts.model import F5TTS
         | 
| 60 |  | 
| 61 | 
             
            __all__ = [
         | 
| 62 | 
            -
                 | 
| 63 | 
            -
                 | 
| 64 | 
            -
                 | 
| 65 | 
            -
                 | 
| 66 | 
            -
                 | 
| 67 | 
            -
                 | 
| 68 | 
            -
                 | 
| 69 | 
            -
                 | 
| 70 | 
            -
                 | 
| 71 | 
            -
                 | 
| 72 | 
            -
                 | 
| 73 | 
            -
                 | 
| 74 | 
            -
                 | 
| 75 | 
            -
                 | 
| 76 | 
            -
                 | 
| 77 | 
            -
                 | 
| 78 | 
            -
                 | 
| 79 | 
            -
                 | 
| 80 | 
            -
                 | 
| 81 | 
            -
                 | 
| 82 | 
            -
                 | 
| 83 | 
            -
                 | 
| 84 | 
            -
                 | 
| 85 | 
            -
                 | 
| 86 | 
            -
                 | 
| 87 | 
            -
                 | 
| 88 | 
            -
                 | 
| 89 | 
            -
                 | 
| 90 | 
            -
                 | 
| 91 | 
            -
                 | 
| 92 | 
            -
                 | 
| 93 | 
            -
                 | 
| 94 | 
            -
                 | 
| 95 | 
            -
                 | 
| 96 | 
            -
                 | 
| 97 | 
            -
                 | 
| 98 | 
            -
                 | 
| 99 | 
            -
                 | 
| 100 | 
            -
                 | 
| 101 | 
            -
                 | 
| 102 | 
            -
                 | 
| 103 | 
            -
                 | 
| 104 | 
            -
                 | 
| 105 | 
            -
                 | 
| 106 | 
            -
                 | 
| 107 | 
            -
                 | 
| 108 | 
            -
                 | 
| 109 | 
            -
                 | 
| 110 | 
            -
                 | 
| 111 | 
            -
                 | 
| 112 | 
            -
                 | 
| 113 | 
            -
                 | 
| 114 | 
            -
                 | 
| 115 | 
            -
                 | 
| 116 | 
            -
                 | 
| 117 | 
            -
                 | 
| 118 | 
            -
                 | 
| 119 | 
            -
                 | 
| 120 | 
            -
                 | 
| 121 | 
            -
                 | 
| 122 | 
            -
                 | 
| 123 | 
            -
                 | 
| 124 | 
            -
                 | 
| 125 | 
            -
                 | 
| 126 | 
            -
                'F5TTS',
         | 
| 127 | 
             
            ]
         | 
| 128 |  | 
| 129 | 
             
            MODEL_MAP = {
         | 
| 130 | 
            -
                 | 
| 131 | 
            -
                 | 
| 132 | 
            -
                 | 
| 133 | 
            -
                 | 
| 134 | 
            -
                 | 
| 135 | 
            -
                 | 
| 136 | 
            -
                 | 
| 137 | 
            -
                 | 
| 138 | 
            -
                 | 
| 139 | 
            -
                 | 
| 140 | 
            -
                 | 
| 141 | 
            -
                 | 
| 142 | 
            -
                 | 
| 143 | 
            -
                 | 
| 144 | 
            -
                 | 
| 145 | 
            -
                 | 
| 146 | 
            -
                 | 
| 147 | 
            -
                 | 
| 148 | 
            -
                 | 
| 149 | 
            -
                 | 
| 150 | 
            -
                 | 
| 151 | 
            -
                 | 
| 152 | 
            -
                 | 
| 153 | 
            -
                 | 
| 154 | 
            -
                 | 
| 155 | 
            -
                 | 
| 156 | 
            -
                 | 
| 157 | 
            -
                 | 
| 158 | 
            -
                 | 
| 159 | 
            -
                 | 
| 160 | 
            -
                 | 
| 161 | 
            -
                 | 
| 162 | 
            -
                 | 
| 163 | 
            -
                 | 
| 164 | 
            -
                 | 
| 165 | 
            -
                 | 
| 166 | 
            -
                 | 
| 167 | 
            -
                 | 
| 168 | 
             
                GEMMA_ARCHITECTURE: GemmaForCausalLM,
         | 
| 169 | 
             
                GEMMA2_ARCHITECTURE: GemmaForCausalLM,
         | 
| 170 | 
            -
                 | 
| 171 | 
            -
                 | 
| 172 | 
            -
                 | 
| 173 | 
            -
                 | 
| 174 | 
            -
                 | 
| 175 | 
            -
                 | 
| 176 | 
            -
                 | 
| 177 | 
            -
                 | 
| 178 | 
            -
                 | 
| 179 | 
            -
                 | 
| 180 | 
            -
                 | 
| 181 | 
            -
                 | 
| 182 | 
            -
                 | 
| 183 | 
            -
                 | 
| 184 | 
            -
                 | 
| 185 | 
            -
                 | 
| 186 | 
            -
                 | 
| 187 | 
            -
                 | 
| 188 | 
            -
                 | 
| 189 | 
            -
                 | 
| 190 | 
            -
                 | 
| 191 | 
            -
                 | 
| 192 | 
            -
                 | 
| 193 | 
            -
                 | 
| 194 | 
            -
                 | 
| 195 | 
            -
                 | 
| 196 | 
             
            }
         | 
|  | |
| 13 | 
             
            # See the License for the specific language governing permissions and
         | 
| 14 | 
             
            # limitations under the License.
         | 
| 15 | 
             
            from .baichuan.model import BaichuanForCausalLM
         | 
| 16 | 
            +
            from .bert.model import (
         | 
| 17 | 
            +
                BertForQuestionAnswering,
         | 
| 18 | 
            +
                BertForSequenceClassification,
         | 
| 19 | 
            +
                BertModel,
         | 
| 20 | 
            +
                RobertaForQuestionAnswering,
         | 
| 21 | 
            +
                RobertaForSequenceClassification,
         | 
| 22 | 
            +
                RobertaModel,
         | 
| 23 | 
            +
            )
         | 
| 24 | 
             
            from .bloom.model import BloomForCausalLM, BloomModel
         | 
| 25 | 
             
            from .chatglm.config import ChatGLMConfig
         | 
| 26 | 
             
            from .chatglm.model import ChatGLMForCausalLM, ChatGLMModel
         | 
|  | |
| 50 | 
             
            from .medusa.config import MedusaConfig
         | 
| 51 | 
             
            from .medusa.model import MedusaForCausalLm
         | 
| 52 | 
             
            from .mllama.model import MLLaMAModel
         | 
| 53 | 
            +
            from .modeling_utils import PretrainedConfig, PretrainedModel, SpeculativeDecodingMode
         | 
|  | |
| 54 | 
             
            from .mpt.model import MPTForCausalLM, MPTModel
         | 
| 55 | 
             
            from .nemotron_nas.model import DeciLMForCausalLM
         | 
| 56 | 
             
            from .opt.model import OPTForCausalLM, OPTModel
         | 
|  | |
| 62 | 
             
            from .f5tts.model import F5TTS
         | 
| 63 |  | 
| 64 | 
             
            __all__ = [
         | 
| 65 | 
            +
                "BertModel",
         | 
| 66 | 
            +
                "BertForQuestionAnswering",
         | 
| 67 | 
            +
                "BertForSequenceClassification",
         | 
| 68 | 
            +
                "RobertaModel",
         | 
| 69 | 
            +
                "RobertaForQuestionAnswering",
         | 
| 70 | 
            +
                "RobertaForSequenceClassification",
         | 
| 71 | 
            +
                "BloomModel",
         | 
| 72 | 
            +
                "BloomForCausalLM",
         | 
| 73 | 
            +
                "DiT",
         | 
| 74 | 
            +
                "DeepseekForCausalLM",
         | 
| 75 | 
            +
                "FalconConfig",
         | 
| 76 | 
            +
                "DeepseekV2ForCausalLM",
         | 
| 77 | 
            +
                "FalconForCausalLM",
         | 
| 78 | 
            +
                "FalconModel",
         | 
| 79 | 
            +
                "GPTConfig",
         | 
| 80 | 
            +
                "GPTModel",
         | 
| 81 | 
            +
                "GPTForCausalLM",
         | 
| 82 | 
            +
                "OPTForCausalLM",
         | 
| 83 | 
            +
                "OPTModel",
         | 
| 84 | 
            +
                "LLaMAConfig",
         | 
| 85 | 
            +
                "LLaMAForCausalLM",
         | 
| 86 | 
            +
                "LLaMAModel",
         | 
| 87 | 
            +
                "MedusaConfig",
         | 
| 88 | 
            +
                "MedusaForCausalLm",
         | 
| 89 | 
            +
                "ReDrafterForCausalLM",
         | 
| 90 | 
            +
                "GPTJConfig",
         | 
| 91 | 
            +
                "GPTJModel",
         | 
| 92 | 
            +
                "GPTJForCausalLM",
         | 
| 93 | 
            +
                "GPTNeoXModel",
         | 
| 94 | 
            +
                "GPTNeoXForCausalLM",
         | 
| 95 | 
            +
                "PhiModel",
         | 
| 96 | 
            +
                "PhiConfig",
         | 
| 97 | 
            +
                "Phi3Model",
         | 
| 98 | 
            +
                "Phi3Config",
         | 
| 99 | 
            +
                "PhiForCausalLM",
         | 
| 100 | 
            +
                "Phi3ForCausalLM",
         | 
| 101 | 
            +
                "ChatGLMConfig",
         | 
| 102 | 
            +
                "ChatGLMForCausalLM",
         | 
| 103 | 
            +
                "ChatGLMModel",
         | 
| 104 | 
            +
                "BaichuanForCausalLM",
         | 
| 105 | 
            +
                "QWenConfigQWenForCausalLM",
         | 
| 106 | 
            +
                "QWenModel",
         | 
| 107 | 
            +
                "EncoderModel",
         | 
| 108 | 
            +
                "DecoderModel",
         | 
| 109 | 
            +
                "PretrainedConfig",
         | 
| 110 | 
            +
                "PretrainedModel",
         | 
| 111 | 
            +
                "WhisperEncoder",
         | 
| 112 | 
            +
                "MambaForCausalLM",
         | 
| 113 | 
            +
                "MambaConfig",
         | 
| 114 | 
            +
                "MPTForCausalLM",
         | 
| 115 | 
            +
                "MPTModel",
         | 
| 116 | 
            +
                "SkyworkForCausalLM",
         | 
| 117 | 
            +
                "GemmaConfig",
         | 
| 118 | 
            +
                "GemmaForCausalLM",
         | 
| 119 | 
            +
                "DbrxConfig",
         | 
| 120 | 
            +
                "DbrxForCausalLM",
         | 
| 121 | 
            +
                "RecurrentGemmaForCausalLM",
         | 
| 122 | 
            +
                "CogVLMConfig",
         | 
| 123 | 
            +
                "CogVLMForCausalLM",
         | 
| 124 | 
            +
                "EagleForCausalLM",
         | 
| 125 | 
            +
                "SpeculativeDecodingMode",
         | 
| 126 | 
            +
                "CohereForCausalLM",
         | 
| 127 | 
            +
                "MLLaMAModel",
         | 
| 128 | 
            +
                "F5TTS",
         | 
|  | |
| 129 | 
             
            ]
         | 
| 130 |  | 
| 131 | 
             
            MODEL_MAP = {
         | 
| 132 | 
            +
                "GPT2LMHeadModel": GPTForCausalLM,
         | 
| 133 | 
            +
                "GPT2LMHeadCustomModel": GPTForCausalLM,
         | 
| 134 | 
            +
                "GPTBigCodeForCausalLM": GPTForCausalLM,
         | 
| 135 | 
            +
                "Starcoder2ForCausalLM": GPTForCausalLM,
         | 
| 136 | 
            +
                "FuyuForCausalLM": GPTForCausalLM,
         | 
| 137 | 
            +
                "Kosmos2ForConditionalGeneration": GPTForCausalLM,
         | 
| 138 | 
            +
                "JAISLMHeadModel": GPTForCausalLM,
         | 
| 139 | 
            +
                "GPTForCausalLM": GPTForCausalLM,
         | 
| 140 | 
            +
                "NemotronForCausalLM": GPTForCausalLM,
         | 
| 141 | 
            +
                "OPTForCausalLM": OPTForCausalLM,
         | 
| 142 | 
            +
                "BloomForCausalLM": BloomForCausalLM,
         | 
| 143 | 
            +
                "RWForCausalLM": FalconForCausalLM,
         | 
| 144 | 
            +
                "FalconForCausalLM": FalconForCausalLM,
         | 
| 145 | 
            +
                "PhiForCausalLM": PhiForCausalLM,
         | 
| 146 | 
            +
                "Phi3ForCausalLM": Phi3ForCausalLM,
         | 
| 147 | 
            +
                "Phi3VForCausalLM": Phi3ForCausalLM,
         | 
| 148 | 
            +
                "Phi3SmallForCausalLM": Phi3ForCausalLM,
         | 
| 149 | 
            +
                "PhiMoEForCausalLM": Phi3ForCausalLM,
         | 
| 150 | 
            +
                "MambaForCausalLM": MambaForCausalLM,
         | 
| 151 | 
            +
                "GPTNeoXForCausalLM": GPTNeoXForCausalLM,
         | 
| 152 | 
            +
                "GPTJForCausalLM": GPTJForCausalLM,
         | 
| 153 | 
            +
                "MPTForCausalLM": MPTForCausalLM,
         | 
| 154 | 
            +
                "GLMModel": ChatGLMForCausalLM,
         | 
| 155 | 
            +
                "ChatGLMModel": ChatGLMForCausalLM,
         | 
| 156 | 
            +
                "ChatGLMForCausalLM": ChatGLMForCausalLM,
         | 
| 157 | 
            +
                "LlamaForCausalLM": LLaMAForCausalLM,
         | 
| 158 | 
            +
                "ExaoneForCausalLM": LLaMAForCausalLM,
         | 
| 159 | 
            +
                "MistralForCausalLM": LLaMAForCausalLM,
         | 
| 160 | 
            +
                "MixtralForCausalLM": LLaMAForCausalLM,
         | 
| 161 | 
            +
                "ArcticForCausalLM": LLaMAForCausalLM,
         | 
| 162 | 
            +
                "Grok1ModelForCausalLM": GrokForCausalLM,
         | 
| 163 | 
            +
                "InternLMForCausalLM": LLaMAForCausalLM,
         | 
| 164 | 
            +
                "InternLM2ForCausalLM": LLaMAForCausalLM,
         | 
| 165 | 
            +
                "MedusaForCausalLM": MedusaForCausalLm,
         | 
| 166 | 
            +
                "ReDrafterForCausalLM": ReDrafterForCausalLM,
         | 
| 167 | 
            +
                "BaichuanForCausalLM": BaichuanForCausalLM,
         | 
| 168 | 
            +
                "BaiChuanForCausalLM": BaichuanForCausalLM,
         | 
| 169 | 
            +
                "SkyworkForCausalLM": LLaMAForCausalLM,
         | 
| 170 | 
             
                GEMMA_ARCHITECTURE: GemmaForCausalLM,
         | 
| 171 | 
             
                GEMMA2_ARCHITECTURE: GemmaForCausalLM,
         | 
| 172 | 
            +
                "QWenLMHeadModel": QWenForCausalLM,
         | 
| 173 | 
            +
                "QWenForCausalLM": QWenForCausalLM,
         | 
| 174 | 
            +
                "Qwen2ForCausalLM": QWenForCausalLM,
         | 
| 175 | 
            +
                "Qwen2MoeForCausalLM": QWenForCausalLM,
         | 
| 176 | 
            +
                "Qwen2ForSequenceClassification": QWenForCausalLM,
         | 
| 177 | 
            +
                "Qwen2VLForConditionalGeneration": QWenForCausalLM,
         | 
| 178 | 
            +
                "WhisperEncoder": WhisperEncoder,
         | 
| 179 | 
            +
                "EncoderModel": EncoderModel,
         | 
| 180 | 
            +
                "DecoderModel": DecoderModel,
         | 
| 181 | 
            +
                "DbrxForCausalLM": DbrxForCausalLM,
         | 
| 182 | 
            +
                "RecurrentGemmaForCausalLM": RecurrentGemmaForCausalLM,
         | 
| 183 | 
            +
                "CogVLMForCausalLM": CogVLMForCausalLM,
         | 
| 184 | 
            +
                "DiT": DiT,
         | 
| 185 | 
            +
                "DeepseekForCausalLM": DeepseekForCausalLM,
         | 
| 186 | 
            +
                "DeciLMForCausalLM": DeciLMForCausalLM,
         | 
| 187 | 
            +
                "DeepseekV2ForCausalLM": DeepseekV2ForCausalLM,
         | 
| 188 | 
            +
                "EagleForCausalLM": EagleForCausalLM,
         | 
| 189 | 
            +
                "CohereForCausalLM": CohereForCausalLM,
         | 
| 190 | 
            +
                "MllamaForConditionalGeneration": MLLaMAModel,
         | 
| 191 | 
            +
                "BertForQuestionAnswering": BertForQuestionAnswering,
         | 
| 192 | 
            +
                "BertForSequenceClassification": BertForSequenceClassification,
         | 
| 193 | 
            +
                "BertModel": BertModel,
         | 
| 194 | 
            +
                "RobertaModel": RobertaModel,
         | 
| 195 | 
            +
                "RobertaForQuestionAnswering": RobertaForQuestionAnswering,
         | 
| 196 | 
            +
                "RobertaForSequenceClassification": RobertaForSequenceClassification,
         | 
| 197 | 
            +
                "F5TTS": F5TTS,
         | 
| 198 | 
             
            }
         | 
