Spaces:

TimurHromek
/

HROM-V1

Running

App Files Files Community

TimurHromek committed on Mar 28

Commit

95d187a

1 Parent(s): dd6c81d

Uploaded model code and more.

Browse files

Files changed (4) hide show

HROM_Trainer.py +360 -0
LICENSE +201 -0
app.py +104 -0
tokenizer/hrom_tokenizer.json +0 -0

HROM_Trainer.py ADDED Viewed

	@@ -0,0 +1,360 @@

+import torch
+import torch.nn as nn
+from torch.utils.data import Dataset, DataLoader
+from datasets import load_dataset
+from tokenizers import Tokenizer, models, trainers, pre_tokenizers, processors, decoders
+import math
+import os
+import re
+from datetime import datetime
+from contextlib import nullcontext
+# Configuration
+# Global hyperparameters and run settings. Every component in this file reads
+# its settings from this dict via CONFIG[...].
+CONFIG = {
+    "dim": 512,  # width of token embeddings and of every transformer layer
+    "n_layers": 6,  # number of HROMBlock layers stacked in HROM
+    "n_heads": 8,  # attention heads; head size is dim // n_heads
+    "ff_dim": 2048,  # inner width of the gated feed-forward layer
+    "dropout": 0.1,  # dropout probability for attention weights and residual branches
+    "max_seq_len": 1024,  # ChatDataset truncates each example to this many tokens
+    "batch_size": 32,  # examples per DataLoader batch
+    "checkpoint_interval": 1000,  # training steps between checkpoint saves
+    "debug_interval": 500,  # training steps between loss prints / sample generations
+    "dataset": "daily_dialog",  # dataset name passed to datasets.load_dataset
+    "vocab_size": 32000,  # BPE vocabulary size; also the model's output dimension
+    "tokenizer_train_samples": 100000,  # cap on rows used for tokenizer training and by ChatDataset
+    "learning_rate": 3e-4,  # AdamW learning rate
+    "max_turns": 6,  # utterances kept per dialog
+    "max_checkpoints": 5,  # checkpoint files kept before the oldest are deleted
+    "num_epochs": 50  # Increased number of epochs for longer training
+}
+class RotaryEmbedding(nn.Module):
+    def __init__(self, dim):
+        super().__init__()
+        inv_freq = 1.0 / (10000 ** (torch.arange(0, dim, 2).float() / dim))
+        self.register_buffer("inv_freq", inv_freq)
+    def forward(self, seq_len):
+        t = torch.arange(seq_len, device=self.inv_freq.device).type_as(self.inv_freq)
+        freqs = torch.einsum("i, j -> i j", t, self.inv_freq)
+        return torch.cat((freqs, freqs), dim=-1)
+def rotate_half(x):
+    x1, x2 = x.chunk(2, dim=-1)
+    return torch.cat((-x2, x1), dim=-1)
+def apply_rotary_pos_emb(pos, t):
+    pos = pos.unsqueeze(0).unsqueeze(1)
+    return (t * pos.cos()) + (rotate_half(t) * pos.sin())
+class SwiGLU(nn.Module):
+    def forward(self, x):
+        x, gate = x.chunk(2, dim=-1)
+        return x * torch.sigmoid(gate)
+class HROMAttention(nn.Module):
+    def __init__(self):
+        super().__init__()
+        self.dim = CONFIG["dim"]
+        self.n_heads = CONFIG["n_heads"]
+        self.head_dim = self.dim // self.n_heads
+        self.qkv = nn.Linear(self.dim, 3 * self.dim)
+        self.proj = nn.Linear(self.dim, self.dim)
+        self.rotary = RotaryEmbedding(self.head_dim)
+        self.dropout = nn.Dropout(CONFIG["dropout"])
+    def forward(self, x, mask=None):
+        B, T, _ = x.shape
+        qkv = self.qkv(x).reshape(B, T, 3, self.n_heads, self.head_dim)
+        q, k, v = qkv.unbind(2)
+        q = q.transpose(1, 2)
+        k = k.transpose(1, 2)
+        v = v.transpose(1, 2)
+        pos = self.rotary(T)
+        q = apply_rotary_pos_emb(pos, q)
+        k = apply_rotary_pos_emb(pos, k)
+        attn = (q @ k.transpose(-2, -1)) * (1.0 / math.sqrt(self.head_dim))
+        if mask is not None:
+            mask = mask.unsqueeze(1)
+            attn = attn + mask
+        attn = torch.softmax(attn, dim=-1)
+        attn = self.dropout(attn)
+        out = attn @ v
+        out = out.transpose(1, 2).reshape(B, T, self.dim)
+        return self.proj(out)
+class HROMBlock(nn.Module):
+    def __init__(self):
+        super().__init__()
+        self.attn = HROMAttention()
+        self.ff = nn.Sequential(
+            nn.Linear(CONFIG["dim"], 2 * CONFIG["ff_dim"]),
+            SwiGLU(),
+            nn.Linear(CONFIG["ff_dim"], CONFIG["dim"])
+        )
+        self.norm1 = nn.LayerNorm(CONFIG["dim"])
+        self.norm2 = nn.LayerNorm(CONFIG["dim"])
+        self.dropout = nn.Dropout(CONFIG["dropout"])
+    def forward(self, x, mask=None):
+        x = x + self.dropout(self.attn(self.norm1(x), mask))
+        x = x + self.dropout(self.ff(self.norm2(x)))
+        return x
+class HROM(nn.Module):
+    def __init__(self):
+        super().__init__()
+        self.embed = nn.Embedding(CONFIG["vocab_size"], CONFIG["dim"])
+        self.blocks = nn.ModuleList([HROMBlock() for _ in range(CONFIG["n_layers"])])
+        self.norm = nn.LayerNorm(CONFIG["dim"])
+        self.head = nn.Linear(CONFIG["dim"], CONFIG["vocab_size"])
+        self.apply(self._init_weights)
+    def _init_weights(self, module):
+        if isinstance(module, nn.Linear):
+            torch.nn.init.normal_(module.weight, mean=0.0, std=0.02)
+            if module.bias is not None:
+                torch.nn.init.zeros_(module.bias)
+    def forward(self, x, attention_mask=None):
+        x = self.embed(x)
+        if attention_mask is not None:
+            B, T = attention_mask.shape
+            causal_mask = torch.triu(torch.ones(T, T) * float('-inf'), diagonal=1)
+            causal_mask = causal_mask.to(x.device)
+            pad_mask = attention_mask.unsqueeze(1).unsqueeze(2).to(dtype=torch.float32)
+            pad_mask = (1.0 - pad_mask) * torch.finfo(torch.float32).min
+            mask = causal_mask + pad_mask.squeeze(1)
+        else:
+            B, T = x.shape[:2]
+            mask = torch.triu(torch.ones(T, T) * float('-inf'), diagonal=1)
+            mask = mask.to(x.device)
+            mask = mask.unsqueeze(0).expand(B, -1, -1)
+        for block in self.blocks:
+            x = block(x, mask)
+        return self.head(self.norm(x))
+class TokenizerTrainer:
+    def __init__(self):
+        self.tokenizer = Tokenizer(models.BPE())
+        self.tokenizer.pre_tokenizer = pre_tokenizers.ByteLevel(add_prefix_space=True)
+        self.tokenizer.decoder = decoders.ByteLevel()
+        self.special_tokens = ["<pad>", "<s>", "</s>", "<unk>", "<user>", "<assistant>"]
+    def train(self, dataset_name):
+        dataset = load_dataset(dataset_name, split=f"train[:{CONFIG['tokenizer_train_samples']}]")
+        text_samples = []
+        for entry in dataset:
+            if "dialog" in entry:
+                for i, utterance in enumerate(entry["dialog"][:CONFIG["max_turns"]]):
+                    role = "<user>" if i % 2 == 0 else "<assistant>"
+                    text_samples.append(f"{role} {utterance}")
+            else:
+                text_samples.append(self._clean_text(entry.get("text", "")))
+        trainer = trainers.BpeTrainer(
+            vocab_size=CONFIG["vocab_size"],
+            special_tokens=self.special_tokens,
+            min_frequency=2,
+            show_progress=True
+        )
+        self.tokenizer.train_from_iterator(text_samples, trainer=trainer, length=len(text_samples))
+        self.tokenizer.post_processor = processors.TemplateProcessing(
+            single="$A </s>",
+            pair="$A $B </s>",
+            special_tokens=[("</s>", self.tokenizer.token_to_id("</s>"))],
+        )
+        os.makedirs("tokenizer", exist_ok=True)
+        self.tokenizer.save("tokenizer/hrom_tokenizer.json")
+    def _clean_text(self, text):
+        text = re.sub(r'[^\w\s.,!?\'\-:;<>]', '', text)
+        text = re.sub(r'\s+', ' ', text).strip()
+        return text
+class ChatDataset(Dataset):
+    def __init__(self, tokenizer):
+        full_dataset = load_dataset(CONFIG["dataset"], split="train")
+        num_samples = min(len(full_dataset), CONFIG["tokenizer_train_samples"])
+        self.dataset = full_dataset.shuffle(seed=42).select(range(num_samples))
+        self.tokenizer = tokenizer
+        self.max_length = CONFIG["max_seq_len"]
+        self.turn_sep = self.tokenizer.token_to_id("</s>")
+    def __len__(self):
+        return len(self.dataset)
+    def __getitem__(self, idx):
+        entry = self.dataset[idx]
+        formatted = []
+        if "dialog" in entry:
+            dialog = entry["dialog"][:CONFIG["max_turns"]]
+            for i, utterance in enumerate(dialog):
+                role_token = "<user>" if i % 2 == 0 else "<assistant>"
+                formatted.extend([
+                    self.tokenizer.token_to_id(role_token),
+                    *self.tokenizer.encode(utterance).ids,
+                    self.turn_sep
+                ])
+        else:
+            text = entry.get("text", "")
+            formatted.extend([
+                self.tokenizer.token_to_id("<user>"),
+                *self.tokenizer.encode(text).ids,
+                self.turn_sep
+            ])
+        formatted = formatted[:self.max_length-2]
+        formatted = [self.tokenizer.token_to_id("<s>"), *formatted, self.tokenizer.token_to_id("</s>")]
+        return {
+            "input_ids": formatted[:-1],
+            "labels": formatted[1:]
+        }
+    @staticmethod
+    def collate_fn(batch):
+        max_len = max(len(item["input_ids"]) for item in batch)
+        pad_id = Tokenizer.from_file("tokenizer/hrom_tokenizer.json").token_to_id("<pad>")
+        inputs, labels, masks = [], [], []
+        for item in batch:
+            pad_len = max_len - len(item["input_ids"])
+            inputs.append(item["input_ids"] + [pad_id] * pad_len)
+            labels.append(item["labels"] + [pad_id] * pad_len)
+            masks.append([1] * len(item["input_ids"]) + [0] * pad_len)
+        return {
+            "input_ids": torch.tensor(inputs),
+            "labels": torch.tensor(labels),
+            "attention_mask": torch.tensor(masks)
+        }
+class HROMTrainer:
+    def __init__(self, model, tokenizer):
+        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+        self.model = model.to(self.device)
+        if self.device.type == "cuda":
+            self.scaler = torch.cuda.amp.GradScaler()
+        else:
+            self.scaler = None
+        self.optimizer = torch.optim.AdamW(
+            self.model.parameters(),
+            lr=CONFIG["learning_rate"],
+            fused=True if self.device.type == "cuda" else False
+        )
+        self.tokenizer = tokenizer
+    def train_step(self, batch):
+        self.optimizer.zero_grad()
+        autocast = torch.cuda.amp.autocast if self.device.type == "cuda" else nullcontext
+        with autocast():
+            outputs = self.model(
+                batch["input_ids"].to(self.device),
+                attention_mask=batch["attention_mask"].to(self.device)
+            )
+            loss = nn.CrossEntropyLoss(ignore_index=self.tokenizer.token_to_id("<pad>"))(
+                outputs.view(-1, CONFIG["vocab_size"]),
+                batch["labels"].view(-1).to(self.device)
+            )
+        if self.scaler is not None:
+            self.scaler.scale(loss).backward()
+            torch.nn.utils.clip_grad_norm_(self.model.parameters(), 1.0)
+            self.scaler.step(self.optimizer)
+            self.scaler.update()
+        else:
+            loss.backward()
+            torch.nn.utils.clip_grad_norm_(self.model.parameters(), 1.0)
+            self.optimizer.step()
+        return loss.item()
+class SafetyManager:
+    def __init__(self, model, tokenizer):
+        self.model = model
+        self.tokenizer = tokenizer
+        self.bad_words = ["hate", "kill", "harm"]
+        self.bad_word_ids = [tokenizer.encode(w).ids for w in self.bad_words]
+    def content_filter(self, text):
+        tokens = self.tokenizer.encode(text).ids
+        for bad_ids in self.bad_word_ids:
+            if any(tokens[i:i+len(bad_ids)] == bad_ids for i in range(len(tokens))):
+                return False
+        return True
+    def generate_safely(self, prompt, max_length=50):
+        input_ids = self.tokenizer.encode(prompt).ids
+        device = next(self.model.parameters()).device
+        for _ in range(max_length):
+            with torch.no_grad():
+                logits = self.model(torch.tensor([input_ids]).to(device))
+            next_token = logits.argmax(-1)[:, -1].item()
+            if next_token == self.tokenizer.token_to_id("</s>"):
+                break
+            generated = self.tokenizer.decode(input_ids + [next_token])
+            if not self.content_filter(generated):
+                break
+            input_ids.append(next_token)
+        return self.tokenizer.decode(input_ids)
+    def debug_generation(self, prompt="Hello!"):
+        print(f"\nSafety Check Generation:")
+        response = self.generate_safely(prompt)
+        print(f"Prompt: {prompt}\nResponse: {response}")
+class CheckpointManager:
+    def __init__(self):
+        self.checkpoint_dir = "checkpoints"
+        os.makedirs(self.checkpoint_dir, exist_ok=True)
+    def save(self, model, optimizer, step):
+        timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
+        path = f"{self.checkpoint_dir}/hrom_{timestamp}_step{step}.pt"
+        torch.save({
+            "model": model.state_dict(),
+            "optimizer": optimizer.state_dict(),
+            "step": step,
+            "config": CONFIG
+        }, path)
+        self._cleanup_old_checkpoints()
+    def _cleanup_old_checkpoints(self):
+        checkpoints = sorted(os.listdir(self.checkpoint_dir),
+                             key=lambda x: os.path.getmtime(os.path.join(self.checkpoint_dir, x)))
+        while len(checkpoints) > CONFIG["max_checkpoints"]:
+            os.remove(os.path.join(self.checkpoint_dir, checkpoints[0]))
+            checkpoints = checkpoints[1:]
+def train():
+    checkpoint_manager = CheckpointManager()
+    if not os.path.exists("tokenizer/hrom_tokenizer.json"):
+        print("Training tokenizer...")
+        tokenizer_trainer = TokenizerTrainer()
+        tokenizer_trainer.train(CONFIG["dataset"])
+    tokenizer = Tokenizer.from_file("tokenizer/hrom_tokenizer.json")
+    model = HROM()
+    print("Downloading and caching the dataset...")
+    _ = load_dataset(CONFIG["dataset"], split="train", download_mode="reuse_cache_if_exists")
+    dataset = ChatDataset(tokenizer)
+    dataloader = DataLoader(
+        dataset,
+        batch_size=CONFIG["batch_size"],
+        collate_fn=ChatDataset.collate_fn
+    )
+    trainer_obj = HROMTrainer(model, tokenizer)
+    safety = SafetyManager(model, tokenizer)
+    step = 0
+    model.train()
+    for epoch in range(CONFIG["num_epochs"]):
+        for batch in dataloader:
+            loss = trainer_obj.train_step(batch)
+            if step % CONFIG["checkpoint_interval"] == 0:
+                checkpoint_manager.save(model, trainer_obj.optimizer, step)
+                safety.debug_generation()
+            if step % CONFIG["debug_interval"] == 0:
+                print(f"Step {step} | Loss: {loss:.4f}")
+                safety.debug_generation("What's the meaning of life?")
+            step += 1
+# Start training only when run as a script, so importing this module (as
+# app.py does for HROM, CONFIG and SafetyManager) does not launch a run.
+if __name__ == "__main__":
+    train()

LICENSE ADDED Viewed

	@@ -0,0 +1,201 @@

+                                 Apache License
+                           Version 2.0, January 2004
+                        http://www.apache.org/licenses/
+   TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
+   1. Definitions.
+      "License" shall mean the terms and conditions for use, reproduction,
+      and distribution as defined by Sections 1 through 9 of this document.
+      "Licensor" shall mean the copyright owner or entity authorized by
+      the copyright owner that is granting the License.
+      "Legal Entity" shall mean the union of the acting entity and all
+      other entities that control, are controlled by, or are under common
+      control with that entity. For the purposes of this definition,
+      "control" means (i) the power, direct or indirect, to cause the
+      direction or management of such entity, whether by contract or
+      otherwise, or (ii) ownership of fifty percent (50%) or more of the
+      outstanding shares, or (iii) beneficial ownership of such entity.
+      "You" (or "Your") shall mean an individual or Legal Entity
+      exercising permissions granted by this License.
+      "Source" form shall mean the preferred form for making modifications,
+      including but not limited to software source code, documentation
+      source, and configuration files.
+      "Object" form shall mean any form resulting from mechanical
+      transformation or translation of a Source form, including but
+      not limited to compiled object code, generated documentation,
+      and conversions to other media types.
+      "Work" shall mean the work of authorship, whether in Source or
+      Object form, made available under the License, as indicated by a
+      copyright notice that is included in or attached to the work
+      (an example is provided in the Appendix below).
+      "Derivative Works" shall mean any work, whether in Source or Object
+      form, that is based on (or derived from) the Work and for which the
+      editorial revisions, annotations, elaborations, or other modifications
+      represent, as a whole, an original work of authorship. For the purposes
+      of this License, Derivative Works shall not include works that remain
+      separable from, or merely link (or bind by name) to the interfaces of,
+      the Work and Derivative Works thereof.
+      "Contribution" shall mean any work of authorship, including
+      the original version of the Work and any modifications or additions
+      to that Work or Derivative Works thereof, that is intentionally
+      submitted to Licensor for inclusion in the Work by the copyright owner
+      or by an individual or Legal Entity authorized to submit on behalf of
+      the copyright owner. For the purposes of this definition, "submitted"
+      means any form of electronic, verbal, or written communication sent
+      to the Licensor or its representatives, including but not limited to
+      communication on electronic mailing lists, source code control systems,
+      and issue tracking systems that are managed by, or on behalf of, the
+      Licensor for the purpose of discussing and improving the Work, but
+      excluding communication that is conspicuously marked or otherwise
+      designated in writing by the copyright owner as "Not a Contribution."
+      "Contributor" shall mean Licensor and any individual or Legal Entity
+      on behalf of whom a Contribution has been received by Licensor and
+      subsequently incorporated within the Work.
+   2. Grant of Copyright License. Subject to the terms and conditions of
+      this License, each Contributor hereby grants to You a perpetual,
+      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+      copyright license to reproduce, prepare Derivative Works of,
+      publicly display, publicly perform, sublicense, and distribute the
+      Work and such Derivative Works in Source or Object form.
+   3. Grant of Patent License. Subject to the terms and conditions of
+      this License, each Contributor hereby grants to You a perpetual,
+      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+      (except as stated in this section) patent license to make, have made,
+      use, offer to sell, sell, import, and otherwise transfer the Work,
+      where such license applies only to those patent claims licensable
+      by such Contributor that are necessarily infringed by their
+      Contribution(s) alone or by combination of their Contribution(s)
+      with the Work to which such Contribution(s) was submitted. If You
+      institute patent litigation against any entity (including a
+      cross-claim or counterclaim in a lawsuit) alleging that the Work
+      or a Contribution incorporated within the Work constitutes direct
+      or contributory patent infringement, then any patent licenses
+      granted to You under this License for that Work shall terminate
+      as of the date such litigation is filed.
+   4. Redistribution. You may reproduce and distribute copies of the
+      Work or Derivative Works thereof in any medium, with or without
+      modifications, and in Source or Object form, provided that You
+      meet the following conditions:
+      (a) You must give any other recipients of the Work or
+          Derivative Works a copy of this License; and
+      (b) You must cause any modified files to carry prominent notices
+          stating that You changed the files; and
+      (c) You must retain, in the Source form of any Derivative Works
+          that You distribute, all copyright, patent, trademark, and
+          attribution notices from the Source form of the Work,
+          excluding those notices that do not pertain to any part of
+          the Derivative Works; and
+      (d) If the Work includes a "NOTICE" text file as part of its
+          distribution, then any Derivative Works that You distribute must
+          include a readable copy of the attribution notices contained
+          within such NOTICE file, excluding those notices that do not
+          pertain to any part of the Derivative Works, in at least one
+          of the following places: within a NOTICE text file distributed
+          as part of the Derivative Works; within the Source form or
+          documentation, if provided along with the Derivative Works; or,
+          within a display generated by the Derivative Works, if and
+          wherever such third-party notices normally appear. The contents
+          of the NOTICE file are for informational purposes only and
+          do not modify the License. You may add Your own attribution
+          notices within Derivative Works that You distribute, alongside
+          or as an addendum to the NOTICE text from the Work, provided
+          that such additional attribution notices cannot be construed
+          as modifying the License.
+      You may add Your own copyright statement to Your modifications and
+      may provide additional or different license terms and conditions
+      for use, reproduction, or distribution of Your modifications, or
+      for any such Derivative Works as a whole, provided Your use,
+      reproduction, and distribution of the Work otherwise complies with
+      the conditions stated in this License.
+   5. Submission of Contributions. Unless You explicitly state otherwise,
+      any Contribution intentionally submitted for inclusion in the Work
+      by You to the Licensor shall be under the terms and conditions of
+      this License, without any additional terms or conditions.
+      Notwithstanding the above, nothing herein shall supersede or modify
+      the terms of any separate license agreement you may have executed
+      with Licensor regarding such Contributions.
+   6. Trademarks. This License does not grant permission to use the trade
+      names, trademarks, service marks, or product names of the Licensor,
+      except as required for reasonable and customary use in describing the
+      origin of the Work and reproducing the content of the NOTICE file.
+   7. Disclaimer of Warranty. Unless required by applicable law or
+      agreed to in writing, Licensor provides the Work (and each
+      Contributor provides its Contributions) on an "AS IS" BASIS,
+      WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+      implied, including, without limitation, any warranties or conditions
+      of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
+      PARTICULAR PURPOSE. You are solely responsible for determining the
+      appropriateness of using or redistributing the Work and assume any
+      risks associated with Your exercise of permissions under this License.
+   8. Limitation of Liability. In no event and under no legal theory,
+      whether in tort (including negligence), contract, or otherwise,
+      unless required by applicable law (such as deliberate and grossly
+      negligent acts) or agreed to in writing, shall any Contributor be
+      liable to You for damages, including any direct, indirect, special,
+      incidental, or consequential damages of any character arising as a
+      result of this License or out of the use or inability to use the
+      Work (including but not limited to damages for loss of goodwill,
+      work stoppage, computer failure or malfunction, or any and all
+      other commercial damages or losses), even if such Contributor
+      has been advised of the possibility of such damages.
+   9. Accepting Warranty or Additional Liability. While redistributing
+      the Work or Derivative Works thereof, You may choose to offer,
+      and charge a fee for, acceptance of support, warranty, indemnity,
+      or other liability obligations and/or rights consistent with this
+      License. However, in accepting such obligations, You may act only
+      on Your own behalf and on Your sole responsibility, not on behalf
+      of any other Contributor, and only if You agree to indemnify,
+      defend, and hold each Contributor harmless for any liability
+      incurred by, or claims asserted against, such Contributor by reason
+      of your accepting any such warranty or additional liability.
+   END OF TERMS AND CONDITIONS
+   APPENDIX: How to apply the Apache License to your work.
+      To apply the Apache License to your work, attach the following
+      boilerplate notice, with the fields enclosed by brackets "[]"
+      replaced with your own identifying information. (Don't include
+      the brackets!)  The text should be enclosed in the appropriate
+      comment syntax for the file format. We also recommend that a
+      file or class name and description of purpose be included on the
+      same "printed page" as the copyright notice for easier
+      identification within third-party archives.
+   Copyright 2025 Timur Hromek
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+       http://www.apache.org/licenses/LICENSE-2.0
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.

app.py ADDED Viewed

	@@ -0,0 +1,104 @@

+import gradio as gr
+import torch
+from tokenizers import Tokenizer
+import os
+from HROM_Trainer import HROM, CONFIG, SafetyManager
+def load_latest_checkpoint(model, device):
+    checkpoint_dir = "checkpoints"
+    checkpoints = [f for f in os.listdir(checkpoint_dir) if f.endswith(".pt")]
+    if not checkpoints:
+        raise FileNotFoundError("No checkpoints found.")
+    checkpoints = sorted(checkpoints, key=lambda x: os.path.getmtime(os.path.join(checkpoint_dir, x)), reverse=True)
+    latest_checkpoint = os.path.join(checkpoint_dir, checkpoints[0])
+    checkpoint = torch.load(latest_checkpoint, map_location=device)
+    model.load_state_dict(checkpoint['model'])
+    return model
+def generate_response(model, tokenizer, input_ids, safety_manager, max_length=200):
+    device = next(model.parameters()).device
+    generated_ids = input_ids.copy()
+    for _ in range(max_length):
+        input_tensor = torch.tensor([generated_ids], device=device)
+        with torch.no_grad():
+            logits = model(input_tensor)
+        next_token = logits.argmax(-1)[:, -1].item()
+        if next_token == tokenizer.token_to_id("</s>"):
+            break
+        current_text = tokenizer.decode(generated_ids + [next_token])
+        if not safety_manager.content_filter(current_text):
+            break
+        generated_ids.append(next_token)
+    return generated_ids[len(input_ids):]
+# Initialize components once
+# These run at import time and are shared by every request handled below.
+tokenizer = Tokenizer.from_file("tokenizer/hrom_tokenizer.json")
+device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+model = HROM().to(device)
+# Raises FileNotFoundError if ./checkpoints holds no .pt file.
+model = load_latest_checkpoint(model, device)
+# Inference only: switch to eval mode (disables dropout).
+model.eval()
+safety = SafetyManager(model, tokenizer)
+# Upper bound on generated tokens per reply; also reserved out of
+# CONFIG["max_seq_len"] when the prompt is truncated in process_message.
+max_response_length = 200
+def process_message(user_input, chat_history, token_history):
+    # Process user input
+    user_turn = f"<user> {user_input} </s>"
+    user_tokens = tokenizer.encode(user_turn).ids
+    token_history.extend(user_tokens)
+    # Prepare input sequence
+    input_sequence = [tokenizer.token_to_id("<s>")] + token_history
+    # Truncate if needed
+    max_input_len = CONFIG["max_seq_len"] - max_response_length
+    if len(input_sequence) > max_input_len:
+        input_sequence = input_sequence[-max_input_len:]
+        token_history = input_sequence[1:]
+    # Generate response
+    response_ids = generate_response(model, tokenizer, input_sequence, safety, max_response_length)
+    # Process assistant response
+    assistant_text = "I couldn't generate a proper response."
+    if response_ids:
+        if response_ids[0] == tokenizer.token_to_id("<assistant>"):
+            try:
+                end_idx = response_ids.index(tokenizer.token_to_id("</s>"))
+                assistant_text = tokenizer.decode(response_ids[1:end_idx])
+                token_history.extend(response_ids[:end_idx+1])
+            except ValueError:
+                assistant_text = tokenizer.decode(response_ids[1:])
+                token_history.extend(response_ids)
+        else:
+            assistant_text = tokenizer.decode(response_ids)
+            token_history.extend(response_ids)
+    chat_history.append((user_input, assistant_text))
+    return chat_history, token_history
+def clear_history():
+    return [], []
+# Build the Gradio UI: a chat window, a message box, per-session token
+# history, and a button that resets both.
+with gr.Blocks() as demo:
+    gr.Markdown("# HROM Chatbot")
+    chatbot = gr.Chatbot(height=500)
+    msg = gr.Textbox(label="Your Message")
+    # Token ids of the conversation so far; passed to and returned from
+    # process_message on every submit.
+    token_state = gr.State([])
+    # On submit: run the model, update chat and token state, then clear the
+    # textbox.
+    msg.submit(
+        process_message,
+        [msg, chatbot, token_state],
+        [chatbot, token_state],
+        queue=False
+    ).then(
+        lambda: "", None, msg
+    )
+    clear_btn = gr.Button("Clear Chat History")
+    # Reset both the visible chat and the stored token history.
+    clear_btn.click(
+        clear_history,
+        outputs=[chatbot, token_state],
+        queue=False
+    )
+# Start the web server; runs at import time because this file is the app
+# entry point.
+demo.launch()

tokenizer/hrom_tokenizer.json ADDED Viewed

The diff for this file is too large to render. See raw diff