# %%writefile app.py
import streamlit as st
import matplotlib.pyplot as plt
import torch
# transformers.AdamW has been removed in recent transformers releases; torch.optim.AdamW is used instead.
from transformers import AutoTokenizer, DataCollatorWithPadding, AutoModelForSequenceClassification
from datasets import load_dataset
from evaluate import load as load_metric
from torch.utils.data import DataLoader
import random

DEVICE = torch.device("cpu")
NUM_ROUNDS = 3

# ########################TinyLLM####################################
# import torch
# import torch.nn as nn
# from torch.nn import functional as F
#
# # hyperparameters
# batch_size = 64     # how many independent sequences will we process in parallel?
# block_size = 256    # what is the maximum context length for predictions?
# max_iters = 5000
# eval_interval = 500
# learning_rate = 3e-4
# device = 'cuda' if torch.cuda.is_available() else 'cpu'
# eval_iters = 200
# n_embd = 384
# n_head = 6
# n_layer = 6
# dropout = 0.2
# # ------------
#
# torch.manual_seed(1337)
#
# # wget https://raw.githubusercontent.com/karpathy/char-rnn/master/data/tinyshakespeare/input.txt
# with open('input.txt', 'r', encoding='utf-8') as f:
#     text = f.read()
#
# # here are all the unique characters that occur in this text
# chars = sorted(list(set(text)))
# vocab_size = len(chars)
# # create a mapping from characters to integers
# stoi = { ch: i for i, ch in enumerate(chars) }
# itos = { i: ch for i, ch in enumerate(chars) }
# encode = lambda s: [stoi[c] for c in s]           # encoder: take a string, output a list of integers
# decode = lambda l: ''.join([itos[i] for i in l])  # decoder: take a list of integers, output a string
#
# # Train and test splits
# data = torch.tensor(encode(text), dtype=torch.long)
# n = int(0.9 * len(data))  # first 90% will be train, rest val
# train_data = data[:n]
# val_data = data[n:]
#
# # data loading
# def get_batch(split):
#     # generate a small batch of data of inputs x and targets y
#     data = train_data if split == 'train' else val_data
#     ix = torch.randint(len(data) - block_size, (batch_size,))
#     x = torch.stack([data[i:i+block_size] for i in ix])
#     y = torch.stack([data[i+1:i+block_size+1] for i in ix])
#     x, y = x.to(device), y.to(device)
#     return x, y
#
# @torch.no_grad()
# def estimate_loss():
#     out = {}
#     model.eval()
#     for split in ['train', 'val']:
#         losses = torch.zeros(eval_iters)
#         for k in range(eval_iters):
#             X, Y = get_batch(split)
#             logits, loss = model(X, Y)
#             losses[k] = loss.item()
#         out[split] = losses.mean()
#     model.train()
#     return out
#
# class Head(nn.Module):
#     """ one head of self-attention """
#
#     def __init__(self, head_size):
#         super().__init__()
#         self.key = nn.Linear(n_embd, head_size, bias=False)
#         self.query = nn.Linear(n_embd, head_size, bias=False)
#         self.value = nn.Linear(n_embd, head_size, bias=False)
#         self.register_buffer('tril', torch.tril(torch.ones(block_size, block_size)))
#         self.dropout = nn.Dropout(dropout)
#
#     def forward(self, x):
#         # input of size (batch, time-step, channels)
#         # output of size (batch, time-step, head size)
#         B, T, C = x.shape
#         k = self.key(x)    # (B,T,hs)
#         q = self.query(x)  # (B,T,hs)
#         # compute attention scores ("affinities")
#         wei = q @ k.transpose(-2, -1) * k.shape[-1]**-0.5  # (B, T, hs) @ (B, hs, T) -> (B, T, T)
#         wei = wei.masked_fill(self.tril[:T, :T] == 0, float('-inf'))  # (B, T, T)
#         wei = F.softmax(wei, dim=-1)  # (B, T, T)
#         wei = self.dropout(wei)
#         # perform the weighted aggregation of the values
#         v = self.value(x)  # (B,T,hs)
#         out = wei @ v  # (B, T, T) @ (B, T, hs) -> (B, T, hs)
#         return out
#
# class MultiHeadAttention(nn.Module):
#     """ multiple heads of self-attention in parallel """
#
#     def __init__(self, num_heads, head_size):
#         super().__init__()
#         self.heads = nn.ModuleList([Head(head_size) for _ in range(num_heads)])
#         self.proj = nn.Linear(head_size * num_heads, n_embd)
#         self.dropout = nn.Dropout(dropout)
#
#     def forward(self, x):
#         out = torch.cat([h(x) for h in self.heads], dim=-1)
#         out = self.dropout(self.proj(out))
#         return out
#
# class FeedFoward(nn.Module):
#     """ a simple linear layer followed by a non-linearity """
#
#     def __init__(self, n_embd):
#         super().__init__()
#         self.net = nn.Sequential(
#             nn.Linear(n_embd, 4 * n_embd),
#             nn.ReLU(),
#             nn.Linear(4 * n_embd, n_embd),
#             nn.Dropout(dropout),
#         )
#
#     def forward(self, x):
#         return self.net(x)
#
# class Block(nn.Module):
#     """ Transformer block: communication followed by computation """
#
#     def __init__(self, n_embd, n_head):
#         # n_embd: embedding dimension, n_head: the number of heads we'd like
#         super().__init__()
#         head_size = n_embd // n_head
#         self.sa = MultiHeadAttention(n_head, head_size)
#         self.ffwd = FeedFoward(n_embd)
#         self.ln1 = nn.LayerNorm(n_embd)
#         self.ln2 = nn.LayerNorm(n_embd)
#
#     def forward(self, x):
#         x = x + self.sa(self.ln1(x))
#         x = x + self.ffwd(self.ln2(x))
#         return x
#
# class GPTLanguageModel(nn.Module):
#
#     def __init__(self):
#         super().__init__()
#         # each token directly reads off the logits for the next token from a lookup table
#         self.token_embedding_table = nn.Embedding(vocab_size, n_embd)
#         self.position_embedding_table = nn.Embedding(block_size, n_embd)
#         self.blocks = nn.Sequential(*[Block(n_embd, n_head=n_head) for _ in range(n_layer)])
#         self.ln_f = nn.LayerNorm(n_embd)  # final layer norm
#         self.lm_head = nn.Linear(n_embd, vocab_size)
#
#         # better init, not covered in the original GPT video, but important, will cover in followup video
#         self.apply(self._init_weights)
#
#     def _init_weights(self, module):
#         if isinstance(module, nn.Linear):
#             torch.nn.init.normal_(module.weight, mean=0.0, std=0.02)
#             if module.bias is not None:
#                 torch.nn.init.zeros_(module.bias)
#         elif isinstance(module, nn.Embedding):
#             torch.nn.init.normal_(module.weight, mean=0.0, std=0.02)
#
#     def forward(self, idx, targets=None):
#         B, T = idx.shape
#
#         # idx and targets are both (B,T) tensor of integers
#         tok_emb = self.token_embedding_table(idx)  # (B,T,C)
#         pos_emb = self.position_embedding_table(torch.arange(T, device=device))  # (T,C)
#         x = tok_emb + pos_emb     # (B,T,C)
#         x = self.blocks(x)        # (B,T,C)
#         x = self.ln_f(x)          # (B,T,C)
#         logits = self.lm_head(x)  # (B,T,vocab_size)
#
#         if targets is None:
#             loss = None
#         else:
#             B, T, C = logits.shape
#             logits = logits.view(B*T, C)
#             targets = targets.view(B*T)
#             loss = F.cross_entropy(logits, targets)
#
#         return logits, loss
#
#     def generate(self, idx, max_new_tokens):
#         # idx is (B, T) array of indices in the current context
#         for _ in range(max_new_tokens):
#             # crop idx to the last block_size tokens
#             idx_cond = idx[:, -block_size:]
#             # get the predictions
#             logits, loss = self(idx_cond)
#             # focus only on the last time step
#             logits = logits[:, -1, :]  # becomes (B, C)
#             # apply softmax to get probabilities
#             probs = F.softmax(logits, dim=-1)  # (B, C)
#             # sample from the distribution
#             idx_next = torch.multinomial(probs, num_samples=1)  # (B, 1)
#             # append sampled index to the running sequence
#             idx = torch.cat((idx, idx_next), dim=1)  # (B, T+1)
#         return idx
#
# model = GPTLanguageModel()
# m = model.to(device)
# # print the number of parameters in the model
# print(sum(p.numel() for p in m.parameters())/1e6, 'M parameters')
#
# # create a PyTorch optimizer
# optimizer = torch.optim.AdamW(model.parameters(), lr=learning_rate)
#
# for iter in range(max_iters):
#
#     # every once in a while evaluate the loss on train and val sets
#     if iter % eval_interval == 0 or iter == max_iters - 1:
#         losses = estimate_loss()
#         print(f"step {iter}: train loss {losses['train']:.4f}, val loss {losses['val']:.4f}")
#
#     # sample a batch of data
#     xb, yb = get_batch('train')
#
#     # evaluate the loss
#     logits, loss = model(xb, yb)
#     optimizer.zero_grad(set_to_none=True)
#     loss.backward()
#     optimizer.step()
#
# # generate from the model
# context = torch.zeros((1, 1), dtype=torch.long, device=device)
# print(decode(m.generate(context, max_new_tokens=500)[0].tolist()))
# # open('more.txt', 'w').write(decode(m.generate(context, max_new_tokens=10000)[0].tolist()))
# ########################TinyLLM##################################


def load_data(dataset_name):
    """Load a text-classification dataset, tokenize it, and return small train/test DataLoaders."""
    raw_datasets = load_dataset(dataset_name)
    raw_datasets = raw_datasets.shuffle(seed=42)
    # Only IMDb ships an "unsupervised" split; drop it only when present so other datasets don't raise a KeyError.
    if "unsupervised" in raw_datasets:
        del raw_datasets["unsupervised"]
    tokenizer = AutoTokenizer.from_pretrained("bert-base-uncased")

    def tokenize_function(examples):
        # Assumes the dataset has a "text" column (true for imdb and ag_news).
        return tokenizer(examples["text"], truncation=True)

    # Keep only 20 random examples per split so the demo runs quickly on CPU.
    train_population = random.sample(range(len(raw_datasets["train"])), 20)
    test_population = random.sample(range(len(raw_datasets["test"])), 20)

    tokenized_datasets = raw_datasets.map(tokenize_function, batched=True)
    tokenized_datasets["train"] = tokenized_datasets["train"].select(train_population)
    tokenized_datasets["test"] = tokenized_datasets["test"].select(test_population)

    tokenized_datasets = tokenized_datasets.remove_columns("text")
    tokenized_datasets = tokenized_datasets.rename_column("label", "labels")

    data_collator = DataCollatorWithPadding(tokenizer=tokenizer)
    trainloader = DataLoader(tokenized_datasets["train"], shuffle=True, batch_size=32, collate_fn=data_collator)
    testloader = DataLoader(tokenized_datasets["test"], batch_size=32, collate_fn=data_collator)

    return trainloader, testloader


def train(net, trainloader, epochs):
    """Standard supervised fine-tuning loop for one client."""
    optimizer = torch.optim.AdamW(net.parameters(), lr=5e-5)
    net.train()
    for _ in range(epochs):
        for batch in trainloader:
            batch = {k: v.to(DEVICE) for k, v in batch.items()}
            outputs = net(**batch)
            loss = outputs.loss
            loss.backward()
            optimizer.step()
            optimizer.zero_grad()


def test(net, testloader):
    """Evaluate a model on the test loader and return (average loss, accuracy)."""
    metric = load_metric("accuracy")
    loss = 0
    net.eval()
    for batch in testloader:
        batch = {k: v.to(DEVICE) for k, v in batch.items()}
        with torch.no_grad():
            outputs = net(**batch)
        logits = outputs.logits
        loss += outputs.loss.item()
        predictions = torch.argmax(logits, dim=-1)
        metric.add_batch(predictions=predictions, references=batch["labels"])
    loss /= len(testloader.dataset)
    accuracy = metric.compute()["accuracy"]
    return loss, accuracy


from transformers import Wav2Vec2Processor, HubertForSequenceClassification

# def main():
#     st.write("## Audio Classification with HuBERT")
#     dataset_name = st.selectbox("Dataset", ["librispeech", "your_audio_dataset"])
#     model_name = "facebook/hubert-base-ls960"
#     processor = Wav2Vec2Processor.from_pretrained(model_name)
#     net = HubertForSequenceClassification.from_pretrained(model_name, num_labels=2).to(DEVICE)
#     train_dataset, test_dataset = load_data(dataset_name)
#     # Further implementation needed for actual data preparation and training loops
#     st.write("Details of further steps would be filled in based on specific requirements and dataset structure.")

# if __name__ == "__main__":
#     main()

from transformers import Wav2Vec2FeatureExtractor
import soundfile as sf
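
# --- Illustrative sketch (not part of the original script): federated averaging. ---
# main() below simulates NUM_CLIENTS clients per round but never aggregates their updates
# into the global model. A minimal FedAvg step, assuming each client holds a copy of the
# same architecture as `net`, could look like the helper below; the name `fedavg_aggregate`
# and the `client_nets` list are hypothetical.
def fedavg_aggregate(client_nets):
    """Average the parameters of the client models and return a global state_dict."""
    state_dicts = [client.state_dict() for client in client_nets]
    avg_state = {}
    for key, reference in state_dicts[0].items():
        if reference.is_floating_point():
            # Element-wise mean of this parameter across all clients.
            avg_state[key] = torch.stack([sd[key] for sd in state_dicts], dim=0).mean(dim=0)
        else:
            # Integer buffers (e.g. position ids) are copied from the first client.
            avg_state[key] = reference
    return avg_state
# Hypothetical usage inside the round loop in main():
#     net.load_state_dict(fedavg_aggregate(client_nets))
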
def load_audio(file_path):
    # Load an audio file, return waveform and sampling rate
    waveform, sample_rate = sf.read(file_path)
    return waveform, sample_rate


def prepare_dataset(data_paths, feature_extractor):
    # Dummy function to simulate loading and processing an audio dataset.
    # Replace this with actual data loading and processing logic.
    # `feature_extractor` is passed in explicitly (it was previously an undefined global).
    features = []
    labels = []
    for path, label in data_paths:
        waveform, sr = load_audio(path)
        input_values = feature_extractor(waveform, sampling_rate=sr, return_tensors="pt").input_values
        features.append(input_values)
        labels.append(label)
    return torch.cat(features, dim=0), torch.tensor(labels)


def main():
    st.write("## Federated Learning with dynamic models and datasets for mobile devices")
    dataset_name = st.selectbox("Dataset", ["audio_instruction_task", "imdb", "amazon_polarity", "ag_news"])
    model_name = st.selectbox("Model", ["facebook/hubert-base-ls960", "bert-base-uncased", "distilbert-base-uncased"])

    # Pick the model class that matches the selected checkpoint: HuBERT checkpoints need the
    # audio classification head and a feature extractor, while the text checkpoints use the
    # generic sequence-classification head.
    if "hubert" in model_name:
        feature_extractor = Wav2Vec2FeatureExtractor.from_pretrained(model_name)
        net = HubertForSequenceClassification.from_pretrained(model_name, num_labels=2).to(DEVICE)
    else:
        net = AutoModelForSequenceClassification.from_pretrained(model_name, num_labels=2).to(DEVICE)

    NUM_CLIENTS = st.slider("Number of Clients", min_value=1, max_value=10, value=2)
    NUM_ROUNDS = st.slider("Number of Rounds", min_value=1, max_value=10, value=3)

    # NOTE: load_data() only handles the text datasets; load_audio()/prepare_dataset() above
    # are stubs for the audio path.
    trainloader, testloader = load_data(dataset_name)

    if st.button("Start Training"):
        round_losses = []
        round_accuracies = []  # Store accuracy values for each round
        for round_num in range(1, NUM_ROUNDS + 1):
            st.write(f"## Round {round_num}")

            st.write("### Training Metrics for Each Client")
            for client in range(1, NUM_CLIENTS + 1):
                client_loss, client_accuracy = test(net, testloader)  # Placeholder for actual client metrics
                st.write(f"Client {client}: Loss: {client_loss}, Accuracy: {client_accuracy}")

            st.write("### Accuracy Over Rounds")
            round_accuracies.append(client_accuracy)  # Append the accuracy for this round
            plt.plot(range(1, round_num + 1), round_accuracies, marker='o')  # Plot accuracy over rounds
            plt.xlabel("Round")
            plt.ylabel("Accuracy")
            plt.title("Accuracy Over Rounds")
            st.pyplot(plt.gcf())  # Pass the current figure explicitly; bare st.pyplot() is deprecated.
            plt.clf()  # Clear the figure so the loss plot below starts fresh.

            st.write("### Loss Over Rounds")
            loss_value = random.random()  # Placeholder for loss values
            round_losses.append(loss_value)
            rounds = list(range(1, round_num + 1))
            plt.plot(rounds, round_losses)
            plt.xlabel("Round")
            plt.ylabel("Loss")
            plt.title("Loss Over Rounds")
            st.pyplot(plt.gcf())
            plt.clf()

            st.success(f"Round {round_num} completed successfully!")
    else:
        st.write("Click the 'Start Training' button to start the training process.")


if __name__ == "__main__":
    main()
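
# Usage note: save this script as app.py (e.g. by un-commenting the `%%writefile app.py`
# cell magic at the top when running from a notebook) and launch it with the Streamlit CLI:
#
#     streamlit run app.py
#
# The page then lets you pick a dataset and model, set the number of clients and rounds,
# and press "Start Training" to step through the simulated federated rounds.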