Commit fdc4786
Parent(s): 4cda815
Upload 6 files
- loss/contrastive_loss.py +88 -0
- loss/focal_loss.py +28 -0
- loss/label_smoothing.py +21 -0
- loss/rl_loss.py +122 -0
- loss/similarity_loss.py +70 -0
- loss/triplet_loss.py +103 -0
loss/contrastive_loss.py
ADDED
@@ -0,0 +1,88 @@
# -*- coding: utf-8 -*-
# @Time : 2022/03/23 14:50
# @Author : Jianing Wang
# @Email : [email protected]
# @File : ContrastiveLoss.py
# !/usr/bin/env python
# coding=utf-8

from enum import Enum
import torch
import torch.nn.functional as F
from torch import nn, Tensor
from transformers.models.bert.modeling_bert import BertModel
from transformers import BertTokenizer, BertConfig


class SiameseDistanceMetric(Enum):
    """
    The distance metrics for the contrastive loss
    """
    EUCLIDEAN = lambda x, y: F.pairwise_distance(x, y, p=2)
    MANHATTAN = lambda x, y: F.pairwise_distance(x, y, p=1)
    COSINE_DISTANCE = lambda x, y: 1 - F.cosine_similarity(x, y)


class ContrastiveLoss(nn.Module):
    """
    Contrastive loss. Expects as input two texts and a label of either 0 or 1. If the label == 1, then the distance between the
    two embeddings is reduced. If the label == 0, then the distance between the embeddings is increased.

    @:param distance_metric: The distance metric function
    @:param margin: (float) The margin distance
    @:param size_average: (bool) Whether to return the averaged loss instead of the sum

    Input example of forward function:
        rep_anchor: [[0.2, -0.1, ..., 0.6], [0.2, -0.1, ..., 0.6], ..., [0.2, -0.1, ..., 0.6]]
        rep_candidate: [[0.3, 0.1, ..., -0.3], [-0.8, 1.2, ..., 0.7], ..., [-0.9, 0.1, ..., 0.4]]
        label: [0, 1, ..., 1]

    Return example of forward function:
        0.015 (averaged)
        2.672 (sum)
    """

    def __init__(self, distance_metric=SiameseDistanceMetric.COSINE_DISTANCE, margin: float = 0.5, size_average: bool = False):
        super(ContrastiveLoss, self).__init__()
        self.distance_metric = distance_metric
        self.margin = margin
        self.size_average = size_average

    def forward(self, rep_anchor, rep_candidate, label: Tensor):
        # rep_anchor: [batch_size, hidden_dim] denotes the representations of anchors
        # rep_candidate: [batch_size, hidden_dim] denotes the representations of positives / negatives
        # label: [batch_size] denotes the label of each anchor - candidate pair

        distances = self.distance_metric(rep_anchor, rep_candidate)
        losses = 0.5 * (label.float() * distances.pow(2) + (1 - label).float() * F.relu(self.margin - distances).pow(2))
        return losses.mean() if self.size_average else losses.sum()


if __name__ == "__main__":
    # config for huggingface pre-trained language models
    config = BertConfig.from_pretrained("bert-base-cased")
    # tokenizer for huggingface pre-trained language models
    tokenizer = BertTokenizer.from_pretrained("bert-base-cased")
    # pytorch_model.bin for huggingface pre-trained language models
    model = BertModel.from_pretrained("bert-base-cased")
    # obtain two batches of examples, each corresponding example is a pair
    examples1 = ["This is the sentence anchor 1.", "It is the second sentence in this article named Section D."]
    examples2 = ["It is the same as anchor 1.", "I think it is different with Section D."]
    label = [1, 0]
    # convert each example to features
    # {"input_ids": xxx, "attention_mask": xxx, "token_type_ids": xxx}
    features1 = tokenizer(examples1, add_special_tokens=True, padding=True)
    features2 = tokenizer(examples2, add_special_tokens=True, padding=True)
    # pad and convert to feature batch
    max_seq_len = 16
    features1 = {key: torch.Tensor([value + [0] * (max_seq_len - len(value)) for value in values]).long() for key, values in features1.items()}
    features2 = {key: torch.Tensor([value + [0] * (max_seq_len - len(value)) for value in values]).long() for key, values in features2.items()}
    label = torch.Tensor(label).long()
    # obtain sentence embeddings by mean pooling over the sequence dimension
    rep_anchor = model(**features1)[0]  # [batch_size, max_seq_len, hidden_dim]
    rep_candidate = model(**features2)[0]  # [batch_size, max_seq_len, hidden_dim]
    rep_anchor = torch.mean(rep_anchor, dim=1)  # [batch_size, hidden_dim]
    rep_candidate = torch.mean(rep_candidate, dim=1)  # [batch_size, hidden_dim]
    # obtain contrastive loss
    loss_fn = ContrastiveLoss()
    loss = loss_fn(rep_anchor=rep_anchor, rep_candidate=rep_candidate, label=label)
    print(loss)  # e.g. tensor(0.0869, grad_fn=<SumBackward0>)
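For a quick sanity check that does not require downloading a pretrained model, a minimal sketch with random embeddings might look like the following (the import path loss.contrastive_loss is an assumption based on the file layout above):

import torch
from loss.contrastive_loss import ContrastiveLoss, SiameseDistanceMetric  # assumed import path

# four random pairs of 768-dim "sentence embeddings"
rep_anchor = torch.randn(4, 768)
rep_candidate = torch.randn(4, 768)
label = torch.tensor([1, 0, 1, 0])  # 1 = similar pair, 0 = dissimilar pair

loss_fn = ContrastiveLoss(distance_metric=SiameseDistanceMetric.COSINE_DISTANCE, margin=0.5, size_average=True)
loss = loss_fn(rep_anchor, rep_candidate, label)
print(loss)  # scalar tensor: positive pairs are pulled together, negative pairs pushed beyond the margin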
loss/focal_loss.py
ADDED
@@ -0,0 +1,28 @@
# -*- coding: utf-8 -*-
# @Time : 2022/2/17 6:05 PM
# @Author : JianingWang
# @File : loss
import torch
from torch import nn
import torch.nn.functional as F


class FocalLoss(nn.Module):
    """Multi-class Focal loss implementation"""

    def __init__(self, gamma=2, weight=None, ignore_index=-100):
        super(FocalLoss, self).__init__()
        self.gamma = gamma
        self.weight = weight
        self.ignore_index = ignore_index

    def forward(self, input, target):
        """
        input: [N, C]
        target: [N, ]
        """
        logpt = F.log_softmax(input, dim=1)
        pt = torch.exp(logpt)
        logpt = (1 - pt) ** self.gamma * logpt
        loss = F.nll_loss(logpt, target, self.weight, ignore_index=self.ignore_index)
        return loss
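FocalLoss down-weights well-classified examples through the (1 - pt)^gamma factor, so hard or misclassified samples dominate the gradient; with gamma=0 it reduces to ordinary cross-entropy. A brief usage sketch (assuming the file is importable as loss.focal_loss):

import torch
from loss.focal_loss import FocalLoss  # assumed import path

logits = torch.randn(8, 5, requires_grad=True)  # [N, C] raw scores for 8 samples, 5 classes
targets = torch.randint(0, 5, (8,))             # [N] gold class indices

loss_fn = FocalLoss(gamma=2)
loss = loss_fn(logits, targets)
loss.backward()
print(loss)  # scalar focal loss; setting gamma=0 recovers standard cross-entropy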
loss/label_smoothing.py
ADDED
@@ -0,0 +1,21 @@
import torch.nn as nn
import torch.nn.functional as F


class LabelSmoothingCrossEntropy(nn.Module):
    def __init__(self, eps=0.1, reduction="mean", ignore_index=-100):
        super(LabelSmoothingCrossEntropy, self).__init__()
        self.eps = eps
        self.reduction = reduction
        self.ignore_index = ignore_index

    def forward(self, output, target):
        c = output.size()[-1]
        log_preds = F.log_softmax(output, dim=-1)
        if self.reduction == "sum":
            loss = -log_preds.sum()
        else:
            loss = -log_preds.sum(dim=-1)
            if self.reduction == "mean":
                loss = loss.mean()
        return loss * self.eps / c + (1 - self.eps) * F.nll_loss(log_preds, target, reduction=self.reduction,
                                                                 ignore_index=self.ignore_index)
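With reduction="mean" and no ignored targets, this should match F.cross_entropy with its built-in label_smoothing argument (PyTorch 1.10+) up to floating-point error. A small cross-check sketch (assuming the file is importable as loss.label_smoothing):

import torch
import torch.nn.functional as F
from loss.label_smoothing import LabelSmoothingCrossEntropy  # assumed import path

logits = torch.randn(8, 10)
targets = torch.randint(0, 10, (8,))

loss_fn = LabelSmoothingCrossEntropy(eps=0.1, reduction="mean")
custom = loss_fn(logits, targets)
reference = F.cross_entropy(logits, targets, label_smoothing=0.1)
print(custom, reference)  # the two values should agree closely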
loss/rl_loss.py
ADDED
@@ -0,0 +1,122 @@
from typing import Optional

import torch
import torch.nn as nn


def masked_mean(tensor: torch.Tensor, mask: torch.Tensor, dim: int = 1) -> torch.Tensor:
    tensor = tensor * mask
    tensor = tensor.sum(dim=dim)
    mask_sum = mask.sum(dim=dim)
    mean = tensor / (mask_sum + 1e-8)
    return mean


class GPTLMLoss(nn.Module):
    """
    GPT Language Model Loss
    """

    def __init__(self):
        super().__init__()
        self.loss = nn.CrossEntropyLoss()

    def forward(self, logits: torch.Tensor, labels: torch.Tensor) -> torch.Tensor:
        shift_logits = logits[..., :-1, :].contiguous()
        shift_labels = labels[..., 1:].contiguous()
        # Flatten the tokens
        return self.loss(shift_logits.view(-1, shift_logits.size(-1)), shift_labels.view(-1))


class PolicyLoss(nn.Module):
    """
    Policy Loss for PPO
    """

    def __init__(self, clip_eps: float = 0.2) -> None:
        super().__init__()
        self.clip_eps = clip_eps

    def forward(self,
                log_probs: torch.Tensor,
                old_log_probs: torch.Tensor,
                advantages: torch.Tensor,
                action_mask: Optional[torch.Tensor] = None) -> torch.Tensor:
        ratio = (log_probs - old_log_probs).exp()
        surr1 = ratio * advantages
        surr2 = ratio.clamp(1 - self.clip_eps, 1 + self.clip_eps) * advantages
        loss = -torch.min(surr1, surr2)
        if action_mask is not None:
            loss = masked_mean(loss, action_mask)
        loss = loss.mean()
        return loss


class ValueLoss(nn.Module):
    """
    Value Loss for PPO
    """

    def __init__(self, clip_eps: float = 0.4) -> None:
        super().__init__()
        self.clip_eps = clip_eps

    def forward(self,
                values: torch.Tensor,
                old_values: torch.Tensor,
                reward: torch.Tensor,
                action_mask: Optional[torch.Tensor] = None) -> torch.Tensor:
        values_clipped = old_values + (values - old_values).clamp(-self.clip_eps, self.clip_eps)
        surr1 = (values_clipped - reward) ** 2
        surr2 = (values - reward) ** 2
        loss = torch.max(surr1, surr2)
        loss = loss.mean()
        return 0.5 * loss


class PPOPtxActorLoss(nn.Module):
    """
    To Do:

    PPO-ptx Actor Loss
    """

    def __init__(self, policy_clip_eps: float = 0.2, pretrain_coef: float = 0.0, pretrain_loss_fn=GPTLMLoss()) -> None:
        super().__init__()
        self.pretrain_coef = pretrain_coef
        self.policy_loss_fn = PolicyLoss(clip_eps=policy_clip_eps)
        self.pretrain_loss_fn = pretrain_loss_fn

    def forward(self,
                log_probs: torch.Tensor,
                old_log_probs: torch.Tensor,
                advantages: torch.Tensor,
                lm_logits: torch.Tensor,
                lm_input_ids: torch.Tensor,
                action_mask: Optional[torch.Tensor] = None) -> torch.Tensor:
        policy_loss = self.policy_loss_fn(log_probs, old_log_probs, advantages, action_mask=action_mask)
        lm_loss = self.pretrain_loss_fn(lm_logits, lm_input_ids)
        return policy_loss + self.pretrain_coef * lm_loss


class LogSigLoss(nn.Module):
    """
    Pairwise Loss for Reward Model
    Details: https://arxiv.org/abs/2203.02155
    """

    def forward(self, chosen_reward: torch.Tensor, reject_reward: torch.Tensor) -> torch.Tensor:
        probs = torch.sigmoid(chosen_reward - reject_reward)
        log_probs = torch.log(probs)
        loss = -log_probs.mean()
        return loss


class LogExpLoss(nn.Module):
    """
    Pairwise Loss for Reward Model
    Details: https://arxiv.org/abs/2204.05862
    """

    def forward(self, chosen_reward: torch.Tensor, reject_reward: torch.Tensor) -> torch.Tensor:
        loss = torch.log(1 + torch.exp(reject_reward - chosen_reward)).mean()
        return loss
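A small sketch exercising these pieces on random tensors (not part of the commit; the import path loss.rl_loss and the [batch, num_actions] shapes are assumptions): PolicyLoss clips the probability ratio, ValueLoss clips the value update, and the two pairwise losses train a reward model to rank a chosen completion above a rejected one.

import torch
from loss.rl_loss import PolicyLoss, ValueLoss, LogSigLoss, LogExpLoss  # assumed import path

batch, num_actions = 4, 16
log_probs = torch.randn(batch, num_actions)
old_log_probs = log_probs + 0.1 * torch.randn(batch, num_actions)
advantages = torch.randn(batch, num_actions)
action_mask = torch.ones(batch, num_actions)  # 1 where a generated token should contribute

actor_loss = PolicyLoss(clip_eps=0.2)(log_probs, old_log_probs, advantages, action_mask)
critic_loss = ValueLoss(clip_eps=0.4)(torch.randn(batch, num_actions),   # values
                                      torch.randn(batch, num_actions),   # old_values
                                      torch.randn(batch, num_actions))   # reward

# reward-model pairwise losses: chosen completions should score above rejected ones
chosen_reward, reject_reward = torch.randn(batch), torch.randn(batch)
print(actor_loss, critic_loss, LogSigLoss()(chosen_reward, reject_reward), LogExpLoss()(chosen_reward, reject_reward))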
loss/similarity_loss.py
ADDED
@@ -0,0 +1,70 @@
# -*- coding: utf-8 -*-
# @Time : 2022/03/23 16:55
# @Author : Jianing Wang
# @Email : [email protected]
# @File : SimilarityLoss.py
# !/usr/bin/env python
# coding=utf-8

import torch
from torch import nn, Tensor
from transformers.models.bert.modeling_bert import BertModel
from transformers import BertTokenizer, BertConfig


class CosineSimilarityLoss(nn.Module):
    """
    CosineSimilarityLoss expects that the input consists of two texts and a float label.

    It computes the vectors u = model(input_text[0]) and v = model(input_text[1]) and measures the cosine similarity between the two.
    By default, it minimizes the following loss: ||input_label - cos_score_transformation(cosine_sim(u, v))||_2.

    :param loss_fct: Which pytorch loss function should be used to compare cosine_similarity(u, v) with the input_label. By default, MSE: ||input_label - cosine_sim(u, v)||_2
    :param cos_score_transformation: The cos_score_transformation function is applied on top of cosine_similarity. By default, the identity function is used (i.e. no change).
    """

    def __init__(self, loss_fct=nn.MSELoss(), cos_score_transformation=nn.Identity()):
        super(CosineSimilarityLoss, self).__init__()
        self.loss_fct = loss_fct
        self.cos_score_transformation = cos_score_transformation

    def forward(self, rep_a, rep_b, label: Tensor):
        # rep_a: [batch_size, hidden_dim]
        # rep_b: [batch_size, hidden_dim]
        output = self.cos_score_transformation(torch.cosine_similarity(rep_a, rep_b))
        # print(output) # tensor([0.9925, 0.5846], grad_fn=<DivBackward0>), tensor(0.1709, grad_fn=<MseLossBackward0>)
        return self.loss_fct(output, label.view(-1))


if __name__ == "__main__":
    # config for huggingface pre-trained language models
    config = BertConfig.from_pretrained("bert-base-cased")
    # tokenizer for huggingface pre-trained language models
    tokenizer = BertTokenizer.from_pretrained("bert-base-cased")
    # pytorch_model.bin for huggingface pre-trained language models
    model = BertModel.from_pretrained("bert-base-cased")
    # obtain two batches of examples, each corresponding example is a pair
    examples1 = ["Beijing is one of the biggest city in China.", "Disney film is well seeing for us."]
    examples2 = ["Shanghai is the largest city in east of China.", "ACL 2021 will be held in line due to COVID-19."]
    label = [1, 0]
    # convert each example to features
    # {"input_ids": xxx, "attention_mask": xxx, "token_type_ids": xxx}
    features1 = tokenizer(examples1, add_special_tokens=True, padding=True)
    features2 = tokenizer(examples2, add_special_tokens=True, padding=True)
    # pad and convert to feature batch
    max_seq_len = 24
    features1 = {key: torch.Tensor([value + [0] * (max_seq_len - len(value)) for value in values]).long() for key, values in features1.items()}
    features2 = {key: torch.Tensor([value + [0] * (max_seq_len - len(value)) for value in values]).long() for key, values in features2.items()}
    label = torch.Tensor(label).float()  # float regression targets for the MSE loss
    # obtain sentence embeddings by mean pooling over the sequence dimension
    rep_a = model(**features1)[0]  # [batch_size, max_seq_len, hidden_dim]
    rep_b = model(**features2)[0]  # [batch_size, max_seq_len, hidden_dim]
    rep_a = torch.mean(rep_a, dim=1)  # [batch_size, hidden_dim]
    rep_b = torch.mean(rep_b, dim=1)  # [batch_size, hidden_dim]
    # obtain cosine similarity loss
    loss_fn = CosineSimilarityLoss()
    loss = loss_fn(rep_a=rep_a, rep_b=rep_b, label=label)
    print(loss)  # e.g. tensor(0.1709, grad_fn=<SumBackward0>)
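As a quick synthetic check (assuming the file is importable as loss.similarity_loss): identical vectors have cosine similarity 1 and opposite vectors -1, so labels that already match those values should give a near-zero loss.

import torch
from loss.similarity_loss import CosineSimilarityLoss  # assumed import path

a = torch.randn(2, 768)
b = torch.stack([a[0], -a[1]])     # first pair identical, second pair pointing in opposite directions
label = torch.tensor([1.0, -1.0])  # float regression targets for the cosine similarity

loss_fn = CosineSimilarityLoss()
print(loss_fn(a, b, label))        # ~0, since cosine_sim(a, b) already equals the targets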
loss/triplet_loss.py
ADDED
@@ -0,0 +1,103 @@
# -*- coding: utf-8 -*-
# @Time : 2022/03/23 15:25
# @Author : Jianing Wang
# @Email : [email protected]
# @File : TripletLoss.py
# !/usr/bin/env python
# coding=utf-8

from enum import Enum
import torch
from torch import nn, Tensor
import torch.nn.functional as F
from transformers.models.bert.modeling_bert import BertModel
from transformers import BertTokenizer, BertConfig


class TripletDistanceMetric(Enum):
    """
    The distance metrics for the triplet loss
    """
    COSINE = lambda x, y: 1 - F.cosine_similarity(x, y)
    EUCLIDEAN = lambda x, y: F.pairwise_distance(x, y, p=2)
    MANHATTAN = lambda x, y: F.pairwise_distance(x, y, p=1)


class TripletLoss(nn.Module):
    """
    This class implements the triplet loss. Given a triplet of (anchor, positive, negative),
    the loss minimizes the distance between anchor and positive while it maximizes the distance
    between anchor and negative. It computes the following loss function:

    loss = max(||anchor - positive|| - ||anchor - negative|| + margin, 0).

    Margin is an important hyperparameter and needs to be tuned for each task.

    @:param distance_metric: The distance metric function
    @:param triplet_margin: (float) The margin distance

    Input example of forward function:
        rep_anchor: [[0.2, -0.1, ..., 0.6], [0.2, -0.1, ..., 0.6], ..., [0.2, -0.1, ..., 0.6]]
        rep_positive / rep_negative: [[0.3, 0.1, ..., -0.3], [-0.8, 1.2, ..., 0.7], ..., [-0.9, 0.1, ..., 0.4]]

    Return example of forward function:
        0.015 (averaged)
    """

    def __init__(self, distance_metric=TripletDistanceMetric.EUCLIDEAN, triplet_margin: float = 0.5):
        super(TripletLoss, self).__init__()
        self.distance_metric = distance_metric
        self.triplet_margin = triplet_margin

    def forward(self, rep_anchor, rep_positive, rep_negative):
        # rep_anchor: [batch_size, hidden_dim] denotes the representations of anchors
        # rep_positive: [batch_size, hidden_dim] denotes the representations of positives, which can also be obtained by applying dropout to the anchor
        # rep_negative: [batch_size, hidden_dim] denotes the representations of negatives
        distance_pos = self.distance_metric(rep_anchor, rep_positive)
        distance_neg = self.distance_metric(rep_anchor, rep_negative)

        losses = F.relu(distance_pos - distance_neg + self.triplet_margin)
        return losses.mean()


if __name__ == "__main__":
    # config for huggingface pre-trained language models
    config = BertConfig.from_pretrained("bert-base-cased")
    # tokenizer for huggingface pre-trained language models
    tokenizer = BertTokenizer.from_pretrained("bert-base-cased")
    # pytorch_model.bin for huggingface pre-trained language models
    model = BertModel.from_pretrained("bert-base-cased")
    # obtain anchor, positive and negative examples
    anchor_example = ["I am an anchor, which is the source example sampled from corpora."]  # anchor sentence
    positive_example = [
        "I am an anchor, which is the source example.",
        "I am the source example sampled from corpora."
    ]  # positives, obtained by randomly dropping out or adding noise to the anchor
    negative_example = [
        "It is different with the anchor.",
        "My name is Jianing Wang, please give me some stars, thank you!"
    ]  # negatives, randomly sampled from corpora
    # convert each example to features
    # {"input_ids": xxx, "attention_mask": xxx, "token_type_ids": xxx}
    anchor_feature = tokenizer(anchor_example, add_special_tokens=True, padding=True)
    positive_feature = tokenizer(positive_example, add_special_tokens=True, padding=True)
    negative_feature = tokenizer(negative_example, add_special_tokens=True, padding=True)
    # pad and convert to feature batch
    max_seq_len = 24
    anchor_feature = {key: torch.Tensor([value + [0] * (max_seq_len - len(value)) for value in values]).long() for key, values in anchor_feature.items()}
    positive_feature = {key: torch.Tensor([value + [0] * (max_seq_len - len(value)) for value in values]).long() for key, values in positive_feature.items()}
    negative_feature = {key: torch.Tensor([value + [0] * (max_seq_len - len(value)) for value in values]).long() for key, values in negative_feature.items()}
    # obtain sentence embeddings by mean pooling over the sequence dimension
    rep_anchor = model(**anchor_feature)[0]  # [1, max_seq_len, hidden_dim]
    rep_positive = model(**positive_feature)[0]  # [batch_size, max_seq_len, hidden_dim]
    rep_negative = model(**negative_feature)[0]  # [batch_size, max_seq_len, hidden_dim]
    # the single anchor is broadcast against the positive / negative batch
    rep_anchor = torch.mean(rep_anchor, dim=1)  # [1, hidden_dim]
    rep_positive = torch.mean(rep_positive, dim=1)  # [batch_size, hidden_dim]
    rep_negative = torch.mean(rep_negative, dim=1)  # [batch_size, hidden_dim]
    # obtain triplet loss
    loss_fn = TripletLoss()
    loss = loss_fn(rep_anchor=rep_anchor, rep_positive=rep_positive, rep_negative=rep_negative)
    print(loss)  # e.g. tensor(0.5001, grad_fn=<MeanBackward0>)
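A synthetic sketch of the margin behaviour (assuming the file is importable as loss.triplet_loss): when the positive sits much closer to the anchor than the negative, the hinge term is inactive and the loss is zero.

import torch
from loss.triplet_loss import TripletLoss, TripletDistanceMetric  # assumed import path

anchor = torch.randn(4, 768)
positive = anchor + 0.01 * torch.randn(4, 768)  # near-duplicates of the anchor
negative = torch.randn(4, 768)                  # unrelated random vectors

loss_fn = TripletLoss(distance_metric=TripletDistanceMetric.EUCLIDEAN, triplet_margin=0.5)
print(loss_fn(anchor, positive, negative))      # ~0: the positives are well inside the margin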