Spaces:

Vertaix
/

vendiscore

Build error

App Files Files Community

danf0 committed on Aug 30, 2022

Commit

c3f0353

1 Parent(s): 1b92067

Update vendiscore.

Browse files

Files changed (2) hide show

requirements.txt +11 -1
vendiscore.py +98 -47

requirements.txt CHANGED Viewed

	@@ -1 +1,11 @@
1	- git+https://github.com/huggingface/evaluate@main

+git+https://github.com/huggingface/evaluate@main
+numpy>=1.13
+scipy>=1.3.2
+scikit-learn>=1.1
+torch
+torchvision
+matplotlib
+transformers
+datasets
+nltk
+vendi_score

vendiscore.py CHANGED Viewed

@@ -1,4 +1,4 @@
-# Copyright 2020 The HuggingFace Datasets Authors and the current dataset script contributor.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -15,49 +15,62 @@
 import evaluate
 import datasets
 # TODO: Add BibTeX citation
-_CITATION = """\
-@InProceedings{huggingface:module,
-title = {A great new module},
-authors={huggingface, Inc.},
-year={2020}
-}
-"""
-# TODO: Add description of the module here
 _DESCRIPTION = """\
-This new module is designed to solve this great ML task and is crafted with a lot of care.
 """
-# TODO: Add description of the arguments of the module here
 _KWARGS_DESCRIPTION = """
-Calculates how good are predictions given some references, using certain scores
 Args:
-    predictions: list of predictions to score. Each predictions
-        should be a string with tokens separated by spaces.
-    references: list of reference for each prediction. Each
-        reference should be a string with tokens separated by spaces.
 Returns:
-    accuracy: description of the first score,
-    another_score: description of the second score,
 Examples:
-    Examples should be written in doctest format, and should illustrate how
-    to use the function.
-    >>> my_new_module = evaluate.load("my_new_module")
-    >>> results = my_new_module.compute(references=[0, 1], predictions=[0, 1])
     >>> print(results)
-    {'accuracy': 1.0}
 """
-# TODO: Define external resources urls if needed
-BAD_WORDS_URL = "http://url/to/external/resource/bad_words.txt"
-@evaluate.utils.file_utils.add_start_docstrings(_DESCRIPTION, _KWARGS_DESCRIPTION)
 class VendiScore(evaluate.Metric):
     """TODO: Short description of my evaluation module."""
@@ -69,27 +82,65 @@ class VendiScore(evaluate.Metric):
             description=_DESCRIPTION,
             citation=_CITATION,
             inputs_description=_KWARGS_DESCRIPTION,
-            # This defines the format of each prediction and reference
-            features=datasets.Features({
-                'predictions': datasets.Value('int64'),
-                'references': datasets.Value('int64'),
-            }),
-            # Homepage of the module for documentation
-            homepage="http://module.homepage",
-            # Additional links to the codebase or references
-            codebase_urls=["http://github.com/path/to/codebase/of/new_module"],
-            reference_urls=["http://path.to.reference.url/new_module"]
         )
     def _download_and_prepare(self, dl_manager):
         """Optional: download external resources useful to compute the scores"""
-        # TODO: Download external resources if needed
         pass
-    def _compute(self, predictions, references):
-        """Returns the scores"""
-        # TODO: Compute the different scores of the module
-        accuracy = sum(i == j for i, j in zip(predictions, references)) / len(predictions)
-        return {
-            "accuracy": accuracy,
-        }

+# Copyright 2022 The HuggingFace Datasets Authors and the current dataset script contributor.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 import evaluate
 import datasets
+import numpy as np
+from vendi_score import vendi, image_utils, text_utils
 # TODO: Add BibTeX citation
+_CITATION = ""
 _DESCRIPTION = """\
+A diversity evaluation metric for machine learning.
 """
 _KWARGS_DESCRIPTION = """
+Calculates the Vendi Score given samples and a similarity function.
 Args:
+   samples: list of n sentences to score, an n x n similarity matrix K, or
+       an n x d feature matrix X.
+   k: a pairwise similarity function, or a string identifying a predefined
+       similarity function.
+       Options: ngram_overlap, text_embeddings, pixels, image_embeddings.
+   score_K: if true, samples is an n x n similarity matrix K.
+   score_X: if true, samples is an n x d feature matrix X.
+   score_dual: if true, compute diversity score of X @ X.T.
+   normalize: if true, normalize the similarity scores.
+   model (optional): if k is "text_embeddings", a model mapping sentences to
+       embeddings (output should be an object with an attribute called
+       `pooler_output` or `last_hidden_state`). If k is "image_embeddings", a
+       model mapping images to embeddings.
+   tokenizer (optional): if k is "text_embeddings" or "ngram_overlap", a
+       tokenizer mapping strings to lists.
+   transform (optional): if k is "image_embeddings", a torchvision transform
+       to apply to the samples.
+   model_path (optional): if k is "text_embeddings", the name of a model on the
+       HuggingFace hub.
+   ns (optional): if k is "ngram_overlap", the values of n to calculate.
+   batch_size (optional): batch size to use if k is "text_embeddings" or
+       "image_embeddings".
+   device (optional): a string (e.g. "cuda", "cpu") or torch.device identifying
+       the device to use if k is "text_embeddings" or "image_embeddings".
 Returns:
+    VS: The Vendi Score.
 Examples:
+    >>> vendi_score = evaluate.load("vendi_score")
+    >>> samples = ["Look, Jane.",
+    ...            "See Spot.",
+    ...            "See Spot run.",
+    ...            "Run, Spot, run.",
+    ...            "Jane sees Spot run."]
+    >>> results = vendi_score.compute(samples=samples, k="ngram_overlap", ns=[1, 2])
     >>> print(results)
+    {'VS': 3.90657...}
 """
+@evaluate.utils.file_utils.add_start_docstrings(
+    _DESCRIPTION, _KWARGS_DESCRIPTION
+)
 class VendiScore(evaluate.Metric):
     """TODO: Short description of my evaluation module."""
             description=_DESCRIPTION,
             citation=_CITATION,
             inputs_description=_KWARGS_DESCRIPTION,
+            features=datasets.Features(
+                {
+                    "samples": datasets.Value("string"),
+                }
+            ),
+            homepage="http://github.com/Vertaix/Vendi-Score",
+            codebase_urls=["http://github.com/Vertaix/Vendi-Score"],
+            reference_urls=[],
         )
     def _download_and_prepare(self, dl_manager):
         """Optional: download external resources useful to compute the scores"""
         pass
+    def _compute(
+        self,
+        samples,
+        k="ngram_overlap",
+        score_K=False,
+        score_X=False,
+        score_dual=False,
+        normalize=False,
+        model=None,
+        tokenizer=None,
+        transform=None,
+        model_path=None,
+        ns=[1, 2],
+        batch_size=16,
+        device="cpu",
+    ):
+        if score_K:
+            vs = vendi.score_K(samples, normalize=normalize)
+        elif score_dual:
+            vs = vendi.score_dual(samples, normalize=normalize)
+        elif score_X:
+            vs = vendi.score_X(samples, normalize=normalize)
+        elif type(k) == str and k == "ngram_overlap":
+            vs = text_utils.ngram_vendi_score(
+                samples, ns=ns, tokenizer=tokenizer
+            )
+        elif type(k) == str and k == "text_embeddings":
+            vs = text_utils.embedding_vendi_score(
+                samples,
+                model=model,
+                tokenizer=tokenizer,
+                batch_size=batch_size,
+                device=device,
+                model_path=model_path,
+            )
+        elif type(k) == str and k == "pixels":
+            vs = image_utils.pixel_vendi_score(samples)
+        elif type(k) == str and k == "image_embeddings":
+            vs = image_utils.embedding_vendi_score(
+                samples,
+                batch_size=batch_size,
+                device=device,
+                model=model,
+                transform=transform,
+            )
+        else:
+            vs = vendi.score(samples, k)
+        return {"VS": vs}