j-tobias committed
Commit 61ba593 · 1 Parent(s): 378c937

added Model Cards

Files changed (4):
  1. app.py +17 -11
  2. cards.txt +15 -2
  3. model.py +4 -1
  4. utils.py +10 -10
app.py CHANGED
@@ -1,12 +1,17 @@
 import gradio as gr # needs to be installed
-import os
 from dataset import Dataset
 from model import Model
-from huggingface_hub import login
 from utils import compute_wer
 
+# from utils import hf_login
+# hf_login()
+
+from huggingface_hub import login
+import os
+
 hf_token = os.getenv("HF_Token")
-login(token=hf_token, add_to_git_credential=True)
+login(hf_token)
+
 
 dataset = Dataset()
 models = Model()
@@ -33,14 +38,15 @@ def eval(data_subset:str, model_1:str, model_2:str)->str:
 
 def get_card(selected_model:str)->str:
 
-    if selected_model == "None":
-        return ""
-    elif selected_model == "Model2":
-        return "A very good model indeed"
-    elif selected_model == "Model3":
-        return "Also very good"
-    else:
-        return "Unknown Model"
+    with open("cards.txt", "r") as f:
+        cards = f.read()
+
+    cards = cards.split("@@")
+    for card in cards:
+        if "ID: "+selected_model in card:
+            return card
+
+    return "Unknown Model"
 
 def is_own(data_subset:str):
     if data_subset == "own":
cards.txt CHANGED
@@ -1,5 +1,18 @@
 #### Whisper Tiny (EN)
-
--
+- ID: openai/whisper-tiny.en
+- Hugging Face: [model](https://huggingface.co/openai/whisper-tiny.en)
+- Creator: openai
+- Finetuned: No
+- Model Size: 39 M Parameters
+- Model Paper: [Robust Speech Recognition via Large-Scale Weak Supervision](https://cdn.openai.com/papers/whisper.pdf)
+- Training Data: The models are trained on 680,000 hours of audio and the corresponding transcripts collected from the internet. 65% of this data (or 438,000 hours) represents English-language audio and matched English transcripts, roughly 18% (or 126,000 hours) represents non-English audio and English transcripts, while the final 17% (or 117,000 hours) represents non-English audio and the corresponding transcript. This non-English data represents 98 different languages.
 @@
+#### S2T Medium ASR
+- ID: facebook/s2t-medium-librispeech-asr
+- Hugging Face: [model](https://huggingface.co/facebook/s2t-medium-librispeech-asr)
+- Creator: facebook
+- Finetuned: No
+- Model Size: 71.2 M Parameters
+- Model Paper: [fairseq S2T: Fast Speech-to-Text Modeling with fairseq](https://arxiv.org/abs/2010.05171)
+- Training Data: [LibriSpeech ASR Corpus](https://www.openslr.org/12)
 @@
model.py CHANGED
@@ -97,8 +97,11 @@ class Model:
         predictions = []
         references = []
 
+        DaTaSeT._check_text()
+        text_column = DaTaSeT.text
+
         for sample in result:
             predictions.append(sample['transcription'])
-            references.append(sample['text'])
+            references.append(sample[text_column])
 
         return references, predictions
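
The hardcoded 'text' key becomes a column name resolved by the dataset object (DaTaSeT is the commit's own parameter name). dataset.py is not part of this commit, so the contract it must satisfy is an assumption; a minimal sketch of what _check_text and .text plausibly do:

# Hedged sketch of the Dataset-side contract model.py now relies on:
# `_check_text()` works out which column holds the reference transcript
# and stores its name in `self.text`. The method and attribute names
# come from the diff; the body is an assumption, since dataset.py is
# not shown in this commit.
class Dataset:
    def __init__(self, hf_dataset):
        self.dataset = hf_dataset
        self.text = None

    def _check_text(self):
        # Try the column names common in ASR datasets.
        for candidate in ("text", "sentence", "transcription", "transcript"):
            if candidate in self.dataset.column_names:
                self.text = candidate
                return
        raise ValueError("no reference-text column found in dataset")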
utils.py CHANGED
@@ -1,15 +1,15 @@
-# from huggingface_hub import login
-# import json
+from huggingface_hub import login
+import json
 import evaluate
-# import os
+import os
 
-# def hf_login():
-#     hf_token = os.getenv("HF_Token")
-#     print(hf_token)
-#     # if hf_token is None:
-#     #     with open("credentials.json", "r") as f:
-#     #         hf_token = json.load(f)["token"]
-#     login(token=hf_token, add_to_git_credential=True)
+def hf_login():
+    hf_token = os.getenv("HF_Token")
+    print(hf_token)
+    if hf_token is None:
+        with open("credentials.json", "r") as f:
+            hf_token = json.load(f)["token"]
+    login(token=hf_token, add_to_git_credential=True)
 
 def data(dataset):
     for i, item in enumerate(dataset):
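
hf_login is now live code with an env-var-first lookup and a credentials.json fallback, matching the commented-out call left in app.py. A usage sketch, assuming credentials.json holds {"token": "hf_..."} — the file name is from the diff; its exact shape is only implied by the json.load(f)["token"] access:

# Intended call site, per the commented-out lines in app.py:
# set HF_Token in the environment, or place a credentials.json
# ({"token": "hf_..."}) next to the script as a local fallback.
from utils import hf_login

hf_login()  # authenticates against the Hugging Face Hub before models load

Note that print(hf_token) echoes the secret into the logs; dropping it before deployment would be the safer choice.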