tien314 committed on
Commit
cd84a49
·
verified ·
1 Parent(s): 070efd4

Update BM25S model

Browse files
README.md CHANGED
@@ -123,9 +123,9 @@ This dataset was created using the following data:
123
 
124
  | Statistic | Value |
125
  | --- | --- |
126
- | Number of documents | 362841 |
127
- | Number of tokens | 2975453 |
128
- | Average tokens per document | 8.2 |
129
 
130
  ## Parameters
131
 
 
123
 
124
  | Statistic | Value |
125
  | --- | --- |
126
+ | Number of documents | 831507 |
127
+ | Number of tokens | 8338070 |
128
+ | Average tokens per document | 10.03 |
129
 
130
  ## Parameters
131
 
corpus.jsonl CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9f53137b4941dda5acc77bc9aaf71e87674bfdd8c0e210f7da39835929823189
3
- size 31663181
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:16164bf429cfc58ca04d9782582ab594274462760bc2eb0cc776e07d095cc5d5
3
+ size 85381127
corpus.mmindex.json CHANGED
The diff for this file is too large to render. See raw diff
 
data.csc.index.npy CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b2f9838d0102b28876970161fa72ef842c60e8745dd855a336d95f4c5cab5694
3
- size 11901940
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9c698db1df7ff3cb81bc33c0e162561368ebbc3dd40cf3bc4e1ea747d1234407
3
+ size 33352408
indices.csc.index.npy CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f8f216e8ad4aa8d8a5842155212809db8f6271b634722a72b23493087b2d8f12
3
- size 11901940
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f1c9a4c9f1c7423f165ff28fbff534b9cb486281f7aa29246899c9eda84521dc
3
+ size 33352408
indptr.csc.index.npy CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:241b5484c5bb54f67b0b0d04b3e6d3d09a669c44c57f0ca6ed03ea630abcb43d
3
- size 577168
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:03d05dfbd9c6cf91f4b4d3f7047f44c573bd55fadee26ee6a4fbefc25b920fe8
3
+ size 1408676
params.index.json CHANGED
@@ -6,7 +6,7 @@
6
  "idf_method": "lucene",
7
  "dtype": "float32",
8
  "int_dtype": "int32",
9
- "num_docs": 362841,
10
  "version": "0.2.6",
11
  "backend": "numpy"
12
  }
 
6
  "idf_method": "lucene",
7
  "dtype": "float32",
8
  "int_dtype": "int32",
9
+ "num_docs": 831507,
10
  "version": "0.2.6",
11
  "backend": "numpy"
12
  }
vocab.index.json CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b1d934afdf72d46521d9543421eda21214c8dcbf975c20f20841c0e63e13ce8e
3
- size 2437143
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:736514f073d877676ddb3bb40b61362d0d9974b4e0826c84ed13ccd47e59b414
3
+ size 6301373