tien314 committed on
Commit
a477eff
·
verified ·
1 Parent(s): eff795c

Update BM25S model

Browse files
README.md CHANGED
@@ -123,9 +123,9 @@ This dataset was created using the following data:
123
 
124
  | Statistic | Value |
125
  | --- | --- |
126
- | Number of documents | 210801 |
127
- | Number of tokens | 1247449 |
128
- | Average tokens per document | 5.92 |
129
 
130
  ## Parameters
131
 
 
123
 
124
  | Statistic | Value |
125
  | --- | --- |
126
+ | Number of documents | 347536 |
127
+ | Number of tokens | 2831191 |
128
+ | Average tokens per document | 8.15 |
129
 
130
  ## Parameters
131
 
corpus.jsonl CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:34a38e542a64bfd8e7e4f75c31ec7f9cae03b26196701e40904bc580cd971150
3
- size 14491479
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:70cbea160b6e2b451f7895ef633be9f65f8f277a4cf719f2d6d1307e07b18944
3
+ size 29870605
corpus.mmindex.json CHANGED
The diff for this file is too large to render. See raw diff
 
data.csc.index.npy CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:99d46b75c6d39fd6b858be4b194d6385f77073896e33955d707086366a09801c
3
- size 4989924
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:10f3d0fd57448dd4dcd62cf39dfc704a7e351a384b3dfce6c7d592c598194e84
3
+ size 11324892
indices.csc.index.npy CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6d615201000cc91e6961a8b2a102d79cffbfa87610645fbfe414753bcc592d6e
3
- size 4989924
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2fd3bbbecb5cb9da6f930c947e99d143de87670d8f06f2929fbf217d26703450
3
+ size 11324892
indptr.csc.index.npy CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ee295a3f6f37a7988013298b018103c98aef8d5861590e4d87391285d40d266c
3
- size 248616
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:52277c0ab954d867d9d502276cc4236c6a4ffa6e81d4af199fd26a4a686589fa
3
+ size 577132
params.index.json CHANGED
@@ -6,7 +6,7 @@
6
  "idf_method": "lucene",
7
  "dtype": "float32",
8
  "int_dtype": "int32",
9
- "num_docs": 210801,
10
  "version": "0.2.6",
11
  "backend": "numpy"
12
  }
 
6
  "idf_method": "lucene",
7
  "dtype": "float32",
8
  "int_dtype": "int32",
9
+ "num_docs": 347536,
10
  "version": "0.2.6",
11
  "backend": "numpy"
12
  }
vocab.index.json CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:03d5613c6c043f095d746de405aab145523713820d2756469e919e871e02f5e4
3
- size 995887
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5262817bfcf340fbc58ca8779dce06fc99093abe768db9ebabb1099bbccb0774
3
+ size 2436979