Upload folder using huggingface_hub

- .gitattributes +1 -0
- 1_Pooling/config.json +10 -0
- README.md +760 -0
- config.json +28 -0
- config_sentence_transformers.json +10 -0
- model.safetensors +3 -0
- modules.json +20 -0
- sentence_bert_config.json +4 -0
- sentencepiece.bpe.model +3 -0
- special_tokens_map.json +51 -0
- tokenizer.json +3 -0
- tokenizer_config.json +54 -0
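
The commit title says the folder was pushed with `huggingface_hub`. Purely as an illustration (not taken from this commit), a folder upload like this is typically done with the `upload_folder` API; the local path and repository id below are placeholders.

```python
from huggingface_hub import HfApi

api = HfApi()

# Push every file in the local model directory to the Hub repository.
# "your-username/your-model" and "./finetuned-e5-model" are placeholders.
api.upload_folder(
    folder_path="./finetuned-e5-model",
    repo_id="your-username/your-model",
    repo_type="model",
    commit_message="Upload folder using huggingface_hub",
)
```

Large binaries such as `model.safetensors` and `tokenizer.json` are stored through Git LFS, which is what the `.gitattributes` change below configures.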
.gitattributes
CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
+tokenizer.json filter=lfs diff=lfs merge=lfs -text
1_Pooling/config.json
ADDED
@@ -0,0 +1,10 @@
{
    "word_embedding_dimension": 1024,
    "pooling_mode_cls_token": false,
    "pooling_mode_mean_tokens": true,
    "pooling_mode_max_tokens": false,
    "pooling_mode_mean_sqrt_len_tokens": false,
    "pooling_mode_weightedmean_tokens": false,
    "pooling_mode_lasttoken": false,
    "include_prompt": true
}
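
This pooling configuration selects attention-mask-aware mean pooling (`pooling_mode_mean_tokens: true`) over 1024-dimensional token embeddings, followed by the `2_Normalize` module listed later. A minimal sketch of the equivalent computation with plain `transformers` is below; the model id is the base model as a placeholder, not this repository's own weights.

```python
import torch
import torch.nn.functional as F
from transformers import AutoModel, AutoTokenizer

# Placeholder id: substitute this repository's id on the Hub.
model_id = "intfloat/multilingual-e5-large"
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModel.from_pretrained(model_id)

texts = ["query: example question", "example passage text"]
batch = tokenizer(texts, padding=True, truncation=True, max_length=512, return_tensors="pt")

with torch.no_grad():
    token_embeddings = model(**batch).last_hidden_state   # (batch, seq_len, 1024)

# Mean pooling over real tokens only, matching pooling_mode_mean_tokens=true.
mask = batch["attention_mask"].unsqueeze(-1).float()
embeddings = (token_embeddings * mask).sum(dim=1) / mask.sum(dim=1).clamp(min=1e-9)

# The Normalize module corresponds to L2-normalizing the pooled vectors.
embeddings = F.normalize(embeddings, p=2, dim=1)
print(embeddings.shape)  # torch.Size([2, 1024])
```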
README.md
ADDED
@@ -0,0 +1,760 @@
---
tags:
- sentence-transformers
- sentence-similarity
- feature-extraction
- generated_from_trainer
- dataset_size:32378
- loss:MultipleNegativesRankingLoss
base_model: intfloat/multilingual-e5-large
widget:
- source_sentence: 'query: What is the maximum amount of Elective Deferral Contributions
    that a Participant can make in a taxable year?'
  sentences:
  - no event later than the time permitted by the Code with any applicable extensions.
    3.3. Maximum Deferral. Notwithstanding any other provision of the Plan to the
    contrary the Plan (and any other plan maintained by the Employer) shall not accept
    Elective Deferral Contributions under Section3.1.1 for any taxable year of a Participant
    in excess of $10,500, as adjusted under section 402(g) of the Code. For the taxable
    year following the taxable year in which a Participant receives a hardship withdrawal,
    the amount determined under the preceding sentence shall be reduced by the amount
    of such Participant’s Elective Deferral Contributions for the taxable year during
    which the hardship withdrawal occurs. 3.3.1. Distribution of Excess Elective Deferrals.
    If a Participant has Excess Deferrals for a taxable year of that Participant,
  - on Catch-up Contributions under Code Section 414(v)(2)(B)(i) was $5,000 for taxable
    years beginning in 2006.After 2006, the $5,000 limit is adjusted by the Secretary
    of the Treasury for cost-of-living increases under Code Section 414(v)(2)(C).Any
    such adjustments will be in multiples of $500. Elective Deferral Contributions
    are 100% vested and nonforfeitable. (b) The Employer shall make Qualified Nonelective
    Contributions in an amount equal to 3% of Compensation for the Plan Year for persons
    who were Active Participants at any time during the Plan Year. Qualified Nonelective
    Contributions are 100% vested when made. RESTATEMENT MAY 15, 2014 23 ARTICLE III
    (5-22134) --------------------------------------------------------------------------------
    (c) Discretionary Contributions may be made for each Plan Year in an amount determined
    by the Employer. Discretionary Contributions are subject to the Vesting Percentage.
    Employer Contributions are
  - direction in connection with Employer’s business are “works made for hire” and
    shall be the sole and complete property of Employer and those any and all copyrights
    to such works shall belong to Employer. To the extent any of the works described
    in the preceding sentence are not deemed to be “works made for hire”, Employee
    hereby assigns all proprietary rights, including copyright, in these works to
    Employer without further compensation. (b)Employee further agrees to (i) disclose
    promptly to Employer all such Creations which Employee has made or may make solely,
    jointly or commonly with others during the period of employment to the extent
    connected with Employer’s business, (ii) assign all such Creations to Employer
    and (iii) execute and sign any and all applications, assignments or other instruments
- source_sentence: 'query: What is the maximum rate at which a stock loan borrow fee
    can be charged?'
  sentences:
  - nn,nnn.nn Invoice Dollars for payment will be two (2) nnn,nnn.nn Parts per million
    (ppm) will be two (2) nn.nn All ratios will be two (2) nn.nn All percents (%)will
    be two (2) nn.nn All degrees Fahrenheit (0F) will be zero (0) n,nnn. SO2 lbs/MMBtu
    will be two (2) nn.nn Price in Dollars per ton will be four (4) nn.nnnn Quality
    Dollars per ton will be three (3) n.nnn Items not specified above will use the
    industry standards for significant digits to the right of the decimal place. 5.4
    Beneficiated or Washed Coal. Seller shall provide an analysis indicating Btu/lb,
    moisture, Sulfur, and ash on all raw Coal which is subjected to beneficiation
    and/or washing to produce Coal meeting the Specifications. Within five (5)Business
    Days following the end of
  - '(b) any stock loan borrow fee that would be payable for such Shares, expressed
    as fixed rate per annum. Maximum Stock Loan Rate: 50 basis points. Increased Cost
    of Stock Borrow: Not Applicable. --------------------------------------------------------------------------------
    FED FUNDS: “FED FUNDS” means, for any day, the rate set forth for such day opposite
    the caption “Federal funds”, as such rate is displayed on the page “FedsOpen <Index>
    <GO>” on the BLOOMBERG Professional Service, or any successor page; provided that
    if no rate appears for any day on such page, the rate for the immediately preceding
    day for which a rate does so appear shall be used for such day. Hedging Party:
    Barclays or an affiliate of Barclays that is involved in the hedging of the Transaction
    for all applicable Additional Disruption'
  - Term Loan will be secured as provided in Section 6. In addition to interest as
    set forth herein, Borrower shall pay to Bank a late charge equal to the greater
    of (i) five percent (5.00%) of the total payment due, or (ii) $10.00 per late
    charge, in the event any such amount is not paid within one (1) day after the
    date when due. 1 1.3Line of Credit. Subject at all times to the terms and limitations
    set forth herein, the Bank agrees to extend credit to the Borrower on a revolving
    basis from time to time prior to the Expiration Date in one or more Advances in
    an aggregate principal amount not to exceed at any time outstanding Two Million
    Dollars and No/100 Cents ($2,000,000.00) (the “Maximum
- source_sentence: 'query: What transfer taxes are the undersigned responsible for
    paying in the event of a conversion?'
  sentences:
  - 'undersigned, the undersigned will pay all transfer taxes payable with respect
    thereto and is delivering herewith such certificates and opinions as reasonably
    requested by the Company in accordance therewith. No fee will be charged to the
    holder for any conversion, except for such transfer taxes, if any. By the delivery
    of this Notice of Conversion the undersigned represents and warrants to the Company
    that its ownership of the Common Stock does not exceed the amounts specified under
    Section 4 of this Debenture, as determined in accordance with Section 13(d) of
    the Exchange Act. The undersigned agrees to comply with the prospectus delivery
    requirements under the applicable securities laws in connection with any transfer
    of the aforesaid shares of Common Stock. Conversion calculations: Date to Effect
    Conversion: Principal Amount'
  - 'second copy of this letter is provided for your records. Timothy, we are very
    enthusiastic about you joining Enzon and look forward to a mutually rewarding
    working relationship. We believe we can offer you opportunities that challenge
    and reward you and we look forward to your acceptance of this letter agreement.
    Sincerely, /s/ Andrew Rackear --------------------------------------------------------------------------------
    Andrew Rackear VP & General Counsel Enzon Pharmaceuticals, Inc. Accepted and agreed:
    /s/ Timothy G. Daly --------------------------------------------------------------------------------
    Timothy G. Daly Date: 11/23/2011 --------------------------------------------------------------------------------'
  - DATE AN AMOUNT UNDER SECTION2(D)(I)(4)OF THE AGREEMENT GREATER THAN AN AMOUNT
    THAT BANK WOULD HAVE BEEN REQUIRED TO PAY TO COUNTERPARTY IN THE ABSENCE OF SUCH
    TRANSFER AND ASSIGNMENT; (E) AN EVENT OF DEFAULT, POTENTIAL EVENT OF DEFAULT OR
    TERMINATION EVENT WILL NOT OCCUR AS A RESULT OF SUCH TRANSFER AND ASSIGNMENT;
    (F) WITHOUT LIMITING THE GENERALITY OF CLAUSE (B), COUNTERPARTY SHALL CAUSE THE
    TRANSFEREE TO MAKE SUCH PAYEE TAX REPRESENTATIONS AND TO PROVIDE SUCH TAX DOCUMENTATION
    AS MAY BE REASONABLY REQUESTED BY BANK TO PERMIT BANK TO DETERMINE THAT RESULTS
    DESCRIBED IN CLAUSES (D)AND (E)WILL NOT OCCUR UPON OR AFTER SUCH TRANSFER AND
    ASSIGNMENT; AND (G) COUNTERPARTY SHALL BE RESPONSIBLE FOR ALL REASONABLE COSTS
    AND EXPENSES, INCLUDING REASONABLE COUNSEL FEES, INCURRED BY BANK IN CONNECTION
    WITH SUCH
- source_sentence: 'query: What is the correct order for reducing payments under Section
    9(c) of the employment agreement?'
  sentences:
  - 'Exhibit 10.52 December18, 2008 Jeffery H. Boyd c/o priceline.com Incorporated
    800 Connecticut Avenue Norwalk, CT 06854 Dear Jeff: This letter serves to amend
    an incorrect cross-reference in Section16 of the amended and restated employment
    agreement between you and priceline.com Incorporated (the “Company”), dated August22,
    2008 (the “Employment Agreement”). Capitalized terms that are used herein shall
    have the same meaning as those terms used in the Employment Agreement. Effective
    as of the date hereof, the second sentence of the second paragraph of Section16(a)shall
    be amended in its entirety to read as follows: “The reduction of the amounts payable
    hereunder, if applicable, shall be made by reducing the payments under Section9(c)(i)or
    Section9(c)(ii), as applicable, in the following order: first, the payments under
    clause (A), second, the payments under clause (E),'
  - under all circumstances, irrespective of any lack of validity or unenforceability
    of any Loan Documents; any draft, certificate or other document presented under
    an Australian Letter of Credit having been determined to be forged, fraudulent,
    invalid or insufficient in any respect or any statement therein being untrue or
    inaccurate in any respect; or the existence of any setoff or defense that any
    Loan Party may have with respect to any Obligations. No Australian Fronting Bank
    assumes any responsibility for any failure or delay in performance or any breach
    by any Australian Borrower or other Person of any obligations under any Australian
    LC Documents. No Australian Fronting Bank makes any express or implied warranty,
    representation or guarantee to Australian Lenders with respect to the Australian
    Facility Collateral, the Australian
  - 'of the Agreement is hereby amended by deleting the first sentence thereof and
    inserting the following sentence in its place: Notwithstanding the foregoing,
    if the payment required to be paid under this Section9(f)(i), when considered
    either alone or with other payments paid or imputed to the Executive from Wintrust
    or an Affiliate that would be deemed “excess parachute payments” under Section280G(b)(1)
    of the Code is deemed by Wintrust to be a “parachute payment” under Section280G(b)(2)
    of the Code, then the amount of Severance Pay required to be paid under this Section9(f)(i)
    shall be automatically reduced in order of scheduled payments to an amount equal
    to $1.00 less than three times (3x)the “base amount” (as defined in Section280G(3)
    of the Code) (the “Reduced Amount”). Amendment 3.1 --------------------------------------------------------------------------------
    3.Section9(i) of'
- source_sentence: 'query: What is the lump sum payment for professional outplacement
    services that the Executive will receive?'
  sentences:
  - be entitled to receive continuing group medical coverage for himself and his dependents
    (on a non-taxable basis, including if necessary, payment of any gross-up payments
    necessary to result in net non-taxable benefits), which coverage is not materially
    less favorable to the Executive than the group medical coverage which was provided
    to the Executive by the Company or its affiliates immediately prior to the Termination
    Date. To the extent applicable and to the extent permitted by law, any continuing
    coverage provided to the Executive and/or his dependents pursuant to this subparagraph
    (iii) shall be considered part of, and not in addition to, any coverage required
    under COBRA. (iv) The Executive will be provided with a lump sum payment of $12,000
    for professional outplacement services. Notice by the Company that
  - vested amounts, if any, to which the Executive is entitled under the Savings Plan
    as of the Date of Termination, the Company will pay the Executive, in accordance
    with Section3.04, a lump sum amount equal to the value of the unvested portion,
    if any, of the employer matching and fixed contributions (and attributable earnings)
    credited to the Executive under the Savings Plan. 8 --------------------------------------------------------------------------------
    (f) Outplacement Services. For a period not to exceed six (6)months following
    the Date of Termination, the Company will provide the Executive with reasonable
    outplacement services consistent with past practices of the Company prior to the
    Change in Control or, if no past practice has been established prior to the Change
    in Control, consistent with the prevailing practice in the medical device manufacturing
    industry.
  - 'the rights granted to MICL under the License Agreement. “Supply Interruption”
    has the meaning set forth in Section4.2. “Technical Agreement” means the agreement
    entered into by the Parties as of [***], and appended hereto as Exhibit B, as
    may be amended by the Parties from time to time. The Parties agree to amend and
    restate the Technical Agreement within [***] of the Amendment Effective Date in
    order to align it with the provisions of this Agreement and the License Agreement.
    “Term” has the meaning set forth in Section2. [***]: CONFIDENTIAL PORTIONS OMITTED
    AND FILED SEPARATELY WITH THE COMMISSION. 8 --------------------------------------------------------------------------------
    “Testing Laboratory” means [***], or such other independent testing facility approved
    in the appropriate jurisdiction in the Licensed Territory as may be agreed by
    the Parties through the'
pipeline_tag: sentence-similarity
library_name: sentence-transformers
---

# SentenceTransformer based on intfloat/multilingual-e5-large

This is a [sentence-transformers](https://www.SBERT.net) model finetuned from [intfloat/multilingual-e5-large](https://huggingface.co/intfloat/multilingual-e5-large) on the json dataset. It maps sentences & paragraphs to a 1024-dimensional dense vector space and can be used for semantic textual similarity, semantic search, paraphrase mining, text classification, clustering, and more.

## Model Details

### Model Description
- **Model Type:** Sentence Transformer
- **Base model:** [intfloat/multilingual-e5-large](https://huggingface.co/intfloat/multilingual-e5-large) <!-- at revision ab10c1a7f42e74530fe7ae5be82e6d4f11a719eb -->
- **Maximum Sequence Length:** 512 tokens
- **Output Dimensionality:** 1024 dimensions
- **Similarity Function:** Cosine Similarity
- **Training Dataset:**
    - json
<!-- - **Language:** Unknown -->
<!-- - **License:** Unknown -->

### Model Sources

- **Documentation:** [Sentence Transformers Documentation](https://sbert.net)
- **Repository:** [Sentence Transformers on GitHub](https://github.com/UKPLab/sentence-transformers)
- **Hugging Face:** [Sentence Transformers on Hugging Face](https://huggingface.co/models?library=sentence-transformers)

### Full Model Architecture

```
SentenceTransformer(
  (0): Transformer({'max_seq_length': 512, 'do_lower_case': False}) with Transformer model: PeftModelForFeatureExtraction
  (1): Pooling({'word_embedding_dimension': 1024, 'pooling_mode_cls_token': False, 'pooling_mode_mean_tokens': True, 'pooling_mode_max_tokens': False, 'pooling_mode_mean_sqrt_len_tokens': False, 'pooling_mode_weightedmean_tokens': False, 'pooling_mode_lasttoken': False, 'include_prompt': True})
  (2): Normalize()
)
```

## Usage

### Direct Usage (Sentence Transformers)

First install the Sentence Transformers library:

```bash
pip install -U sentence-transformers
```

Then you can load this model and run inference.
```python
from sentence_transformers import SentenceTransformer

# Download from the 🤗 Hub
model = SentenceTransformer("sentence_transformers_model_id")
# Run inference
sentences = [
    'query: What is the lump sum payment for professional outplacement services that the Executive will receive?',
    'be entitled to receive continuing group medical coverage for himself and his dependents (on a non-taxable basis, including if necessary, payment of any gross-up payments necessary to result in net non-taxable benefits), which coverage is not materially less favorable to the Executive than the group medical coverage which was provided to the Executive by the Company or its affiliates immediately prior to the Termination Date. To the extent applicable and to the extent permitted by law, any continuing coverage provided to the Executive and/or his dependents pursuant to this subparagraph (iii) shall be considered part of, and not in addition to, any coverage required under COBRA. (iv) The Executive will be provided with a lump sum payment of $12,000 for professional outplacement services. Notice by the Company that',
    'vested amounts, if any, to which the Executive is entitled under the Savings Plan as of the Date of Termination, the Company will pay the Executive, in accordance with Section3.04, a lump sum amount equal to the value of the unvested portion, if any, of the employer matching and fixed contributions (and attributable earnings) credited to the Executive under the Savings Plan. 8 -------------------------------------------------------------------------------- (f) Outplacement Services. For a period not to exceed six (6)months following the Date of Termination, the Company will provide the Executive with reasonable outplacement services consistent with past practices of the Company prior to the Change in Control or, if no past practice has been established prior to the Change in Control, consistent with the prevailing practice in the medical device manufacturing industry.',
]
embeddings = model.encode(sentences)
print(embeddings.shape)
# [3, 1024]

# Get the similarity scores for the embeddings
similarities = model.similarity(embeddings, embeddings)
print(similarities.shape)
# [3, 3]
```

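The example above, like the widget samples in the metadata, prepends `query: ` to the question while leaving the contract passages unprefixed, mirroring how the training triplets were formatted. As an illustrative (not official) retrieval sketch built only on the calls already shown in this card, queries can be ranked against a small set of candidate passages like this; the model id is a placeholder:

```python
from sentence_transformers import SentenceTransformer

# Placeholder id: replace with this repository's id on the Hub.
model = SentenceTransformer("sentence_transformers_model_id")

# "query: " prefix for the question, bare clause text for candidates,
# matching the training samples shown later in this card.
query = "query: What is the lump sum payment for professional outplacement services?"
passages = [
    "The Executive will be provided with a lump sum payment of $12,000 for professional outplacement services.",
    "The Company will provide the Executive with reasonable outplacement services for up to six months.",
]

query_emb = model.encode([query])
passage_embs = model.encode(passages)

# Cosine similarity scores; the highest-scoring passage is the best match.
scores = model.similarity(query_emb, passage_embs)
print(scores)
```
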
<!--
### Direct Usage (Transformers)

<details><summary>Click to see the direct usage in Transformers</summary>

</details>
-->

<!--
### Downstream Usage (Sentence Transformers)

You can finetune this model on your own dataset.

<details><summary>Click to expand</summary>

</details>
-->

<!--
### Out-of-Scope Use

*List how the model may foreseeably be misused and address what users ought not to do with the model.*
-->

<!--
## Bias, Risks and Limitations

*What are the known or foreseeable issues stemming from this model? You could also flag here known failure cases or weaknesses of the model.*
-->

<!--
### Recommendations

*What are recommendations with respect to the foreseeable issues? For example, filtering explicit content.*
-->

## Training Details

### Training Dataset

#### json

* Dataset: json
* Size: 32,378 training samples
* Columns: <code>anchor</code>, <code>positive</code>, and <code>negative</code>
* Approximate statistics based on the first 1000 samples:
  |         | anchor | positive | negative |
  |:--------|:-------|:---------|:---------|
  | type    | string | string   | string   |
  | details | <ul><li>min: 12 tokens</li><li>mean: 25.6 tokens</li><li>max: 47 tokens</li></ul> | <ul><li>min: 7 tokens</li><li>mean: 190.52 tokens</li><li>max: 512 tokens</li></ul> | <ul><li>min: 53 tokens</li><li>mean: 202.25 tokens</li><li>max: 485 tokens</li></ul> |
* Samples:
  | anchor | positive | negative |
  |:-------|:---------|:---------|
  | <code>query: What is the effective date of the Fifth Amendment to the Approach Resources Inc. 2007 Stock Incentive Plan?</code> | <code>Exhibit 10.1 FIFTH AMENDMENT TO THE APPROACH RESOURCES INC. 2007 STOCK INCENTIVE PLAN This Fifth Amendment (the “Fifth Amendment”) to the Approach Resources Inc. 2007 Stock Incentive Plan, as amended from time to time (the “Plan”), is made effective as of June 2, 2016 (the “Amendment Effective Date”), by Approach Resources Inc., a Delaware corporation (“Approach”), subject to approval by Approach’s stockholders. W I T N E S S E T H: WHEREAS, Approach established the Plan, originally effective as of June 28, 2007 and most recently amended effective March 2, 2016, under which Approach is authorized to grant equity-based incentive awards to certain employees and service providers of Approach and its subsidiaries; WHEREAS, Section 14.1 of the Plan provides that Approach’s board of directors (the “Board”) may</code> | <code>Exhibit 10.39 AMENDMENT TO THE BPZ RESOURCES, INC. 2007 LONG-TERM INCENTIVE COMPENSATION PLAN WHEREAS, BPZ Resources,Inc. (the “Company”) adopted and maintains the BPZ Energy,Inc. 2007 Long-Term Incentive Compensation Plan (the “Plan”), effective as of June4, 2007, to provide an opportunity for its eligible employees and certain independent contractors to earn long term incentive awards in consideration for their services; WHEREAS, the Company now desires to amend the Plan to reserve additional shares for issuance under the Plan. NOW THEREFORE, effective as of June 20, 2014, the Plan is hereby amended by replacing Section7(a)with the following new Section7(a)that shall read as follows: “(a) Maximum Shares. Subject to adjustment as provided in this Section 7, there is hereby reserved for issuance under the Plan up to 12,000,000 shares of Stock</code> |
  | <code>query: What is the date on which the Company accepted the subscription?</code> | <code>to acceptance by the Company, the undersigned has completed this Subscription Agreement to evidence his/her/its subscription for participation in the securities of the Company, this ____th day of _________ 2013. Subscriber Printed name If an entity, on behalf of: Subscriber’s position with entity: The Company has accepted this subscription this ____ day of _________ 2012. OverNear, Inc. By Its: Printed Name: Page11 of 19 Subscription Agreement OverNear, Inc. -------------------------------------------------------------------------------- Subscription Documents - Continued OVERNEAR, INC. (THE “COMPANY”) INVESTOR APPLICATION (QUALIFICATION QUESTIONNAIRE) (CONFIDENTIAL) ALL INFORMATION CONTAINED IN THIS APPLICATIONWILL BE TREATEDCONFIDENTIALLY. The undersigned understands, however, that the Company may present this application to such parties as the Company, in his discretion, deems appropriate when called upon to establish that the proposed offer and sale of the Securities are exempt</code> | <code>and each Subscriber is executing and delivering this agreement in reliance upon the exemption from securities registration afforded by Section 4(2) of the Securities Act and Rule 506 of Regulation D as promulgated by the SEC under the Securities Act; and WHEREAS the subscription for the Securities will be made in accordance with and subject to the terms and conditions of this Subscription Agreement and the Company's Confidential Private Placement Memorandum dated January 28, 2014 together with all amendments thereof and supplements and exhibits thereto and as such may be amended from time to time (the "Memorandum"); and WHEREAS, the Subscriber desires to purchase such number of shares of Common Stock (together with the associated Warrants) as set forth on the signature page hereof on the terms and</code> |
  | <code>query: What percentage of common shares must an entity own to be considered an Acquiring Person under the Rights Agreement?</code> | <code>the mutual agreements herein set forth, the parties agree as follows: Section1. Amendment to Section1.1. Section1.1 of the Rights Agreement is amended to read in its entirety as follows: “1.1 “Acquiring Person” shall mean any Person (as such term is hereinafter defined) who or which, together with all Affiliates and Associates (as such terms are hereinafter defined) of such Person, shall be the Beneficial Owner (as such term is hereinafter defined) of 15% or more of the Common Shares of the Company then outstanding, but shall not include: (i) the Company; (ii) any Subsidiary of the Company; (iii) any employee benefit plan of the Company or of any Subsidiary of the Company or any entity holding shares of capital stock of the Company for or pursuant to the</code> | <code>of more than 25% of the Common Shares outstanding immediately prior to the distribution, and in making this determination the Common Shares to be issued to such Person in the distribution shall be deemed to be held by such Person but shall not be included in the aggregate number of outstanding Common Shares immediately prior to the distribution ("Exempt Acquisitions"); the acquisition of Common Shares upon the exercise of Convertible Securities received by such Person pursuant to a Permitted Bid Acquisition, an Exempt Acquisition or a Pro Rata Acquisition (as defined below) ("Convertible Security Acquisitions"); or acquisitions as a result of a stock dividend, a stock split or other event pursuant to which such Person receives or acquires Common Shares or Convertible Securities on the same pro rata</code> |
* Loss: [<code>MultipleNegativesRankingLoss</code>](https://sbert.net/docs/package_reference/sentence_transformer/losses.html#multiplenegativesrankingloss) with these parameters:
  ```json
  {
      "scale": 20.0,
      "similarity_fct": "cos_sim"
  }
  ```
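
To make the reported parameters concrete, the following is a rough, unofficial sketch of what MultipleNegativesRankingLoss computes for (anchor, positive, negative) triplets: each anchor is scored against every positive and every explicit negative in the batch with scaled cosine similarity (the `scale: 20.0` and `cos_sim` above), and cross-entropy pushes the matching positive to the top. The function name and tensors are illustrative only.

```python
import torch
import torch.nn.functional as F

def mnrl_sketch(anchors: torch.Tensor, positives: torch.Tensor, negatives: torch.Tensor,
                scale: float = 20.0) -> torch.Tensor:
    """Rough sketch of the in-batch-negatives ranking loss used in this card.

    anchors, positives, negatives: (B, d) embedding batches. For anchor i, the
    candidates are all B positives plus all B negatives; the i-th positive is
    the correct one.
    """
    candidates = torch.cat([positives, negatives], dim=0)                # (2B, d)
    scores = scale * F.cosine_similarity(
        anchors.unsqueeze(1), candidates.unsqueeze(0), dim=-1
    )                                                                    # (B, 2B)
    labels = torch.arange(anchors.size(0), device=anchors.device)        # index of each positive
    return F.cross_entropy(scores, labels)
```
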

### Training Hyperparameters
#### Non-Default Hyperparameters

- `per_device_train_batch_size`: 16
- `gradient_accumulation_steps`: 8
- `learning_rate`: 0.0001
- `num_train_epochs`: 1
- `lr_scheduler_type`: cosine_with_restarts
- `warmup_ratio`: 0.1
- `bf16`: True
- `batch_sampler`: no_duplicates

A sketch of how these non-default settings map onto Sentence Transformers training arguments follows below.

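The sketch below is an unofficial reconstruction under stated assumptions: it wires the non-default hyperparameters above into `SentenceTransformerTrainingArguments` and a `SentenceTransformerTrainer`. The output directory and the one-row dataset are placeholders standing in for the 32,378 real triplets, and it omits the PEFT/LoRA wrapping implied by `PeftModelForFeatureExtraction` in the architecture listing.

```python
from datasets import Dataset
from sentence_transformers import (
    SentenceTransformer,
    SentenceTransformerTrainer,
    SentenceTransformerTrainingArguments,
)
from sentence_transformers.losses import MultipleNegativesRankingLoss
from sentence_transformers.training_args import BatchSamplers

# Base model named in this card.
model = SentenceTransformer("intfloat/multilingual-e5-large")
loss = MultipleNegativesRankingLoss(model, scale=20.0)

# Stand-in triplets with the anchor/positive/negative column layout reported above.
train_dataset = Dataset.from_dict({
    "anchor": ["query: What is the maximum deferral amount for a taxable year?"],
    "positive": ["The Plan shall not accept Elective Deferral Contributions in excess of $10,500."],
    "negative": ["Employee hereby assigns all proprietary rights in these works to Employer."],
})

args = SentenceTransformerTrainingArguments(
    output_dir="outputs/finetuned-e5",          # placeholder path
    per_device_train_batch_size=16,
    gradient_accumulation_steps=8,
    learning_rate=1e-4,
    num_train_epochs=1,
    lr_scheduler_type="cosine_with_restarts",
    warmup_ratio=0.1,
    bf16=True,
    batch_sampler=BatchSamplers.NO_DUPLICATES,  # the `no_duplicates` sampler listed above
)

trainer = SentenceTransformerTrainer(
    model=model, args=args, train_dataset=train_dataset, loss=loss
)
trainer.train()
```
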
#### All Hyperparameters
<details><summary>Click to expand</summary>

- `overwrite_output_dir`: False
- `do_predict`: False
- `eval_strategy`: no
- `prediction_loss_only`: True
- `per_device_train_batch_size`: 16
- `per_device_eval_batch_size`: 8
- `per_gpu_train_batch_size`: None
- `per_gpu_eval_batch_size`: None
- `gradient_accumulation_steps`: 8
- `eval_accumulation_steps`: None
- `torch_empty_cache_steps`: None
- `learning_rate`: 0.0001
- `weight_decay`: 0.0
- `adam_beta1`: 0.9
- `adam_beta2`: 0.999
- `adam_epsilon`: 1e-08
- `max_grad_norm`: 1.0
- `num_train_epochs`: 1
- `max_steps`: -1
- `lr_scheduler_type`: cosine_with_restarts
- `lr_scheduler_kwargs`: {}
- `warmup_ratio`: 0.1
- `warmup_steps`: 0
- `log_level`: passive
- `log_level_replica`: warning
- `log_on_each_node`: True
- `logging_nan_inf_filter`: True
- `save_safetensors`: True
- `save_on_each_node`: False
- `save_only_model`: False
- `restore_callback_states_from_checkpoint`: False
- `no_cuda`: False
- `use_cpu`: False
- `use_mps_device`: False
- `seed`: 42
- `data_seed`: None
- `jit_mode_eval`: False
- `use_ipex`: False
- `bf16`: True
- `fp16`: False
- `fp16_opt_level`: O1
- `half_precision_backend`: auto
- `bf16_full_eval`: False
- `fp16_full_eval`: False
- `tf32`: None
- `local_rank`: 0
- `ddp_backend`: None
- `tpu_num_cores`: None
- `tpu_metrics_debug`: False
- `debug`: []
- `dataloader_drop_last`: False
- `dataloader_num_workers`: 0
- `dataloader_prefetch_factor`: None
- `past_index`: -1
- `disable_tqdm`: False
- `remove_unused_columns`: True
- `label_names`: None
- `load_best_model_at_end`: False
- `ignore_data_skip`: False
- `fsdp`: []
- `fsdp_min_num_params`: 0
- `fsdp_config`: {'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False}
- `fsdp_transformer_layer_cls_to_wrap`: None
- `accelerator_config`: {'split_batches': False, 'dispatch_batches': None, 'even_batches': True, 'use_seedable_sampler': True, 'non_blocking': False, 'gradient_accumulation_kwargs': None}
- `deepspeed`: None
- `label_smoothing_factor`: 0.0
- `optim`: adamw_torch
- `optim_args`: None
- `adafactor`: False
- `group_by_length`: False
- `length_column_name`: length
- `ddp_find_unused_parameters`: None
- `ddp_bucket_cap_mb`: None
- `ddp_broadcast_buffers`: False
- `dataloader_pin_memory`: True
- `dataloader_persistent_workers`: False
- `skip_memory_metrics`: True
- `use_legacy_prediction_loop`: False
- `push_to_hub`: False
- `resume_from_checkpoint`: None
- `hub_model_id`: None
- `hub_strategy`: every_save
- `hub_private_repo`: False
- `hub_always_push`: False
- `gradient_checkpointing`: False
- `gradient_checkpointing_kwargs`: None
- `include_inputs_for_metrics`: False
- `eval_do_concat_batches`: True
- `fp16_backend`: auto
- `push_to_hub_model_id`: None
- `push_to_hub_organization`: None
- `mp_parameters`: 
- `auto_find_batch_size`: False
- `full_determinism`: False
- `torchdynamo`: None
- `ray_scope`: last
- `ddp_timeout`: 1800
- `torch_compile`: False
- `torch_compile_backend`: None
- `torch_compile_mode`: None
- `dispatch_batches`: None
- `split_batches`: None
- `include_tokens_per_second`: False
- `include_num_input_tokens_seen`: False
- `neftune_noise_alpha`: None
- `optim_target_modules`: None
- `batch_eval_metrics`: False
- `eval_on_start`: False
- `eval_use_gather_object`: False
- `prompts`: None
- `batch_sampler`: no_duplicates
- `multi_dataset_batch_sampler`: proportional

</details>

### Training Logs
<details><summary>Click to expand</summary>

| Epoch | Step | Training Loss |
|:------:|:----:|:-------------:|
| 0.0040 | 1 | 2.1317 |
| 0.0079 | 2 | 2.1656 |
| 0.0119 | 3 | 2.0907 |
| 0.0158 | 4 | 2.1018 |
| 0.0198 | 5 | 2.2049 |
| 0.0237 | 6 | 2.133 |
| 0.0277 | 7 | 2.1612 |
| 0.0316 | 8 | 2.1797 |
| 0.0356 | 9 | 2.0282 |
| 0.0395 | 10 | 2.0335 |
| 0.0435 | 11 | 1.953 |
| 0.0474 | 12 | 1.9439 |
| 0.0514 | 13 | 1.8734 |
| 0.0553 | 14 | 1.9584 |
| 0.0593 | 15 | 1.7648 |
| 0.0632 | 16 | 1.8349 |
| 0.0672 | 17 | 1.7773 |
| 0.0711 | 18 | 1.7721 |
| 0.0751 | 19 | 1.6587 |
| 0.0791 | 20 | 1.5767 |
| 0.0830 | 21 | 1.4761 |
| 0.0870 | 22 | 1.4714 |
| 0.0909 | 23 | 1.4471 |
| 0.0949 | 24 | 1.3233 |
| 0.0988 | 25 | 1.2631 |
| 0.1028 | 26 | 1.1757 |
| 0.1067 | 27 | 1.0742 |
| 0.1107 | 28 | 1.0249 |
| 0.1146 | 29 | 1.1338 |
| 0.1186 | 30 | 0.965 |
| 0.1225 | 31 | 1.0061 |
| 0.1265 | 32 | 0.9607 |
| 0.1304 | 33 | 0.8747 |
| 0.1344 | 34 | 0.8163 |
| 0.1383 | 35 | 0.8643 |
| 0.1423 | 36 | 0.7803 |
| 0.1462 | 37 | 0.6848 |
| 0.1502 | 38 | 0.6727 |
| 0.1542 | 39 | 0.7509 |
| 0.1581 | 40 | 0.6364 |
| 0.1621 | 41 | 0.5834 |
| 0.1660 | 42 | 0.5821 |
| 0.1700 | 43 | 0.5909 |
| 0.1739 | 44 | 0.5541 |
| 0.1779 | 45 | 0.5548 |
| 0.1818 | 46 | 0.4847 |
| 0.1858 | 47 | 0.5016 |
| 0.1897 | 48 | 0.4626 |
| 0.1937 | 49 | 0.4327 |
| 0.1976 | 50 | 0.5319 |
| 0.2016 | 51 | 0.4769 |
| 0.2055 | 52 | 0.4741 |
| 0.2095 | 53 | 0.5004 |
| 0.2134 | 54 | 0.4693 |
| 0.2174 | 55 | 0.4328 |
| 0.2213 | 56 | 0.46 |
| 0.2253 | 57 | 0.472 |
| 0.2292 | 58 | 0.4168 |
| 0.2332 | 59 | 0.5311 |
| 0.2372 | 60 | 0.4115 |
| 0.2411 | 61 | 0.3221 |
| 0.2451 | 62 | 0.3585 |
| 0.2490 | 63 | 0.4406 |
| 0.2530 | 64 | 0.4495 |
| 0.2569 | 65 | 0.4487 |
| 0.2609 | 66 | 0.4563 |
| 0.2648 | 67 | 0.4414 |
| 0.2688 | 68 | 0.3646 |
| 0.2727 | 69 | 0.3844 |
| 0.2767 | 70 | 0.4201 |
| 0.2806 | 71 | 0.4278 |
| 0.2846 | 72 | 0.3262 |
| 0.2885 | 73 | 0.4403 |
| 0.2925 | 74 | 0.4391 |
| 0.2964 | 75 | 0.3564 |
| 0.3004 | 76 | 0.2476 |
| 0.3043 | 77 | 0.3881 |
| 0.3083 | 78 | 0.455 |
| 0.3123 | 79 | 0.3182 |
| 0.3162 | 80 | 0.4281 |
| 0.3202 | 81 | 0.3926 |
| 0.3241 | 82 | 0.3842 |
| 0.3281 | 83 | 0.4574 |
| 0.3320 | 84 | 0.3087 |
| 0.3360 | 85 | 0.3651 |
| 0.3399 | 86 | 0.3744 |
| 0.3439 | 87 | 0.4061 |
| 0.3478 | 88 | 0.3568 |
| 0.3518 | 89 | 0.3193 |
| 0.3557 | 90 | 0.3384 |
| 0.3597 | 91 | 0.3822 |
| 0.3636 | 92 | 0.3818 |
| 0.3676 | 93 | 0.4413 |
| 0.3715 | 94 | 0.3446 |
| 0.3755 | 95 | 0.3336 |
| 0.3794 | 96 | 0.3527 |
| 0.3834 | 97 | 0.3501 |
| 0.3874 | 98 | 0.3454 |
| 0.3913 | 99 | 0.3346 |
| 0.3953 | 100 | 0.3516 |
| 0.3992 | 101 | 0.3836 |
| 0.4032 | 102 | 0.3856 |
| 0.4071 | 103 | 0.3484 |
| 0.4111 | 104 | 0.2827 |
| 0.4150 | 105 | 0.2877 |
| 0.4190 | 106 | 0.365 |
| 0.4229 | 107 | 0.3424 |
| 0.4269 | 108 | 0.3468 |
| 0.4308 | 109 | 0.3962 |
| 0.4348 | 110 | 0.3328 |
| 0.4387 | 111 | 0.3916 |
| 0.4427 | 112 | 0.3186 |
| 0.4466 | 113 | 0.3315 |
| 0.4506 | 114 | 0.2868 |
| 0.4545 | 115 | 0.2986 |
| 0.4585 | 116 | 0.2903 |
| 0.4625 | 117 | 0.3371 |
| 0.4664 | 118 | 0.3204 |
| 0.4704 | 119 | 0.3594 |
| 0.4743 | 120 | 0.3615 |
| 0.4783 | 121 | 0.3611 |
| 0.4822 | 122 | 0.3388 |
| 0.4862 | 123 | 0.334 |
| 0.4901 | 124 | 0.3937 |
| 0.4941 | 125 | 0.3874 |
| 0.4980 | 126 | 0.3928 |
| 0.5020 | 127 | 0.3227 |
| 0.5059 | 128 | 0.3285 |
| 0.5099 | 129 | 0.2938 |
| 0.5138 | 130 | 0.401 |
| 0.5178 | 131 | 0.2996 |
| 0.5217 | 132 | 0.2548 |
| 0.5257 | 133 | 0.3076 |
| 0.5296 | 134 | 0.3449 |
| 0.5336 | 135 | 0.3977 |
| 0.5375 | 136 | 0.38 |
| 0.5415 | 137 | 0.3634 |
| 0.5455 | 138 | 0.3287 |
| 0.5494 | 139 | 0.332 |
| 0.5534 | 140 | 0.3111 |
| 0.5573 | 141 | 0.323 |
| 0.5613 | 142 | 0.32 |
| 0.5652 | 143 | 0.3831 |
| 0.5692 | 144 | 0.2635 |
| 0.5731 | 145 | 0.3777 |
| 0.5771 | 146 | 0.3701 |
| 0.5810 | 147 | 0.3251 |
| 0.5850 | 148 | 0.3246 |
| 0.5889 | 149 | 0.2807 |
| 0.5929 | 150 | 0.2726 |
| 0.5968 | 151 | 0.2527 |
| 0.6008 | 152 | 0.3566 |
| 0.6047 | 153 | 0.2611 |
| 0.6087 | 154 | 0.2831 |
| 0.6126 | 155 | 0.3591 |
| 0.6166 | 156 | 0.3237 |
| 0.6206 | 157 | 0.2818 |
| 0.6245 | 158 | 0.3863 |
| 0.6285 | 159 | 0.2499 |
| 0.6324 | 160 | 0.3633 |
| 0.6364 | 161 | 0.3356 |
| 0.6403 | 162 | 0.2561 |
| 0.6443 | 163 | 0.3032 |
| 0.6482 | 164 | 0.2511 |
| 0.6522 | 165 | 0.3402 |
| 0.6561 | 166 | 0.3838 |
| 0.6601 | 167 | 0.3171 |
| 0.6640 | 168 | 0.3001 |
| 0.6680 | 169 | 0.3474 |
| 0.6719 | 170 | 0.2721 |
| 0.6759 | 171 | 0.2755 |
| 0.6798 | 172 | 0.3078 |
| 0.6838 | 173 | 0.2617 |
| 0.6877 | 174 | 0.3669 |
| 0.6917 | 175 | 0.3094 |
| 0.6957 | 176 | 0.2802 |
| 0.6996 | 177 | 0.3803 |
| 0.7036 | 178 | 0.3262 |
| 0.7075 | 179 | 0.3241 |
| 0.7115 | 180 | 0.3132 |
| 0.7154 | 181 | 0.2579 |
| 0.7194 | 182 | 0.3221 |
| 0.7233 | 183 | 0.3497 |
| 0.7273 | 184 | 0.2853 |
| 0.7312 | 185 | 0.3576 |
| 0.7352 | 186 | 0.348 |
| 0.7391 | 187 | 0.2487 |
| 0.7431 | 188 | 0.2732 |
| 0.7470 | 189 | 0.3023 |
| 0.7510 | 190 | 0.2351 |
| 0.7549 | 191 | 0.2663 |
| 0.7589 | 192 | 0.2483 |
| 0.7628 | 193 | 0.3116 |
| 0.7668 | 194 | 0.2435 |
| 0.7708 | 195 | 0.3982 |
| 0.7747 | 196 | 0.3503 |
| 0.7787 | 197 | 0.3364 |
| 0.7826 | 198 | 0.2872 |
| 0.7866 | 199 | 0.3554 |
| 0.7905 | 200 | 0.352 |
| 0.7945 | 201 | 0.2781 |
| 0.7984 | 202 | 0.2604 |
| 0.8024 | 203 | 0.3174 |
| 0.8063 | 204 | 0.257 |
| 0.8103 | 205 | 0.2591 |
| 0.8142 | 206 | 0.2861 |
| 0.8182 | 207 | 0.3764 |
| 0.8221 | 208 | 0.3702 |
| 0.8261 | 209 | 0.2953 |
| 0.8300 | 210 | 0.2472 |
| 0.8340 | 211 | 0.3193 |
| 0.8379 | 212 | 0.2944 |
| 0.8419 | 213 | 0.373 |
| 0.8458 | 214 | 0.2736 |
| 0.8498 | 215 | 0.3392 |
| 0.8538 | 216 | 0.2611 |
| 0.8577 | 217 | 0.3074 |
| 0.8617 | 218 | 0.3041 |
| 0.8656 | 219 | 0.3103 |
| 0.8696 | 220 | 0.3111 |
| 0.8735 | 221 | 0.3066 |
| 0.8775 | 222 | 0.3117 |
| 0.8814 | 223 | 0.3109 |
| 0.8854 | 224 | 0.2266 |
| 0.8893 | 225 | 0.2774 |
| 0.8933 | 226 | 0.2816 |
| 0.8972 | 227 | 0.3015 |
| 0.9012 | 228 | 0.3339 |
| 0.9051 | 229 | 0.3166 |
| 0.9091 | 230 | 0.3214 |
| 0.9130 | 231 | 0.3425 |
| 0.9170 | 232 | 0.2001 |
| 0.9209 | 233 | 0.2849 |
| 0.9249 | 234 | 0.2981 |
| 0.9289 | 235 | 0.2695 |
| 0.9328 | 236 | 0.2568 |
| 0.9368 | 237 | 0.2672 |
| 0.9407 | 238 | 0.2554 |
| 0.9447 | 239 | 0.2786 |
| 0.9486 | 240 | 0.3506 |
| 0.9526 | 241 | 0.2983 |
| 0.9565 | 242 | 0.2254 |
| 0.9605 | 243 | 0.3054 |
| 0.9644 | 244 | 0.3031 |
| 0.9684 | 245 | 0.2216 |
| 0.9723 | 246 | 0.2185 |
| 0.9763 | 247 | 0.2781 |
| 0.9802 | 248 | 0.3696 |
| 0.9842 | 249 | 0.3164 |
| 0.9881 | 250 | 0.2713 |
| 0.9921 | 251 | 0.3063 |
| 0.9960 | 252 | 0.2969 |
| 1.0 | 253 | 0.2826 |

</details>

### Framework Versions
- Python: 3.12.3
- Sentence Transformers: 3.3.1
- Transformers: 4.44.2
- PyTorch: 2.5.1
- Accelerate: 1.2.1
- Datasets: 2.19.0
- Tokenizers: 0.19.1

## Citation

### BibTeX

#### Sentence Transformers
```bibtex
@inproceedings{reimers-2019-sentence-bert,
    title = "Sentence-BERT: Sentence Embeddings using Siamese BERT-Networks",
    author = "Reimers, Nils and Gurevych, Iryna",
    booktitle = "Proceedings of the 2019 Conference on Empirical Methods in Natural Language Processing",
    month = "11",
    year = "2019",
    publisher = "Association for Computational Linguistics",
    url = "https://arxiv.org/abs/1908.10084",
}
```

#### MultipleNegativesRankingLoss
```bibtex
@misc{henderson2017efficient,
    title={Efficient Natural Language Response Suggestion for Smart Reply},
    author={Matthew Henderson and Rami Al-Rfou and Brian Strope and Yun-hsuan Sung and Laszlo Lukacs and Ruiqi Guo and Sanjiv Kumar and Balint Miklos and Ray Kurzweil},
    year={2017},
    eprint={1705.00652},
    archivePrefix={arXiv},
    primaryClass={cs.CL}
}
```

<!--
## Glossary

*Clearly define terms in order to be accessible across audiences.*
-->

<!--
## Model Card Authors

*Lists the people who create the model card, providing recognition and accountability for the detailed work that goes into its construction.*
-->

<!--
## Model Card Contact

*Provides a way for people who have updates to the Model Card, suggestions, or questions, to contact the Model Card authors.*
-->
config.json
ADDED
@@ -0,0 +1,28 @@
{
  "_name_or_path": "intfloat/multilingual-e5-large",
  "architectures": [
    "XLMRobertaModel"
  ],
  "attention_probs_dropout_prob": 0.1,
  "bos_token_id": 0,
  "classifier_dropout": null,
  "eos_token_id": 2,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 1024,
  "initializer_range": 0.02,
  "intermediate_size": 4096,
  "layer_norm_eps": 1e-05,
  "max_position_embeddings": 514,
  "model_type": "xlm-roberta",
  "num_attention_heads": 16,
  "num_hidden_layers": 24,
  "output_past": true,
  "pad_token_id": 1,
  "position_embedding_type": "absolute",
  "torch_dtype": "bfloat16",
  "transformers_version": "4.44.2",
  "type_vocab_size": 1,
  "use_cache": true,
  "vocab_size": 250002
}
config_sentence_transformers.json
ADDED
@@ -0,0 +1,10 @@
{
  "__version__": {
    "sentence_transformers": "3.3.1",
    "transformers": "4.44.2",
    "pytorch": "2.5.1"
  },
  "prompts": {},
  "default_prompt_name": null,
  "similarity_fn_name": "cosine"
}
model.safetensors
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:c7fc15c02d27689f1cce0a61545d80527fc3efb54de4f1f7fbb65d2d9b8e5250
size 1119826072
modules.json
ADDED
@@ -0,0 +1,20 @@
[
  {
    "idx": 0,
    "name": "0",
    "path": "",
    "type": "sentence_transformers.models.Transformer"
  },
  {
    "idx": 1,
    "name": "1",
    "path": "1_Pooling",
    "type": "sentence_transformers.models.Pooling"
  },
  {
    "idx": 2,
    "name": "2",
    "path": "2_Normalize",
    "type": "sentence_transformers.models.Normalize"
  }
]
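
modules.json is what tells Sentence Transformers to chain the Transformer backbone, the 1_Pooling mean-pooling module, and the 2_Normalize module in that order. A hedged sketch of building an equivalent pipeline by hand (useful for understanding, not required for loading this repository, whose id is replaced here by the base model as a placeholder):

```python
from sentence_transformers import SentenceTransformer, models

# Stage 0: the transformer encoder with the 512-token limit from sentence_bert_config.json.
word_embedding = models.Transformer("intfloat/multilingual-e5-large", max_seq_length=512)

# Stage 1: mean pooling over the 1024-dim token embeddings, as in 1_Pooling/config.json.
pooling = models.Pooling(
    word_embedding.get_word_embedding_dimension(),  # 1024
    pooling_mode="mean",
)

# Stage 2: L2 normalization, as in the 2_Normalize module.
normalize = models.Normalize()

model = SentenceTransformer(modules=[word_embedding, pooling, normalize])
print(model)
```
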
sentence_bert_config.json
ADDED
@@ -0,0 +1,4 @@
{
  "max_seq_length": 512,
  "do_lower_case": false
}
sentencepiece.bpe.model
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:cfc8146abe2a0488e9e2a0c56de7952f7c11ab059eca145a0a727afce0db2865
size 5069051
special_tokens_map.json
ADDED
@@ -0,0 +1,51 @@
{
  "bos_token": {
    "content": "<s>",
    "lstrip": false,
    "normalized": false,
    "rstrip": false,
    "single_word": false
  },
  "cls_token": {
    "content": "<s>",
    "lstrip": false,
    "normalized": false,
    "rstrip": false,
    "single_word": false
  },
  "eos_token": {
    "content": "</s>",
    "lstrip": false,
    "normalized": false,
    "rstrip": false,
    "single_word": false
  },
  "mask_token": {
    "content": "<mask>",
    "lstrip": true,
    "normalized": false,
    "rstrip": false,
    "single_word": false
  },
  "pad_token": {
    "content": "<pad>",
    "lstrip": false,
    "normalized": false,
    "rstrip": false,
    "single_word": false
  },
  "sep_token": {
    "content": "</s>",
    "lstrip": false,
    "normalized": false,
    "rstrip": false,
    "single_word": false
  },
  "unk_token": {
    "content": "<unk>",
    "lstrip": false,
    "normalized": false,
    "rstrip": false,
    "single_word": false
  }
}
tokenizer.json
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:883b037111086fd4dfebbbc9b7cee11e1517b5e0c0514879478661440f137085
size 17082987
tokenizer_config.json
ADDED
@@ -0,0 +1,54 @@
{
  "added_tokens_decoder": {
    "0": {
      "content": "<s>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "1": {
      "content": "<pad>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "2": {
      "content": "</s>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "3": {
      "content": "<unk>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "250001": {
      "content": "<mask>",
      "lstrip": true,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    }
  },
  "bos_token": "<s>",
  "clean_up_tokenization_spaces": true,
  "cls_token": "<s>",
  "eos_token": "</s>",
  "mask_token": "<mask>",
  "model_max_length": 512,
  "pad_token": "<pad>",
  "sep_token": "</s>",
  "tokenizer_class": "XLMRobertaTokenizer",
  "unk_token": "<unk>"
}
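
As a small, non-authoritative sanity check of the tokenizer files above (an XLMRobertaTokenizer with a 512-token limit and the special tokens mapped in special_tokens_map.json), the repository's tokenizer can be loaded and inspected like this; the expected values in the comments are read from the configs above, and the id is a placeholder.

```python
from transformers import AutoTokenizer

# Placeholder id: point this at the repository containing the files above.
tokenizer = AutoTokenizer.from_pretrained("intfloat/multilingual-e5-large")

print(type(tokenizer).__name__)    # an XLM-RoBERTa tokenizer class
print(tokenizer.model_max_length)  # expected: 512, per tokenizer_config.json
print(tokenizer.cls_token, tokenizer.sep_token, tokenizer.mask_token)  # expected: <s> </s> <mask>

encoded = tokenizer("query: a short example", truncation=True, max_length=512)
print(encoded["input_ids"])
```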