atiwari751 commited on
Commit
af587c3
·
1 Parent(s): fa753cb

Streamlit app working

Browse files
Files changed (10) hide show
  1. BPE.py +4 -2
  2. README.md +28 -0
  3. app.py +87 -0
  4. data_analysis.py +0 -30
  5. decoded_output.txt +1 -3
  6. encode_decode.py +15 -1
  7. requirements.txt +2 -0
  8. text_file_eng.txt +0 -1100
  9. text_file_eng_long.txt +0 -0
  10. tokenizer.py +75 -0
BPE.py CHANGED
@@ -3,7 +3,7 @@ import regex as re
3
  from tqdm import tqdm
4
 
5
  # Read text from a file
6
- with open('text_file_eng_long.txt', 'r', encoding='utf-8') as file:
7
  text = file.read()
8
 
9
  # Hindi-focused pattern
@@ -88,9 +88,11 @@ def perform_bpe():
88
 
89
  if __name__ == "__main__":
90
  print('---')
91
- print("length of text:", len(text))
 
92
  print('---')
93
  print("length of tokens:", len(tokens))
 
94
 
95
  # Run BPE and save results
96
  merges, ids, num_merges = perform_bpe()
 
3
  from tqdm import tqdm
4
 
5
  # Read text from a file
6
+ with open('text_file.txt', 'r', encoding='utf-8') as file:
7
  text = file.read()
8
 
9
  # Hindi-focused pattern
 
88
 
89
  if __name__ == "__main__":
90
  print('---')
91
+ print("length of text (characters):", len(text))
92
+ print("length of text (words):", len(text.split()))
93
  print('---')
94
  print("length of tokens:", len(tokens))
95
+ #print("sample tokens:", tokens[:5]) # Show first 5 tokens
96
 
97
  # Run BPE and save results
98
  merges, ids, num_merges = perform_bpe()
README.md CHANGED
@@ -0,0 +1,28 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Hindi Tokenizer
2
+
3
+ ## Dataset
4
+
5
+ The final dataset used to train the tokenizer is `text_file.txt`, located in this repo.
6
+
7
+ Two primary sources were combined into that .txt file:
8
+
9
+ - https://www.kaggle.com/datasets/disisbig/hindi-text-short-summarization-corpus (test dataset)
10
+ - https://hindi.newslaundry.com/report
11
+
12
+ length of text (characters): 5933269
13
+
14
+ length of text (words): 1150937
15
+
16
+
17
+ ## Results
18
+
19
+ length of text (words): 1150937
20
+
21
+ length of tokens (regex): 1354962
22
+
23
+ ---
24
+ Total bytes before: 14659421
25
+
26
+ Total bytes after: 1889786
27
+
28
+ Compression ratio: 7.76X
app.py ADDED
@@ -0,0 +1,87 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ from tokenizer import HindiTokenizer
3
+
4
+ # Initialize tokenizer
5
@st.cache_resource
def load_tokenizer():
    """Build the ``HindiTokenizer`` used by the app.

    Wrapped in ``st.cache_resource`` so Streamlit can reuse the returned
    object instead of constructing a new one on every script rerun.

    Returns:
        A ``HindiTokenizer`` instance created with no arguments.
    """
    tokenizer = HindiTokenizer()
    return tokenizer
8
+
9
def format_token_ids(token_ids, per_line=10):
    """Render token IDs as text with ``per_line`` IDs on each line.

    Args:
        token_ids: Sliceable sequence of token IDs; each element is rendered
            with ``str()``.
        per_line: How many IDs to place on one output line. Defaults to 10,
            which matches the previous hard-coded layout.

    Returns:
        A string in which IDs on the same line are separated by a single
        space and lines are separated by ``"\\n"``. An empty sequence gives
        an empty string.

    Raises:
        ValueError: If ``per_line`` is smaller than 1.
    """
    if per_line < 1:
        raise ValueError("per_line must be a positive integer")

    # Slice the sequence into consecutive rows of at most ``per_line`` IDs.
    rows = (
        token_ids[start:start + per_line]
        for start in range(0, len(token_ids), per_line)
    )
    # ``token_id`` rather than ``id`` so the builtin ``id()`` is not shadowed.
    return '\n'.join(
        ' '.join(str(token_id) for token_id in row) for row in rows
    )
16
+
17
def format_hindi_tokens(tokens):
    """Concatenate ``tokens`` into one string using the separator below.

    Args:
        tokens: Iterable of strings.

    Returns:
        The tokens joined by ``separator``.
    """
    # NOTE(review): the earlier comment described a double-space separator,
    # but the literal here is what the code actually joins with - confirm
    # which of the two is intended.
    separator = ' '
    return separator.join(tokens)
20
+
21
def main():
    """Render the Streamlit page and tokenize the entered text on demand.

    Lays out a title, three metric columns (word count, compression ratio,
    BPE token count) and an input box. When the "Tokenize" button is pressed
    with non-empty input, ``tokenizer.tokenize`` is called and its three
    results (token IDs, pre-BPE tokens, decoded tokens) are used to fill the
    metrics and the two output text areas. With empty input a warning is
    shown instead.
    """
    st.title("Hindi Text Tokenizer")

    tokenizer = load_tokenizer()

    # Metric columns are created first; their values are written only after
    # a successful tokenization further down.
    col1, col2, col3 = st.columns(3)

    with col1:
        st.subheader("Word Count")
    with col2:
        st.subheader("Compression Ratio")
    with col3:
        st.subheader("BPE Tokens")  # These are post-BPE tokens

    # Text input
    st.subheader("Input Text:")
    text_input = st.text_area(
        label="Input Hindi text",
        height=150,
        key="input",
        label_visibility="collapsed"
    )

    if st.button("Tokenize"):
        if text_input:
            # Get tokens and IDs
            token_ids, original_tokens, decoded_tokens = tokenizer.tokenize(text_input)

            # Metrics: whitespace-separated words, and UTF-8 bytes of the
            # pre-BPE tokens per final token ID.
            word_count = len(text_input.split())
            original_bytes = sum(len(token.encode('utf-8')) for token in original_tokens)
            # Guard the division: if the tokenizer produces no IDs for this
            # input, report 0 rather than raising ZeroDivisionError.
            if token_ids:
                compression_ratio = original_bytes / len(token_ids)
            else:
                compression_ratio = 0.0

            # Update metrics
            col1.write(f"{word_count}")
            col2.write(f"{compression_ratio:.2f}X")
            col3.write(f"{len(token_ids)}")  # This is post-BPE token count

            # Show both token counts so the effect of BPE merging is visible
            st.caption(f"Initial tokens (after regex): {len(original_tokens)}")
            st.caption(f"Final tokens (after BPE): {len(token_ids)}")

            # Display token IDs in a formatted way
            st.subheader("Token IDs:")
            st.text_area(
                label="Generated token IDs",
                value=format_token_ids(token_ids),
                height=150,
                key="ids",
                label_visibility="collapsed"
            )

            # Display decoded tokens with tab separation
            st.subheader("Tokenized Text:")
            st.text_area(
                label="Tokenized output",
                value='\t'.join(decoded_tokens),
                height=150,
                key="tokens",
                label_visibility="collapsed"
            )
        else:
            st.warning("Please enter some text to tokenize.")


if __name__ == "__main__":
    main()
data_analysis.py DELETED
@@ -1,30 +0,0 @@
1
- # Read text from a file
2
- with open('text_file.txt', 'r', encoding='utf-8') as file:
3
- text = file.read()
4
-
5
- tokens = text.encode("utf-8") # raw bytes
6
- tokens = list(map(int, tokens)) # convert to a list of integers in range 0..255 for convenience
7
-
8
- print('---')
9
- print("length of text:", len(text))
10
- print('---')
11
- #print(tokens)
12
- print('---')
13
- print("length of tokens:", len(tokens))
14
-
15
- def get_stats(ids):
16
- counts = {}
17
- for pair in zip(ids, ids[1:]): # Pythonic way to iterate consecutive elements
18
- counts[pair] = counts.get(pair, 0) + 1
19
- return counts
20
-
21
- stats = get_stats(tokens)
22
- print('---')
23
- # print(stats)
24
- #print(sorted(((v,k) for k,v in stats.items()), reverse=True))
25
-
26
- print('---')
27
- top_pair = max(stats, key=stats.get)
28
- print(top_pair)
29
-
30
- #print(chr(224), chr(164))
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
decoded_output.txt CHANGED
@@ -1,3 +1 @@
1
- अम के परिवार की तीन पी ढ़ ियां चांद नी चौ क निर्� � ाचन क्षेत्र में ह वे ली आज ़ म ां के नाम से पहच ाने जाने वाले एक दम ट कर बने रों के ुण ्ड में रहती हैं . यह इलाक दिल्ली की त िहास िक ामा मस्जिद से पै दल की ूरी पर है , और इस परिवार के 23 सदस्य मतदान केंद्र 10 पर प ंज ीक ृत मत ाता हैं . लेकिन पिछले साल लोकसभा चुनावों के दौरान अम को पता चला कि वह अपने परिवार के उन 20 लोगों में से एक हैं , जिन का नाम मत ाता सू ची से इस वजह से काट दिया गया कि उन्होंने अपना घर बदल लिया है .
2
-
3
- 5 5 वर्षीय अम ज द ने न्यू ज़ ल ॉन ्ड ्री को बताया , " हम ारे सामने ये पहली बार हुआ है . लेकिन नाम कट ने के बारे में सबसे ज्यादा निर ाश ाज न क बात ये थी कि इसका पता मतदान के दिन ही चला . जब हम पहली बार बू थ 10 पर गए तो उन्होंने हमें बताया कि उन्हें मत द ाता सू ची में हमारा नाम नहीं मिला . इसलिए हमें ज ामा मस्जिद में किसी दूसरे बू थ पर जाकर देखना चाहिए . वहां से हमें दूसरे बू थ पर भेज दिया गया . इस तरह हमने पांच से छह बू थ ों का दौर ा किया . और फिर अंत में हमें जो कारण बताया गया , वो यह था कि शायद घर - घ र जाकर सर्वे क्षण के दौरान बी एल ओ ( ब ू थ ले वल ऑफिस र ) को हम घर पर नहीं मिले इसलिए उन्होंने हमारे नाम काट दिए .”
 
1
+ ां द नी चौ क विधानसभा क्षेत्र में सबसे ज्यादा मत दाता सूची से हट ाए गए तीन बू थ ों में से एक बू वो था , जहां अम का घर था और दूसरा बू कांग्रेस उम्मीदवार जे पी अग्र वाल के पड़ ोस में था े . अपने पक्ष में मतदान में 14 प्रतिशत की बढ़ ोत री के बावजूद अग्र वाल भाजपा से 9 0 , 000 वोटों के अंतर से हार गए . न्यू ज़ ल ॉन ्ड ्री ने घर - घ र जाकर सर्वे क्षण किया और प ुष्ट ि की , कि इन तीन बू थ ों पर हट ाए गए 4 9 7 मत दाताओं में से कम से कम 14 7 ( लग भग 29 . 6 प्रतिशत ) गलत तरीके से हट ाए गए थे .
 
 
encode_decode.py CHANGED
@@ -25,7 +25,21 @@ def decode(ids):
25
  return text
26
 
27
  # Example: Decode a list of IDs
28
- set_of_ids = [2342, 307, 295, 286, 1413, 302, 839, 644, 574, 982, 3877, 405, 1086, 272, 978, 181, 3927, 1171, 294, 274, 964, 438, 767, 337, 284, 361, 332, 286, 776, 315, 2331, 429, 841, 631, 385, 1694, 273, 310, 418, 1607, 445, 935, 286, 962, 1244, 698, 294, 3069, 347, 46, 450, 1462, 259, 646, 302, 554, 276, 2252, 334, 292, 2835, 2500, 315, 1006, 3367, 302, 296, 1299, 330, 289, 44, 327, 345, 1413, 286, 2911, 1906, 2592, 1322, 888, 330, 279, 711, 1474, 997, 1068, 295, 1236, 347, 46, 513, 1067, 579, 1194, 2596, 286, 847, 732, 307, 295, 309, 1423, 1953, 340, 555, 563, 1413, 286, 376, 466, 596, 294, 315, 385, 347, 44, 1001, 478, 776, 1068, 295, 1236, 919, 1216, 315, 345, 1115, 315, 3189, 481, 437, 340, 557, 1125, 1135, 1501, 857, 289, 46, 10, 10, 53, 53, 2794, 732, 307, 295, 317, 2705, 2246, 280, 1308, 698, 486, 309, 739, 44, 32, 34, 808, 830, 1015, 516, 1315, 544, 667, 289, 46, 513, 776, 1914, 311, 286, 948, 294, 856, 915, 2438, 658, 367, 271, 272, 564, 516, 472, 340, 1571, 1423, 2592, 286, 638, 416, 1953, 46, 586, 462, 1315, 544, 3075, 583, 888, 330, 588, 444, 557, 1448, 739, 340, 737, 1068, 295, 1236, 919, 1216, 294, 3253, 776, 391, 1410, 46, 1496, 1448, 292, 2835, 2500, 294, 738, 1374, 3075, 583, 330, 2660, 3252, 904, 46, 1441, 315, 1448, 1374, 3075, 583, 330, 1473, 481, 437, 46, 345, 778, 1758, 1307, 315, 2210, 3075, 583, 299, 333, 751, 259, 420, 46, 327, 766, 1200, 294, 1448, 499, 1394, 739, 437, 44, 707, 450, 413, 340, 3602, 1135, 45, 864, 261, 2660, 2749, 1930, 286, 847, 447, 1782, 1633, 510, 308, 306, 583, 399, 1508, 2632, 261, 41, 309, 462, 1135, 330, 391, 1193, 1496, 557, 1574, 776, 3189, 1340, 3435]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
29
  decoded_text = decode(set_of_ids) # Pass the list of IDs
30
  print(decoded_text)
31
 
 
25
  return text
26
 
27
  # Example: Decode a list of IDs
28
+ set_of_ids = [335, 332, 295, 401, 1050, 273, 1153, 1094, 294, 843,
29
+ 859, 1092, 3583, 3327, 315, 2457, 437, 585, 867, 3747,
30
+ 587, 299, 294, 315, 388, 3747, 587, 785, 414, 44,
31
+ 1110, 712, 307, 295, 334, 984, 414, 329, 2892, 3747,
32
+ 587, 583, 1160, 1593, 427, 3934, 621, 285, 1583, 1936,
33
+ 294, 414, 260, 46, 548, 2007, 294, 2733, 294, 1467,
34
+ 1553, 300, 763, 2045, 381, 285, 2093, 3934, 621, 1882,
35
+ 315, 1077, 48, 44, 1581, 3991, 285, 1909, 315, 1595,
36
+ 585, 46, 2161, 2714, 280, 1016, 698, 475, 316, 984,
37
+ 45, 861, 261, 2836, 2999, 1947, 418, 329, 279, 3331,
38
+ 266, 300, 44, 343, 591, 867, 3747, 587, 299, 330,
39
+ 2457, 437, 585, 715, 57, 55, 1092, 3017, 294, 315,
40
+ 565, 315, 565, 1467, 55, 489, 2139, 2057, 2927, 46,
41
+ 54, 1553, 41, 2217, 2695, 315, 2457, 437, 585, 533,
42
+ 46]
43
  decoded_text = decode(set_of_ids) # Pass the list of IDs
44
  print(decoded_text)
45
 
requirements.txt ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ streamlit
2
+ regex
text_file_eng.txt DELETED
@@ -1,1100 +0,0 @@
1
- First Citizen:
2
- Before we proceed any further, hear me speak.
3
-
4
- All:
5
- Speak, speak.
6
-
7
- First Citizen:
8
- You are all resolved rather to die than to famish?
9
-
10
- All:
11
- Resolved. resolved.
12
-
13
- First Citizen:
14
- First, you know Caius Marcius is chief enemy to the people.
15
-
16
- All:
17
- We know't, we know't.
18
-
19
- First Citizen:
20
- Let us kill him, and we'll have corn at our own price.
21
- Is't a verdict?
22
-
23
- All:
24
- No more talking on't; let it be done: away, away!
25
-
26
- Second Citizen:
27
- One word, good citizens.
28
-
29
- First Citizen:
30
- We are accounted poor citizens, the patricians good.
31
- What authority surfeits on would relieve us: if they
32
- would yield us but the superfluity, while it were
33
- wholesome, we might guess they relieved us humanely;
34
- but they think we are too dear: the leanness that
35
- afflicts us, the object of our misery, is as an
36
- inventory to particularise their abundance; our
37
- sufferance is a gain to them Let us revenge this with
38
- our pikes, ere we become rakes: for the gods know I
39
- speak this in hunger for bread, not in thirst for revenge.
40
-
41
- Second Citizen:
42
- Would you proceed especially against Caius Marcius?
43
-
44
- All:
45
- Against him first: he's a very dog to the commonalty.
46
-
47
- Second Citizen:
48
- Consider you what services he has done for his country?
49
-
50
- First Citizen:
51
- Very well; and could be content to give him good
52
- report fort, but that he pays himself with being proud.
53
-
54
- Second Citizen:
55
- Nay, but speak not maliciously.
56
-
57
- First Citizen:
58
- I say unto you, what he hath done famously, he did
59
- it to that end: though soft-conscienced men can be
60
- content to say it was for his country he did it to
61
- please his mother and to be partly proud; which he
62
- is, even till the altitude of his virtue.
63
-
64
- Second Citizen:
65
- What he cannot help in his nature, you account a
66
- vice in him. You must in no way say he is covetous.
67
-
68
- First Citizen:
69
- If I must not, I need not be barren of accusations;
70
- he hath faults, with surplus, to tire in repetition.
71
- What shouts are these? The other side o' the city
72
- is risen: why stay we prating here? to the Capitol!
73
-
74
- All:
75
- Come, come.
76
-
77
- First Citizen:
78
- Soft! who comes here?
79
-
80
- Second Citizen:
81
- Worthy Menenius Agrippa; one that hath always loved
82
- the people.
83
-
84
- First Citizen:
85
- He's one honest enough: would all the rest were so!
86
-
87
- MENENIUS:
88
- What work's, my countrymen, in hand? where go you
89
- With bats and clubs? The matter? speak, I pray you.
90
-
91
- First Citizen:
92
- Our business is not unknown to the senate; they have
93
- had inkling this fortnight what we intend to do,
94
- which now we'll show 'em in deeds. They say poor
95
- suitors have strong breaths: they shall know we
96
- have strong arms too.
97
-
98
- MENENIUS:
99
- Why, masters, my good friends, mine honest neighbours,
100
- Will you undo yourselves?
101
-
102
- First Citizen:
103
- We cannot, sir, we are undone already.
104
-
105
- MENENIUS:
106
- I tell you, friends, most charitable care
107
- Have the patricians of you. For your wants,
108
- Your suffering in this dearth, you may as well
109
- Strike at the heaven with your staves as lift them
110
- Against the Roman state, whose course will on
111
- The way it takes, cracking ten thousand curbs
112
- Of more strong link asunder than can ever
113
- Appear in your impediment. For the dearth,
114
- The gods, not the patricians, make it, and
115
- Your knees to them, not arms, must help. Alack,
116
- You are transported by calamity
117
- Thither where more attends you, and you slander
118
- The helms o' the state, who care for you like fathers,
119
- When you curse them as enemies.
120
-
121
- First Citizen:
122
- Care for us! True, indeed! They ne'er cared for us
123
- yet: suffer us to famish, and their store-houses
124
- crammed with grain; make edicts for usury, to
125
- support usurers; repeal daily any wholesome act
126
- established against the rich, and provide more
127
- piercing statutes daily, to chain up and restrain
128
- the poor. If the wars eat us not up, they will; and
129
- there's all the love they bear us.
130
-
131
- MENENIUS:
132
- Either you must
133
- Confess yourselves wondrous malicious,
134
- Or be accused of folly. I shall tell you
135
- A pretty tale: it may be you have heard it;
136
- But, since it serves my purpose, I will venture
137
- To stale 't a little more.
138
-
139
- First Citizen:
140
- Well, I'll hear it, sir: yet you must not think to
141
- fob off our disgrace with a tale: but, an 't please
142
- you, deliver.
143
-
144
- MENENIUS:
145
- There was a time when all the body's members
146
- Rebell'd against the belly, thus accused it:
147
- That only like a gulf it did remain
148
- I' the midst o' the body, idle and unactive,
149
- Still cupboarding the viand, never bearing
150
- Like labour with the rest, where the other instruments
151
- Did see and hear, devise, instruct, walk, feel,
152
- And, mutually participate, did minister
153
- Unto the appetite and affection common
154
- Of the whole body. The belly answer'd--
155
-
156
- First Citizen:
157
- Well, sir, what answer made the belly?
158
-
159
- MENENIUS:
160
- Sir, I shall tell you. With a kind of smile,
161
- Which ne'er came from the lungs, but even thus--
162
- For, look you, I may make the belly smile
163
- As well as speak--it tauntingly replied
164
- To the discontented members, the mutinous parts
165
- That envied his receipt; even so most fitly
166
- As you malign our senators for that
167
- They are not such as you.
168
-
169
- First Citizen:
170
- Your belly's answer? What!
171
- The kingly-crowned head, the vigilant eye,
172
- The counsellor heart, the arm our soldier,
173
- Our steed the leg, the tongue our trumpeter.
174
- With other muniments and petty helps
175
- In this our fabric, if that they--
176
-
177
- MENENIUS:
178
- What then?
179
- 'Fore me, this fellow speaks! What then? what then?
180
-
181
- First Citizen:
182
- Should by the cormorant belly be restrain'd,
183
- Who is the sink o' the body,--
184
-
185
- MENENIUS:
186
- Well, what then?
187
-
188
- First Citizen:
189
- The former agents, if they did complain,
190
- What could the belly answer?
191
-
192
- MENENIUS:
193
- I will tell you
194
- If you'll bestow a small--of what you have little--
195
- Patience awhile, you'll hear the belly's answer.
196
-
197
- First Citizen:
198
- Ye're long about it.
199
-
200
- MENENIUS:
201
- Note me this, good friend;
202
- Your most grave belly was deliberate,
203
- Not rash like his accusers, and thus answer'd:
204
- 'True is it, my incorporate friends,' quoth he,
205
- 'That I receive the general food at first,
206
- Which you do live upon; and fit it is,
207
- Because I am the store-house and the shop
208
- Of the whole body: but, if you do remember,
209
- I send it through the rivers of your blood,
210
- Even to the court, the heart, to the seat o' the brain;
211
- And, through the cranks and offices of man,
212
- The strongest nerves and small inferior veins
213
- From me receive that natural competency
214
- Whereby they live: and though that all at once,
215
- You, my good friends,'--this says the belly, mark me,--
216
-
217
- First Citizen:
218
- Ay, sir; well, well.
219
-
220
- MENENIUS:
221
- 'Though all at once cannot
222
- See what I do deliver out to each,
223
- Yet I can make my audit up, that all
224
- From me do back receive the flour of all,
225
- And leave me but the bran.' What say you to't?
226
-
227
- First Citizen:
228
- It was an answer: how apply you this?
229
-
230
- MENENIUS:
231
- The senators of Rome are this good belly,
232
- And you the mutinous members; for examine
233
- Their counsels and their cares, digest things rightly
234
- Touching the weal o' the common, you shall find
235
- No public benefit which you receive
236
- But it proceeds or comes from them to you
237
- And no way from yourselves. What do you think,
238
- You, the great toe of this assembly?
239
-
240
- First Citizen:
241
- I the great toe! why the great toe?
242
-
243
- MENENIUS:
244
- For that, being one o' the lowest, basest, poorest,
245
- Of this most wise rebellion, thou go'st foremost:
246
- Thou rascal, that art worst in blood to run,
247
- Lead'st first to win some vantage.
248
- But make you ready your stiff bats and clubs:
249
- Rome and her rats are at the point of battle;
250
- The one side must have bale.
251
- Hail, noble Marcius!
252
-
253
- MARCIUS:
254
- Thanks. What's the matter, you dissentious rogues,
255
- That, rubbing the poor itch of your opinion,
256
- Make yourselves scabs?
257
-
258
- First Citizen:
259
- We have ever your good word.
260
-
261
- MARCIUS:
262
- He that will give good words to thee will flatter
263
- Beneath abhorring. What would you have, you curs,
264
- That like nor peace nor war? the one affrights you,
265
- The other makes you proud. He that trusts to you,
266
- Where he should find you lions, finds you hares;
267
- Where foxes, geese: you are no surer, no,
268
- Than is the coal of fire upon the ice,
269
- Or hailstone in the sun. Your virtue is
270
- To make him worthy whose offence subdues him
271
- And curse that justice did it.
272
- Who deserves greatness
273
- Deserves your hate; and your affections are
274
- A sick man's appetite, who desires most that
275
- Which would increase his evil. He that depends
276
- Upon your favours swims with fins of lead
277
- And hews down oaks with rushes. Hang ye! Trust Ye?
278
- With every minute you do change a mind,
279
- And call him noble that was now your hate,
280
- Him vile that was your garland. What's the matter,
281
- That in these several places of the city
282
- You cry against the noble senate, who,
283
- Under the gods, keep you in awe, which else
284
- Would feed on one another? What's their seeking?
285
-
286
- MENENIUS:
287
- For corn at their own rates; whereof, they say,
288
- The city is well stored.
289
-
290
- MARCIUS:
291
- Hang 'em! They say!
292
- They'll sit by the fire, and presume to know
293
- What's done i' the Capitol; who's like to rise,
294
- Who thrives and who declines; side factions
295
- and give out
296
- Conjectural marriages; making parties strong
297
- And feebling such as stand not in their liking
298
- Below their cobbled shoes. They say there's
299
- grain enough!
300
- Would the nobility lay aside their ruth,
301
- And let me use my sword, I'll make a quarry
302
- With thousands of these quarter'd slaves, as high
303
- As I could pick my lance.
304
-
305
- MENENIUS:
306
- Nay, these are almost thoroughly persuaded;
307
- For though abundantly they lack discretion,
308
- Yet are they passing cowardly. But, I beseech you,
309
- What says the other troop?
310
-
311
- MARCIUS:
312
- They are dissolved: hang 'em!
313
- They said they were an-hungry; sigh'd forth proverbs,
314
- That hunger broke stone walls, that dogs must eat,
315
- That meat was made for mouths, that the gods sent not
316
- Corn for the rich men only: with these shreds
317
- They vented their complainings; which being answer'd,
318
- And a petition granted them, a strange one--
319
- To break the heart of generosity,
320
- And make bold power look pale--they threw their caps
321
- As they would hang them on the horns o' the moon,
322
- Shouting their emulation.
323
-
324
- MENENIUS:
325
- What is granted them?
326
-
327
- MARCIUS:
328
- Five tribunes to defend their vulgar wisdoms,
329
- Of their own choice: one's Junius Brutus,
330
- Sicinius Velutus, and I know not--'Sdeath!
331
- The rabble should have first unroof'd the city,
332
- Ere so prevail'd with me: it will in time
333
- Win upon power and throw forth greater themes
334
- For insurrection's arguing.
335
-
336
- MENENIUS:
337
- This is strange.
338
-
339
- MARCIUS:
340
- Go, get you home, you fragments!
341
-
342
- Messenger:
343
- Where's Caius Marcius?
344
-
345
- MARCIUS:
346
- Here: what's the matter?
347
-
348
- Messenger:
349
- The news is, sir, the Volsces are in arms.
350
-
351
- MARCIUS:
352
- I am glad on 't: then we shall ha' means to vent
353
- Our musty superfluity. See, our best elders.
354
-
355
- First Senator:
356
- Marcius, 'tis true that you have lately told us;
357
- The Volsces are in arms.
358
-
359
- MARCIUS:
360
- They have a leader,
361
- Tullus Aufidius, that will put you to 't.
362
- I sin in envying his nobility,
363
- And were I any thing but what I am,
364
- I would wish me only he.
365
-
366
- COMINIUS:
367
- You have fought together.
368
-
369
- MARCIUS:
370
- Were half to half the world by the ears and he.
371
- Upon my party, I'ld revolt to make
372
- Only my wars with him: he is a lion
373
- That I am proud to hunt.
374
-
375
- First Senator:
376
- Then, worthy Marcius,
377
- Attend upon Cominius to these wars.
378
-
379
- COMINIUS:
380
- It is your former promise.
381
-
382
- MARCIUS:
383
- Sir, it is;
384
- And I am constant. Titus Lartius, thou
385
- Shalt see me once more strike at Tullus' face.
386
- What, art thou stiff? stand'st out?
387
-
388
- TITUS:
389
- No, Caius Marcius;
390
- I'll lean upon one crutch and fight with t'other,
391
- Ere stay behind this business.
392
-
393
- MENENIUS:
394
- O, true-bred!
395
-
396
- First Senator:
397
- Your company to the Capitol; where, I know,
398
- Our greatest friends attend us.
399
-
400
- TITUS:
401
-
402
- COMINIUS:
403
- Noble Marcius!
404
-
405
- First Senator:
406
-
407
- MARCIUS:
408
- Nay, let them follow:
409
- The Volsces have much corn; take these rats thither
410
- To gnaw their garners. Worshipful mutiners,
411
- Your valour puts well forth: pray, follow.
412
-
413
- SICINIUS:
414
- Was ever man so proud as is this Marcius?
415
-
416
- BRUTUS:
417
- He has no equal.
418
-
419
- SICINIUS:
420
- When we were chosen tribunes for the people,--
421
-
422
- BRUTUS:
423
- Mark'd you his lip and eyes?
424
-
425
- SICINIUS:
426
- Nay. but his taunts.
427
-
428
- BRUTUS:
429
- Being moved, he will not spare to gird the gods.
430
-
431
- SICINIUS:
432
- Be-mock the modest moon.
433
-
434
- BRUTUS:
435
- The present wars devour him: he is grown
436
- Too proud to be so valiant.
437
-
438
- SICINIUS:
439
- Such a nature,
440
- Tickled with good success, disdains the shadow
441
- Which he treads on at noon: but I do wonder
442
- His insolence can brook to be commanded
443
- Under Cominius.
444
-
445
- BRUTUS:
446
- Fame, at the which he aims,
447
- In whom already he's well graced, can not
448
- Better be held nor more attain'd than by
449
- A place below the first: for what miscarries
450
- Shall be the general's fault, though he perform
451
- To the utmost of a man, and giddy censure
452
- Will then cry out of Marcius 'O if he
453
- Had borne the business!'
454
-
455
- SICINIUS:
456
- Besides, if things go well,
457
- Opinion that so sticks on Marcius shall
458
- Of his demerits rob Cominius.
459
-
460
- BRUTUS:
461
- Come:
462
- Half all Cominius' honours are to Marcius.
463
- Though Marcius earned them not, and all his faults
464
- To Marcius shall be honours, though indeed
465
- In aught he merit not.
466
-
467
- SICINIUS:
468
- Let's hence, and hear
469
- How the dispatch is made, and in what fashion,
470
- More than his singularity, he goes
471
- Upon this present action.
472
-
473
- BRUTUS:
474
- Lets along.
475
-
476
- First Senator:
477
- So, your opinion is, Aufidius,
478
- That they of Rome are entered in our counsels
479
- And know how we proceed.
480
-
481
- AUFIDIUS:
482
- Is it not yours?
483
- What ever have been thought on in this state,
484
- That could be brought to bodily act ere Rome
485
- Had circumvention? 'Tis not four days gone
486
- Since I heard thence; these are the words: I think
487
- I have the letter here; yes, here it is.
488
- 'They have press'd a power, but it is not known
489
- Whether for east or west: the dearth is great;
490
- The people mutinous; and it is rumour'd,
491
- Cominius, Marcius your old enemy,
492
- Who is of Rome worse hated than of you,
493
- And Titus Lartius, a most valiant Roman,
494
- These three lead on this preparation
495
- Whither 'tis bent: most likely 'tis for you:
496
- Consider of it.'
497
-
498
- First Senator:
499
- Our army's in the field
500
- We never yet made doubt but Rome was ready
501
- To answer us.
502
-
503
- AUFIDIUS:
504
- Nor did you think it folly
505
- To keep your great pretences veil'd till when
506
- They needs must show themselves; which
507
- in the hatching,
508
- It seem'd, appear'd to Rome. By the discovery.
509
- We shall be shorten'd in our aim, which was
510
- To take in many towns ere almost Rome
511
- Should know we were afoot.
512
-
513
- Second Senator:
514
- Noble Aufidius,
515
- Take your commission; hie you to your bands:
516
- Let us alone to guard Corioli:
517
- If they set down before 's, for the remove
518
- Bring your army; but, I think, you'll find
519
- They've not prepared for us.
520
-
521
- AUFIDIUS:
522
- O, doubt not that;
523
- I speak from certainties. Nay, more,
524
- Some parcels of their power are forth already,
525
- And only hitherward. I leave your honours.
526
- If we and Caius Marcius chance to meet,
527
- 'Tis sworn between us we shall ever strike
528
- Till one can do no more.
529
-
530
- All:
531
- The gods assist you!
532
-
533
- AUFIDIUS:
534
- And keep your honours safe!
535
-
536
- First Senator:
537
- Farewell.
538
-
539
- Second Senator:
540
- Farewell.
541
-
542
- All:
543
- Farewell.
544
-
545
- VOLUMNIA:
546
- I pray you, daughter, sing; or express yourself in a
547
- more comfortable sort: if my son were my husband, I
548
- should freelier rejoice in that absence wherein he
549
- won honour than in the embracements of his bed where
550
- he would show most love. When yet he was but
551
- tender-bodied and the only son of my womb, when
552
- youth with comeliness plucked all gaze his way, when
553
- for a day of kings' entreaties a mother should not
554
- sell him an hour from her beholding, I, considering
555
- how honour would become such a person. that it was
556
- no better than picture-like to hang by the wall, if
557
- renown made it not stir, was pleased to let him seek
558
- danger where he was like to find fame. To a cruel
559
- war I sent him; from whence he returned, his brows
560
- bound with oak. I tell thee, daughter, I sprang not
561
- more in joy at first hearing he was a man-child
562
- than now in first seeing he had proved himself a
563
- man.
564
-
565
- VIRGILIA:
566
- But had he died in the business, madam; how then?
567
-
568
- VOLUMNIA:
569
- Then his good report should have been my son; I
570
- therein would have found issue. Hear me profess
571
- sincerely: had I a dozen sons, each in my love
572
- alike and none less dear than thine and my good
573
- Marcius, I had rather had eleven die nobly for their
574
- country than one voluptuously surfeit out of action.
575
-
576
- Gentlewoman:
577
- Madam, the Lady Valeria is come to visit you.
578
-
579
- VIRGILIA:
580
- Beseech you, give me leave to retire myself.
581
-
582
- VOLUMNIA:
583
- Indeed, you shall not.
584
- Methinks I hear hither your husband's drum,
585
- See him pluck Aufidius down by the hair,
586
- As children from a bear, the Volsces shunning him:
587
- Methinks I see him stamp thus, and call thus:
588
- 'Come on, you cowards! you were got in fear,
589
- Though you were born in Rome:' his bloody brow
590
- With his mail'd hand then wiping, forth he goes,
591
- Like to a harvest-man that's task'd to mow
592
- Or all or lose his hire.
593
-
594
- VIRGILIA:
595
- His bloody brow! O Jupiter, no blood!
596
-
597
- VOLUMNIA:
598
- Away, you fool! it more becomes a man
599
- Than gilt his trophy: the breasts of Hecuba,
600
- When she did suckle Hector, look'd not lovelier
601
- Than Hector's forehead when it spit forth blood
602
- At Grecian sword, contemning. Tell Valeria,
603
- We are fit to bid her welcome.
604
-
605
- VIRGILIA:
606
- Heavens bless my lord from fell Aufidius!
607
-
608
- VOLUMNIA:
609
- He'll beat Aufidius 'head below his knee
610
- And tread upon his neck.
611
-
612
- VALERIA:
613
- My ladies both, good day to you.
614
-
615
- VOLUMNIA:
616
- Sweet madam.
617
-
618
- VIRGILIA:
619
- I am glad to see your ladyship.
620
-
621
- VALERIA:
622
- How do you both? you are manifest house-keepers.
623
- What are you sewing here? A fine spot, in good
624
- faith. How does your little son?
625
-
626
- VIRGILIA:
627
- I thank your ladyship; well, good madam.
628
-
629
- VOLUMNIA:
630
- He had rather see the swords, and hear a drum, than
631
- look upon his school-master.
632
-
633
- VALERIA:
634
- O' my word, the father's son: I'll swear,'tis a
635
- very pretty boy. O' my troth, I looked upon him o'
636
- Wednesday half an hour together: has such a
637
- confirmed countenance. I saw him run after a gilded
638
- butterfly: and when he caught it, he let it go
639
- again; and after it again; and over and over he
640
- comes, and again; catched it again; or whether his
641
- fall enraged him, or how 'twas, he did so set his
642
- teeth and tear it; O, I warrant it, how he mammocked
643
- it!
644
-
645
- VOLUMNIA:
646
- One on 's father's moods.
647
-
648
- VALERIA:
649
- Indeed, la, 'tis a noble child.
650
-
651
- VIRGILIA:
652
- A crack, madam.
653
-
654
- VALERIA:
655
- Come, lay aside your stitchery; I must have you play
656
- the idle husewife with me this afternoon.
657
-
658
- VIRGILIA:
659
- No, good madam; I will not out of doors.
660
-
661
- VALERIA:
662
- Not out of doors!
663
-
664
- VOLUMNIA:
665
- She shall, she shall.
666
-
667
- VIRGILIA:
668
- Indeed, no, by your patience; I'll not over the
669
- threshold till my lord return from the wars.
670
-
671
- VALERIA:
672
- Fie, you confine yourself most unreasonably: come,
673
- you must go visit the good lady that lies in.
674
-
675
- VIRGILIA:
676
- I will wish her speedy strength, and visit her with
677
- my prayers; but I cannot go thither.
678
-
679
- VOLUMNIA:
680
- Why, I pray you?
681
-
682
- VIRGILIA:
683
- 'Tis not to save labour, nor that I want love.
684
-
685
- VALERIA:
686
- You would be another Penelope: yet, they say, all
687
- the yarn she spun in Ulysses' absence did but fill
688
- Ithaca full of moths. Come; I would your cambric
689
- were sensible as your finger, that you might leave
690
- pricking it for pity. Come, you shall go with us.
691
-
692
- VIRGILIA:
693
- No, good madam, pardon me; indeed, I will not forth.
694
-
695
- VALERIA:
696
- In truth, la, go with me; and I'll tell you
697
- excellent news of your husband.
698
-
699
- VIRGILIA:
700
- O, good madam, there can be none yet.
701
-
702
- VALERIA:
703
- Verily, I do not jest with you; there came news from
704
- him last night.
705
-
706
- VIRGILIA:
707
- Indeed, madam?
708
-
709
- VALERIA:
710
- In earnest, it's true; I heard a senator speak it.
711
- Thus it is: the Volsces have an army forth; against
712
- whom Cominius the general is gone, with one part of
713
- our Roman power: your lord and Titus Lartius are set
714
- down before their city Corioli; they nothing doubt
715
- prevailing and to make it brief wars. This is true,
716
- on mine honour; and so, I pray, go with us.
717
-
718
- VIRGILIA:
719
- Give me excuse, good madam; I will obey you in every
720
- thing hereafter.
721
-
722
- VOLUMNIA:
723
- Let her alone, lady: as she is now, she will but
724
- disease our better mirth.
725
-
726
- VALERIA:
727
- In troth, I think she would. Fare you well, then.
728
- Come, good sweet lady. Prithee, Virgilia, turn thy
729
- solemness out o' door. and go along with us.
730
-
731
- VIRGILIA:
732
- No, at a word, madam; indeed, I must not. I wish
733
- you much mirth.
734
-
735
- VALERIA:
736
- Well, then, farewell.
737
-
738
- MARCIUS:
739
- Yonder comes news. A wager they have met.
740
-
741
- LARTIUS:
742
- My horse to yours, no.
743
-
744
- MARCIUS:
745
- 'Tis done.
746
-
747
- LARTIUS:
748
- Agreed.
749
-
750
- MARCIUS:
751
- Say, has our general met the enemy?
752
-
753
- Messenger:
754
- They lie in view; but have not spoke as yet.
755
-
756
- LARTIUS:
757
- So, the good horse is mine.
758
-
759
- MARCIUS:
760
- I'll buy him of you.
761
-
762
- LARTIUS:
763
- No, I'll nor sell nor give him: lend you him I will
764
- For half a hundred years. Summon the town.
765
-
766
- MARCIUS:
767
- How far off lie these armies?
768
-
769
- Messenger:
770
- Within this mile and half.
771
-
772
- MARCIUS:
773
- Then shall we hear their 'larum, and they ours.
774
- Now, Mars, I prithee, make us quick in work,
775
- That we with smoking swords may march from hence,
776
- To help our fielded friends! Come, blow thy blast.
777
- Tutus Aufidius, is he within your walls?
778
-
779
- First Senator:
780
- No, nor a man that fears you less than he,
781
- That's lesser than a little.
782
- Hark! our drums
783
- Are bringing forth our youth. We'll break our walls,
784
- Rather than they shall pound us up: our gates,
785
- Which yet seem shut, we, have but pinn'd with rushes;
786
- They'll open of themselves.
787
- Hark you. far off!
788
- There is Aufidius; list, what work he makes
789
- Amongst your cloven army.
790
-
791
- MARCIUS:
792
- O, they are at it!
793
-
794
- LARTIUS:
795
- Their noise be our instruction. Ladders, ho!
796
-
797
- MARCIUS:
798
- They fear us not, but issue forth their city.
799
- Now put your shields before your hearts, and fight
800
- With hearts more proof than shields. Advance,
801
- brave Titus:
802
- They do disdain us much beyond our thoughts,
803
- Which makes me sweat with wrath. Come on, my fellows:
804
- He that retires I'll take him for a Volsce,
805
- And he shall feel mine edge.
806
-
807
- MARCIUS:
808
- All the contagion of the south light on you,
809
- You shames of Rome! you herd of--Boils and plagues
810
- Plaster you o'er, that you may be abhorr'd
811
- Further than seen and one infect another
812
- Against the wind a mile! You souls of geese,
813
- That bear the shapes of men, how have you run
814
- From slaves that apes would beat! Pluto and hell!
815
- All hurt behind; backs red, and faces pale
816
- With flight and agued fear! Mend and charge home,
817
- Or, by the fires of heaven, I'll leave the foe
818
- And make my wars on you: look to't: come on;
819
- If you'll stand fast, we'll beat them to their wives,
820
- As they us to our trenches followed.
821
- So, now the gates are ope: now prove good seconds:
822
- 'Tis for the followers fortune widens them,
823
- Not for the fliers: mark me, and do the like.
824
-
825
- First Soldier:
826
- Fool-hardiness; not I.
827
-
828
- Second Soldier:
829
- Nor I.
830
-
831
- First Soldier:
832
- See, they have shut him in.
833
-
834
- All:
835
- To the pot, I warrant him.
836
-
837
- LARTIUS:
838
- What is become of Marcius?
839
-
840
- All:
841
- Slain, sir, doubtless.
842
-
843
- First Soldier:
844
- Following the fliers at the very heels,
845
- With them he enters; who, upon the sudden,
846
- Clapp'd to their gates: he is himself alone,
847
- To answer all the city.
848
-
849
- LARTIUS:
850
- O noble fellow!
851
- Who sensibly outdares his senseless sword,
852
- And, when it bows, stands up. Thou art left, Marcius:
853
- A carbuncle entire, as big as thou art,
854
- Were not so rich a jewel. Thou wast a soldier
855
- Even to Cato's wish, not fierce and terrible
856
- Only in strokes; but, with thy grim looks and
857
- The thunder-like percussion of thy sounds,
858
- Thou madst thine enemies shake, as if the world
859
- Were feverous and did tremble.
860
-
861
- First Soldier:
862
- Look, sir.
863
-
864
- LARTIUS:
865
- O,'tis Marcius!
866
- Let's fetch him off, or make remain alike.
867
-
868
- First Roman:
869
- This will I carry to Rome.
870
-
871
- Second Roman:
872
- And I this.
873
-
874
- Third Roman:
875
- A murrain on't! I took this for silver.
876
-
877
- MARCIUS:
878
- See here these movers that do prize their hours
879
- At a crack'd drachm! Cushions, leaden spoons,
880
- Irons of a doit, doublets that hangmen would
881
- Bury with those that wore them, these base slaves,
882
- Ere yet the fight be done, pack up: down with them!
883
- And hark, what noise the general makes! To him!
884
- There is the man of my soul's hate, Aufidius,
885
- Piercing our Romans: then, valiant Titus, take
886
- Convenient numbers to make good the city;
887
- Whilst I, with those that have the spirit, will haste
888
- To help Cominius.
889
-
890
- LARTIUS:
891
- Worthy sir, thou bleed'st;
892
- Thy exercise hath been too violent for
893
- A second course of fight.
894
-
895
- MARCIUS:
896
- Sir, praise me not;
897
- My work hath yet not warm'd me: fare you well:
898
- The blood I drop is rather physical
899
- Than dangerous to me: to Aufidius thus
900
- I will appear, and fight.
901
-
902
- LARTIUS:
903
- Now the fair goddess, Fortune,
904
- Fall deep in love with thee; and her great charms
905
- Misguide thy opposers' swords! Bold gentleman,
906
- Prosperity be thy page!
907
-
908
- MARCIUS:
909
- Thy friend no less
910
- Than those she placeth highest! So, farewell.
911
-
912
- LARTIUS:
913
- Thou worthiest Marcius!
914
- Go, sound thy trumpet in the market-place;
915
- Call thither all the officers o' the town,
916
- Where they shall know our mind: away!
917
-
918
- COMINIUS:
919
- Breathe you, my friends: well fought;
920
- we are come off
921
- Like Romans, neither foolish in our stands,
922
- Nor cowardly in retire: believe me, sirs,
923
- We shall be charged again. Whiles we have struck,
924
- By interims and conveying gusts we have heard
925
- The charges of our friends. Ye Roman gods!
926
- Lead their successes as we wish our own,
927
- That both our powers, with smiling
928
- fronts encountering,
929
- May give you thankful sacrifice.
930
- Thy news?
931
-
932
- Messenger:
933
- The citizens of Corioli have issued,
934
- And given to Lartius and to Marcius battle:
935
- I saw our party to their trenches driven,
936
- And then I came away.
937
-
938
- COMINIUS:
939
- Though thou speak'st truth,
940
- Methinks thou speak'st not well.
941
- How long is't since?
942
-
943
- Messenger:
944
- Above an hour, my lord.
945
-
946
- COMINIUS:
947
- 'Tis not a mile; briefly we heard their drums:
948
- How couldst thou in a mile confound an hour,
949
- And bring thy news so late?
950
-
951
- Messenger:
952
- Spies of the Volsces
953
- Held me in chase, that I was forced to wheel
954
- Three or four miles about, else had I, sir,
955
- Half an hour since brought my report.
956
-
957
- COMINIUS:
958
- Who's yonder,
959
- That does appear as he were flay'd? O gods
960
- He has the stamp of Marcius; and I have
961
- Before-time seen him thus.
962
-
963
- MARCIUS:
964
-
965
- COMINIUS:
966
- The shepherd knows not thunder from a tabour
967
- More than I know the sound of Marcius' tongue
968
- From every meaner man.
969
-
970
- MARCIUS:
971
- Come I too late?
972
-
973
- COMINIUS:
974
- Ay, if you come not in the blood of others,
975
- But mantled in your own.
976
-
977
- MARCIUS:
978
- O, let me clip ye
979
- In arms as sound as when I woo'd, in heart
980
- As merry as when our nuptial day was done,
981
- And tapers burn'd to bedward!
982
-
983
- COMINIUS:
984
- Flower of warriors,
985
- How is it with Titus Lartius?
986
-
987
- MARCIUS:
988
- As with a man busied about decrees:
989
- Condemning some to death, and some to exile;
990
- Ransoming him, or pitying, threatening the other;
991
- Holding Corioli in the name of Rome,
992
- Even like a fawning greyhound in the leash,
993
- To let him slip at will.
994
-
995
- COMINIUS:
996
- Where is that slave
997
- Which told me they had beat you to your trenches?
998
- Where is he? call him hither.
999
-
1000
- MARCIUS:
1001
- Let him alone;
1002
- He did inform the truth: but for our gentlemen,
1003
- The common file--a plague! tribunes for them!--
1004
- The mouse ne'er shunn'd the cat as they did budge
1005
- From rascals worse than they.
1006
-
1007
- COMINIUS:
1008
- But how prevail'd you?
1009
-
1010
- MARCIUS:
1011
- Will the time serve to tell? I do not think.
1012
- Where is the enemy? are you lords o' the field?
1013
- If not, why cease you till you are so?
1014
-
1015
- COMINIUS:
1016
- Marcius,
1017
- We have at disadvantage fought and did
1018
- Retire to win our purpose.
1019
-
1020
- MARCIUS:
1021
- How lies their battle? know you on which side
1022
- They have placed their men of trust?
1023
-
1024
- COMINIUS:
1025
- As I guess, Marcius,
1026
- Their bands i' the vaward are the Antiates,
1027
- Of their best trust; o'er them Aufidius,
1028
- Their very heart of hope.
1029
-
1030
- MARCIUS:
1031
- I do beseech you,
1032
- By all the battles wherein we have fought,
1033
- By the blood we have shed together, by the vows
1034
- We have made to endure friends, that you directly
1035
- Set me against Aufidius and his Antiates;
1036
- And that you not delay the present, but,
1037
- Filling the air with swords advanced and darts,
1038
- We prove this very hour.
1039
-
1040
- COMINIUS:
1041
- Though I could wish
1042
- You were conducted to a gentle bath
1043
- And balms applied to, you, yet dare I never
1044
- Deny your asking: take your choice of those
1045
- That best can aid your action.
1046
-
1047
- MARCIUS:
1048
- Those are they
1049
- That most are willing. If any such be here--
1050
- As it were sin to doubt--that love this painting
1051
- Wherein you see me smear'd; if any fear
1052
- Lesser his person than an ill report;
1053
- If any think brave death outweighs bad life
1054
- And that his country's dearer than himself;
1055
- Let him alone, or so many so minded,
1056
- Wave thus, to express his disposition,
1057
- And follow Marcius.
1058
- O, me alone! make you a sword of me?
1059
- If these shows be not outward, which of you
1060
- But is four Volsces? none of you but is
1061
- Able to bear against the great Aufidius
1062
- A shield as hard as his. A certain number,
1063
- Though thanks to all, must I select
1064
- from all: the rest
1065
- Shall bear the business in some other fight,
1066
- As cause will be obey'd. Please you to march;
1067
- And four shall quickly draw out my command,
1068
- Which men are best inclined.
1069
-
1070
- COMINIUS:
1071
- March on, my fellows:
1072
- Make good this ostentation, and you shall
1073
- Divide in all with us.
1074
-
1075
- LARTIUS:
1076
- So, let the ports be guarded: keep your duties,
1077
- As I have set them down. If I do send, dispatch
1078
- Those centuries to our aid: the rest will serve
1079
- For a short holding: if we lose the field,
1080
- We cannot keep the town.
1081
-
1082
- Lieutenant:
1083
- Fear not our care, sir.
1084
-
1085
- LARTIUS:
1086
- Hence, and shut your gates upon's.
1087
- Our guider, come; to the Roman camp conduct us.
1088
-
1089
- MARCIUS:
1090
- I'll fight with none but thee; for I do hate thee
1091
- Worse than a promise-breaker.
1092
-
1093
- AUFIDIUS:
1094
- We hate alike:
1095
- Not Afric owns a serpent I abhor
1096
- More than thy fame and envy. Fix thy foot.
1097
-
1098
- MARCIUS:
1099
- Let the first budger die the other's slave,
1100
- And the gods doom him after!
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
text_file_eng_long.txt DELETED
The diff for this file is too large to render. See raw diff
 
tokenizer.py ADDED
@@ -0,0 +1,75 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import pickle
2
+ import regex as re
3
+ from typing import List, Tuple
4
+
5
class HindiTokenizer:
    """Byte-level BPE tokenizer driven by a previously trained merge table.

    The merge table is read from a pickle file holding the tuple
    ``(merges, ids, num_merges)``. ``merges`` maps a pair of token ids to the
    id of the token produced by merging them; ``ids`` and ``num_merges`` are
    stored on the instance as loaded and are not used by this class.

    Text is first split into chunks with a regular expression, and byte-pair
    merges are then applied inside each chunk independently, so a merge never
    spans a chunk boundary.
    """

    def __init__(self, model_path: str = 'bpe_results.pkl'):
        """Load the merge table from *model_path* and build lookup structures.

        Args:
            model_path: Path of the pickle file containing
                ``(merges, ids, num_merges)``.

        Raises:
            OSError: If *model_path* cannot be opened.
        """
        # SECURITY: pickle.load can execute arbitrary code embedded in the
        # file. Only point model_path at files from a trusted source.
        with open(model_path, 'rb') as f:
            self.merges, self.ids, self.num_merges = pickle.load(f)

        # Map every token id (raw bytes and merged tokens) to its byte string.
        self.vocab = self._build_vocab(self.merges)

        # Hindi-focused pattern: English contractions, digit runs, runs of
        # Devanagari base characters with their combining marks, other letter
        # runs, punctuation runs, and whitespace.
        self.pattern = re.compile(r"""'s|'t|'re|'ve|'m|'ll|'d| ?\p{N}+| ?(?:[\u0904-\u0939\u093d-\u093d\u0950-\u0950\u0958-\u0961\u0970-\u097f\ua8f2-\ua8fe\U00011b00-\U00011b09\u1cd3-\u1cd3\u1ce9-\u1cec\u1cee-\u1cf3\u1cf5-\u1cf6\u1cfa-\u1cfa][\u0900-\u0903\u093a-\u093c\u093e-\u094f\u0951-\u0957\u0962-\u0963\ua8e0-\ua8f1\ua8ff-\ua8ff\u1cd0-\u1cd2\u1cd4-\u1ce8\u1ced-\u1ced\u1cf4-\u1cf4\u1cf7-\u1cf9]*)+| ?\p{L}+| ?[^\s\p{L}\p{N}]+|\s+(?!\S)|\s+""")

    @staticmethod
    def _build_vocab(merges: dict) -> dict:
        """Return a mapping from token id to the bytes that token stands for.

        Ids 0-255 are the single raw bytes. Merged tokens are added in
        ascending id order: a merged id is always larger than the ids it was
        built from, so both parts are present when they are concatenated,
        regardless of the iteration order of *merges*.
        """
        vocab = {idx: bytes([idx]) for idx in range(256)}
        for (p0, p1), idx in sorted(merges.items(), key=lambda item: item[1]):
            vocab[idx] = vocab[p0] + vocab[p1]
        return vocab

    def tokenize(self, text: str) -> Tuple[List[int], List[str], List[str]]:
        """Encode *text* into BPE token ids.

        Args:
            text: The string to tokenize.

        Returns:
            A tuple ``(final_tokens, tokens, decoded_tokens)`` where
            ``final_tokens`` is the flat list of token ids, ``tokens`` is the
            list of regex chunks the text was split into, and
            ``decoded_tokens`` holds the text of each id in ``final_tokens``.
            Each id is decoded on its own with ``errors="replace"``, so a
            token whose bytes are not valid UTF-8 by themselves is shown with
            replacement characters.
        """
        # Split into chunks; merges are applied inside each chunk only.
        tokens = self.pattern.findall(text)

        final_tokens: List[int] = []
        for token in tokens:
            final_tokens.extend(self._encode_chunk(list(token.encode('utf-8'))))

        decoded_tokens = [self.vocab[idx].decode("utf-8", errors="replace") for idx in final_tokens]

        return final_tokens, tokens, decoded_tokens

    def _encode_chunk(self, ids: List[int]) -> List[int]:
        """Apply the learned merges to one chunk's ids, lowest merge id first."""
        # At least two ids means at least one adjacent pair, so the pair
        # statistics below are never empty.
        while len(ids) >= 2:
            stats = self._get_stats([ids])
            # The pair with the smallest merge id must be applied first;
            # pairs without a merge rank as infinity.
            pair = min(stats, key=lambda p: self.merges.get(p, float("inf")))
            if pair not in self.merges:
                break  # nothing left in this chunk can be merged
            ids = self._merge([ids], pair, self.merges[pair])[0]
        return ids

    def _get_stats(self, token_list: List[List[int]]) -> dict:
        """Count frequency of adjacent id pairs across all tokens.

        Pairs are formed within a token only, never across two tokens.
        """
        counts = {}
        for token in token_list:
            if len(token) < 2:
                continue
            for pair in zip(token, token[1:]):
                counts[pair] = counts.get(pair, 0) + 1
        return counts

    def _merge(self, token_list: List[List[int]], pair: Tuple[int, int], idx: int) -> List[List[int]]:
        """Replace every occurrence of *pair* with *idx* within each token.

        Occurrences are matched left to right without overlap. Tokens shorter
        than two ids are passed through as the same list object; every other
        token is rebuilt as a new list.
        """
        newids = []
        for token in token_list:
            if len(token) < 2:
                newids.append(token)
                continue

            new_token = []
            i = 0
            while i < len(token):
                if i < len(token) - 1 and (token[i], token[i + 1]) == pair:
                    new_token.append(idx)
                    i += 2
                else:
                    new_token.append(token[i])
                    i += 1
            newids.append(new_token)
        return newids