kibrq committed on
Commit
bad1bc5
·
1 Parent(s): 623e9da

Create README.md

Browse files
Files changed (1) hide show
  1. README.md +50 -0
README.md ADDED
@@ -0,0 +1,50 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ To load this model, use the following code:
2
+ ```py
3
+ from transformers import PreTrainedTokenizerFast, AutoModelForCausalLM, AutoConfig
4
+
5
+ tokenizer = PreTrainedTokenizerFast.from_pretrained('kibrq/greedy-intersection')
6
+
7
+ config = AutoConfig.from_pretrained('kibrq/greedy-intersection', trust_remote_code = True)
8
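+ config._from_tokenizer(freegroup_dimension, tokenizer)  # NOTE: define freegroup_dimension first (number of free-group generators; presumably 3, as in the generation example)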
9
+
10
+ model = AutoModelForCausalLM.from_config(config, trust_remote_code = True)
11
+ ```
12
+
13
+ To generate words from the intersection, use this code:
14
+
15
+ ```py
16
+ from freegroup.sampling import free_group_bounded
17
+ from freegroup.tools import is_from_singleton_normal_closure
18
+
19
+ from freegroup.commutators import to_tokenizer, from_tokenizer
20
+
21
+ from itertools import islice
22
+
23
+ batch_size = 20
24
+ prefix_length = 15
25
+
26
+ generation_config = dict(
27
+ max_new_tokens = 200,
28
+ )
29
+
30
+ num_runs = 10
31
+
32
+ for _ in range(num_runs):
33
+
34
+ inputs = islice(free_group_bounded(3, max_length = prefix_length, random_length_method="constant"), batch_size)
35
+ inputs = list(map(to_tokenizer, inputs))
36
+ inputs = tokenizer(inputs, return_tensors='pt').input_ids
37
+
38
+ outputs = model.generate(
39
+ inputs = inputs,
40
+ **generation_config
41
+ )
42
+
43
+ outputs = tokenizer.batch_decode(outputs, skip_special_tokens=True)
44
+ outputs = map(from_tokenizer, outputs)
45
+
46
+ condition = lambda x: all(map(lambda gen: is_from_singleton_normal_closure(gen, x), [[1], [2], [3], [1, 2, 3]]))
47
+ outputs = filter(condition, outputs)
48
+
49
+ print(list(outputs))
50
+ ```