govisi committed on
Commit 5bd1756 · 1 Parent(s): 515138a
Files changed (1)
  1. app.py +11 -6
app.py CHANGED
@@ -5,12 +5,20 @@ from discord.ext import commands
 from dotenv import load_dotenv
 from threading import Thread
 from rwkvstic.load import RWKV
-from rwkvstic.agnostic.backends import TORCH
+from rwkvstic.agnostic.backends import TORCH, TORCH_QUANT
 import torch
 
 load_dotenv()
 bot = commands.Bot("", intents=discord.Intents.all())
 
+quantized = {
+    "mode": TORCH_QUANT,
+    "runtimedtype": torch.bfloat16,
+    "useGPU": torch.cuda.is_available(),
+    "chunksize": 32,  # larger = more accurate, but more memory
+    "target": 100  # your gpu max size, excess vram offloaded to cpu
+}
+
 # model_pipe = pickle.load(open('pipe.pkl', 'rb'))
 # this is the dtype used for trivial operations, such as vector->vector operations and is the dtype that will determine the accuracy of the model
 runtimedtype = torch.float32 # torch.float64, torch.bfloat16
@@ -25,11 +33,8 @@ useGPU = torch.cuda.is_available() # False
 async def on_ready():
     print(f'We have logged in as {bot.user}')
     global model
-    model = RWKV("https://huggingface.co/BlinkDL/rwkv-4-pile-7b/resolve/main/RWKV-4-Pile-7B-20230109-ctx4096.pth",
-                 mode=TORCH,
-                 useGPU=useGPU,
-                 runtimedtype=runtimedtype,
-                 dtype=dtype)
+    model = RWKV("https://huggingface.co/Hazzzardous/RWKV-8Bit/resolve/main/RWKV-4-Pile-7B-Instruct.pqth",
+                 **quantized)
 
 
 @bot.listen('on_message')
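
For reference, a minimal standalone sketch of the loading path this commit switches to: the quantized options and the RWKV(...) call are taken directly from the diff above, while the generation calls at the end (loadContext/forward) are an assumption based on rwkvstic's usual interface and are not part of this change.

# Minimal sketch of the quantized loading path introduced by this commit.
# The quantized dict and RWKV(...) call mirror the diff; the generation
# calls at the bottom are assumed from rwkvstic's typical interface and
# are NOT part of the commit itself.
import torch
from rwkvstic.load import RWKV
from rwkvstic.agnostic.backends import TORCH_QUANT

quantized = {
    "mode": TORCH_QUANT,                  # 8-bit quantized torch backend
    "runtimedtype": torch.bfloat16,       # dtype for vector->vector ops
    "useGPU": torch.cuda.is_available(),
    "chunksize": 32,                      # larger = more accurate, but more memory
    "target": 100,                        # GPU budget; excess vram offloaded to cpu
}

# Pre-quantized 8-bit instruct checkpoint replaces the full-precision
# BlinkDL pile checkpoint that was loaded before this commit.
model = RWKV(
    "https://huggingface.co/Hazzzardous/RWKV-8Bit/resolve/main/RWKV-4-Pile-7B-Instruct.pqth",
    **quantized,
)

# Assumed usage (rwkvstic's usual interface), not part of the diff:
model.loadContext(newctx="Q: What is RWKV?\n\nA:")
print(model.forward(number=64)["output"])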