Spaces:

govisi
/

anipins-streamlit

Runtime error

govisi committed on Feb 25, 2023

Commit

5bd1756

1 Parent(s): 515138a

testing

Files changed (1) hide show

app.py CHANGED Viewed

@@ -5,12 +5,20 @@ from discord.ext import commands
 from dotenv import load_dotenv
 from threading import Thread
 from rwkvstic.load import RWKV
-from rwkvstic.agnostic.backends import TORCH
 import torch
 load_dotenv()
 bot = commands.Bot("", intents=discord.Intents.all())
 # model_pipe = pickle.load(open('pipe.pkl', 'rb'))
 # this is the dtype used for trivial operations, such as vector->vector operations and is the dtype that will determine the accuracy of the model
 runtimedtype = torch.float32 # torch.float64, torch.bfloat16
@@ -25,11 +33,8 @@ useGPU = torch.cuda.is_available() # False
 async def on_ready():
     print(f'We have logged in as {bot.user}')
     global model
-    model = RWKV("https://huggingface.co/BlinkDL/rwkv-4-pile-7b/resolve/main/RWKV-4-Pile-7B-20230109-ctx4096.pth",
-                 mode=TORCH,
-                 useGPU=useGPU,
-                 runtimedtype=runtimedtype,
-                 dtype=dtype)
 @bot.listen('on_message')

 from dotenv import load_dotenv
 from threading import Thread
 from rwkvstic.load import RWKV
+from rwkvstic.agnostic.backends import TORCH, TORCH_QUANT
 import torch
 load_dotenv()
 bot = commands.Bot("", intents=discord.Intents.all())
+quantized = {
+    "mode": TORCH_QUANT,
+    "runtimedtype": torch.bfloat16,
+    "useGPU": torch.cuda.is_available(),
+    "chunksize": 32,  # larger = more accurate, but more memory
+    "target": 100  # your gpu max size, excess vram offloaded to cpu
+}
 # model_pipe = pickle.load(open('pipe.pkl', 'rb'))
 # this is the dtype used for trivial operations, such as vector->vector operations and is the dtype that will determine the accuracy of the model
 runtimedtype = torch.float32 # torch.float64, torch.bfloat16
 async def on_ready():
     print(f'We have logged in as {bot.user}')
     global model
+    model = RWKV("https://huggingface.co/Hazzzardous/RWKV-8Bit/resolve/main/RWKV-4-Pile-7B-Instruct.pqth",
+                 **quantized)
 @bot.listen('on_message')