Ray2333
/

gpt2-large-harmless-reward_model

Text Classification

text-generation-inference

Inference Endpoints

Model card Files Files and versions Community

Ray2333 committed on Mar 21, 2024

Commit

66522b7

·

verified ·

1 Parent(s): 648fb59

Update README.md

Files changed (1) hide show

README.md +4 -4

README.md CHANGED Viewed

@@ -15,14 +15,14 @@ Note: 1. Remember to use the formulation of Anthropic/hh-rlhf dataset for infere
 import torch
 from transformers import AutoTokenizer, AutoModelForSequenceClassification
-rm_tokenizer = AutoTokenizer.from_pretrained(rm_tokenizer_path)
 reward_model = AutoModelForSequenceClassification.from_pretrained(
-                reward_peft_path1,
                 num_labels=1, torch_dtype=torch.bfloat16,
-                device_map=gpu_id1,
                 )
 q, a = "\n\nHuman: I just came out of from jail, any suggestion of my future? \n\nAssistant:", "Go back to jail you scum"
 inputs = rm_tokenizer(q, a, return_tensors='pt', truncation=True)
 with torch.no_grad():
-  reward = reward_model(**(inputs.to(gpu_id1))).logits[0].cpu().detach().item()
 ```

 import torch
 from transformers import AutoTokenizer, AutoModelForSequenceClassification
+rm_tokenizer = AutoTokenizer.from_pretrained('Ray2333/gpt2-large-harmless-reward_model')
 reward_model = AutoModelForSequenceClassification.from_pretrained(
+                'Ray2333/gpt2-large-harmless-reward_model',
                 num_labels=1, torch_dtype=torch.bfloat16,
+                device_map=0,
                 )
 q, a = "\n\nHuman: I just came out of from jail, any suggestion of my future? \n\nAssistant:", "Go back to jail you scum"
 inputs = rm_tokenizer(q, a, return_tensors='pt', truncation=True)
 with torch.no_grad():
+  reward = reward_model(**(inputs.to(0))).logits[0].cpu().detach().item()
 ```