noddysnots committed
Commit 4df7255 · verified · 1 Parent(s): 48bf064

Update app.py

Files changed (1)
  1. app.py +12 -9
app.py CHANGED
@@ -3,20 +3,23 @@ from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
 import torch
 import requests
 
-# Load DeepSeek-R1 model
+# Load DeepSeek-R1 model with trust_remote_code enabled
 model_name = "deepseek-ai/DeepSeek-R1"
 tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
 
-# Ensure the model uses float16 instead of fp8
-model = AutoModelForCausalLM.from_pretrained(
-    model_name,
-    torch_dtype=torch.float16,  # Forces float16 to prevent fp8 issue
-    device_map="auto",
-    trust_remote_code=True
-)
+# Ensure compatibility with `flash_attn` and force proper dtype
+try:
+    model = AutoModelForCausalLM.from_pretrained(
+        model_name,
+        torch_dtype=torch.float16,  # Forces float16 to prevent fp8 issue
+        device_map="auto",
+        trust_remote_code=True
+    )
+except ImportError as e:
+    raise RuntimeError("Missing required dependency: flash_attn. Install with `pip install flash_attn`") from e
 
 # Use a text-generation pipeline for better inference
-generator = pipeline("text-generation", model=model, tokenizer=tokenizer, device=0)
+generator = pipeline("text-generation", model=model, tokenizer=tokenizer, device=0 if torch.cuda.is_available() else -1)
 
 
 # 🎯 Function to extract interests from user input
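For reference, a minimal usage sketch of the `generator` pipeline defined in the updated app.py; the prompt text and the sampling parameters (max_new_tokens, temperature) are illustrative assumptions and are not part of this commit:

# Hypothetical call to the `generator` built above; prompt and sampling
# parameters are assumptions for illustration only.
prompt = "List three hobbies related to outdoor sports."
outputs = generator(prompt, max_new_tokens=64, do_sample=True, temperature=0.7)
print(outputs[0]["generated_text"])  # the text-generation pipeline returns a list of dicts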