TKgumi committed
Commit c815242 · verified · 1 Parent(s): 1e9e4b4

Update app.py

Files changed (1): app.py (+16, -7)
app.py CHANGED
@@ -1,20 +1,29 @@
 from fastapi import FastAPI
 from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
+import torch
 
-app = FastAPI()
+MODEL_NAME = "EQUES/TinyDeepSeek-1.5B"
 
-# 🚀 Use a lightweight Japanese model (7B → 3.6B)
-MODEL_NAME = "rinna/japanese-gpt-neox-3.6b"
-tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME, use_fast=False)
-model = AutoModelForCausalLM.from_pretrained(MODEL_NAME)
+# Apply memory optimizations
+tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
+model = AutoModelForCausalLM.from_pretrained(
+    MODEL_NAME,
+    torch_dtype=torch.float16,  # 16-bit to save memory
+    device_map="auto",          # split the allocation across CPU memory
+    low_cpu_mem_usage=True      # reduce memory use at initialization
+)
+
 generator = pipeline("text-generation", model=model, tokenizer=tokenizer)
 
+app = FastAPI()
+
 @app.get("/")
 def root():
-    return {"message": "DeepSeek API is running!"}
+    return {"message": "TinyDeepSeek API is running!"}
 
 @app.get("/generate")
 def generate(prompt: str, max_length: int = 100):
-    result = generator(prompt, max_length=max_length, do_sample=True)[0]['generated_text']
+    result = generator(prompt, max_length=max_length)[0]['generated_text']
     return {"response": result}
 
+
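The main change in this commit is loading the new model in half precision. A minimal sanity-check sketch, assuming `torch`, `transformers`, and `accelerate` are installed (`device_map="auto"` relies on accelerate) and reusing the model name from the diff, can confirm that the fp16 weights occupy roughly half the memory of an fp32 load:

# Hedged sketch: verify the half-precision load from the diff above.
# Assumes `accelerate` is installed (required by device_map="auto").
import torch
from transformers import AutoModelForCausalLM

model = AutoModelForCausalLM.from_pretrained(
    "EQUES/TinyDeepSeek-1.5B",
    torch_dtype=torch.float16,
    device_map="auto",
    low_cpu_mem_usage=True,
)

n_params = sum(p.numel() for p in model.parameters())
n_bytes = sum(p.numel() * p.element_size() for p in model.parameters())
print(f"{n_params / 1e9:.2f}B parameters, ~{n_bytes / 1e9:.2f} GB of weights in fp16")

At roughly 1.5B parameters and 2 bytes each, the weights alone should land near 3 GB, about half of what an fp32 load would need.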
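To exercise the two endpoints after this change, a smoke test along these lines works once the server is up. The uvicorn command, port, and `requests` dependency are illustrative assumptions, not part of the commit:

# Hypothetical smoke test for the endpoints in app.py.
# Start the server first, e.g.:  uvicorn app:app --host 0.0.0.0 --port 7860
import requests

BASE = "http://127.0.0.1:7860"  # adjust to wherever the app actually runs

# Health check: root() should report that the TinyDeepSeek API is running.
print(requests.get(f"{BASE}/").json())

# Generation: /generate takes the prompt and max_length as query parameters.
resp = requests.get(
    f"{BASE}/generate",
    params={"prompt": "Hello!", "max_length": 50},
)
print(resp.json()["response"])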