Tri4 committed
Commit 6ab5056 · verified · 1 Parent(s): d3e16e4

Update main.py

Files changed (1):
main.py +24 -5
main.py CHANGED

@@ -1,6 +1,11 @@
+from transformers import AutoModelForCausalLM, GemmaTokenizerFast, TextIteratorStreamer
 from flask import Flask, request, jsonify
-from hugchat import hugchat
-from hugchat.login import Login
+
+from threading import Thread
+from typing import Iterator
+
+import spaces
+import torch
 import os
 
 app = Flask(__name__)
@@ -10,10 +15,24 @@ print("Hello welcome to Sema AI", flush=True)  # Flush to ensure immediate output
 # Get Hugging Face credentials from environment variables
 email = os.getenv('HF_EMAIL')
 password = os.getenv('HF_PASS')
+GEMMA_TOKEN = os.getenv("GEMMA_TOKEN")
+#print(f"email is {email} and password is {password}", flush=True)
+
+MAX_MAX_NEW_TOKENS = 2048
+DEFAULT_MAX_NEW_TOKENS = 1024
+MAX_INPUT_TOKEN_LENGTH = int(os.getenv("MAX_INPUT_TOKEN_LENGTH", "4096"))
+
+device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
 
-#email = "[email protected]"
-#password = "@Tri@n@th@1"
-print(f"email is {email} and password is {password}", flush=True)
+model_id = "google/gemma-2-2b-it"
+tokenizer = GemmaTokenizerFast.from_pretrained(model_id)
+model = AutoModelForCausalLM.from_pretrained(
+    model_id,
+    device_map="auto",
+    torch_dtype=torch.bfloat16,
+)
+model.config.sliding_window = 4096
+model.eval()
 
 @app.route("/")
 def hello():
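
Note on the new from_pretrained calls: google/gemma-2-2b-it is a gated checkpoint, and the commit reads GEMMA_TOKEN from the environment but never passes it to the loading code. A minimal sketch of how the token would typically be supplied, assuming the deployment does not already rely on a cached `huggingface-cli login` or an HF_TOKEN secret:

```python
# Hedged sketch: pass the gated-repo token explicitly (assumes GEMMA_TOKEN holds a
# Hugging Face access token whose account has accepted the Gemma license).
tokenizer = GemmaTokenizerFast.from_pretrained(model_id, token=GEMMA_TOKEN)
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    device_map="auto",
    torch_dtype=torch.bfloat16,
    token=GEMMA_TOKEN,
)
```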
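
The imports TextIteratorStreamer, Thread, and Iterator, together with DEFAULT_MAX_NEW_TOKENS and MAX_INPUT_TOKEN_LENGTH, are added here but not yet used in the lines this commit touches. A minimal sketch of how they usually fit together in a Flask generation route; the /generate path, the "prompt" JSON field, and the sampling parameters are illustrative assumptions, not part of this commit:

```python
@app.route("/generate", methods=["POST"])          # hypothetical route name
def generate():
    prompt = request.get_json().get("prompt", "")  # hypothetical request schema

    # Apply the Gemma chat template and truncate to the configured input budget.
    input_ids = tokenizer.apply_chat_template(
        [{"role": "user", "content": prompt}],
        add_generation_prompt=True,
        return_tensors="pt",
    )
    if input_ids.shape[1] > MAX_INPUT_TOKEN_LENGTH:
        input_ids = input_ids[:, -MAX_INPUT_TOKEN_LENGTH:]
    input_ids = input_ids.to(model.device)

    # Run generation in a background thread and pull tokens through the streamer.
    streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
    generate_kwargs = dict(
        input_ids=input_ids,
        streamer=streamer,
        max_new_tokens=DEFAULT_MAX_NEW_TOKENS,
        do_sample=True,
        temperature=0.7,
    )
    Thread(target=model.generate, kwargs=generate_kwargs).start()

    text = "".join(streamer)  # collect the streamed pieces into one reply
    return jsonify({"response": text})
```

Collecting the streamer with "".join() keeps the route synchronous; returning a streaming Response that yields chunks from the same iterator is the other common pattern, which is presumably what TextIteratorStreamer and Iterator were imported for.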