Tri4 committed (verified)
Commit 2751952 · 1 Parent(s): 7ea6aae

Create main.py

Files changed (1)
  1. main.py +82 -0
main.py ADDED
@@ -0,0 +1,82 @@
+ from flask import Flask, request, jsonify, Response, stream_with_context
+ from huggingface_hub import InferenceClient
+
+ # Initialize Flask app
+ app = Flask(__name__)
+
+ # Initialize InferenceClient for the hosted Mistral instruct model
+ client = InferenceClient("mistralai/Mistral-7B-Instruct-v0.1")
+
+ def format_prompt(message, history):
+     # Build a Mistral-instruct prompt: each past (user, bot) turn is wrapped
+     # in [INST] ... [/INST] tags, then the new message is appended.
+     prompt = "<s>"
+     for user_prompt, bot_response in history:
+         prompt += f"[INST] {user_prompt} [/INST]"
+         prompt += f" {bot_response}</s> "
+     prompt += f"[INST] {message} [/INST]"
+     return prompt
+
+ def generate_stream(prompt, history, temperature=0.9, max_new_tokens=256, top_p=0.95, repetition_penalty=1.0):
+     # Clamp temperature away from zero to avoid degenerate sampling
+     temperature = float(temperature)
+     if temperature < 1e-2:
+         temperature = 1e-2
+     top_p = float(top_p)
+
+     generate_kwargs = dict(
+         temperature=temperature,
+         max_new_tokens=max_new_tokens,
+         top_p=top_p,
+         repetition_penalty=repetition_penalty,
+         do_sample=True,
+         seed=42,
+     )
+
+     formatted_prompt = format_prompt(prompt, history)
+
+     # Get a streaming response from the Mistral model
+     response = client.text_generation(
+         formatted_prompt,
+         **generate_kwargs,
+         stream=True,
+         details=True,
+         return_full_text=False
+     )
+
+     def generate():
+         try:
+             for token in response:
+                 if hasattr(token, 'token') and hasattr(token.token, 'text'):
+                     # Yield only the new token text; yielding the accumulated
+                     # output would duplicate content in the HTTP stream
+                     yield token.token.text
+                 else:
+                     print(f"Unexpected token structure: {token}")
+         except Exception as e:
+             print(f"Error while processing streaming response: {str(e)}")
+
+     # Return the generator itself (not the function) so Response can iterate it
+     return generate()
+
+ @app.route("/generate", methods=["POST"])
+ def generate_text():
+     data = request.json
+     prompt = data.get("prompt", "")
+     history = data.get("history", [])
+     temperature = data.get("temperature", 0.9)
+     max_new_tokens = data.get("max_new_tokens", 256)
+     top_p = data.get("top_p", 0.95)
+     repetition_penalty = data.get("repetition_penalty", 1.0)
+
+     try:
+         return Response(stream_with_context(generate_stream(
+             prompt,
+             history,
+             temperature=temperature,
+             max_new_tokens=max_new_tokens,
+             top_p=top_p,
+             repetition_penalty=repetition_penalty
+         )), content_type='text/plain')
+     except Exception as e:
+         print(f"Error: {str(e)}")
+         return jsonify({"error": str(e)}), 500
+
+ if __name__ == "__main__":
+     app.run(debug=True, port=5000)
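
For a quick smoke test of the streaming endpoint, a minimal client sketch follows. It is not part of the commit: it assumes the server is running locally on port 5000 (per app.run above), uses the third-party requests library, and mirrors the JSON field names read by the /generate route; the payload values are placeholders.

# client_sketch.py -- hypothetical client for the /generate route above
import requests

payload = {
    "prompt": "What is the capital of France?",
    "history": [],  # list of [user_prompt, bot_response] pairs
    "temperature": 0.9,
    "max_new_tokens": 256,
}

# stream=True lets us print the text/plain body chunk by chunk as tokens arrive
with requests.post("http://localhost:5000/generate", json=payload, stream=True) as resp:
    resp.raise_for_status()
    for chunk in resp.iter_content(chunk_size=None, decode_unicode=True):
        print(chunk, end="", flush=True)

Because generate() yields each token's text rather than the accumulated output, the streamed chunks concatenate cleanly on the client side.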