Spaces:

jikoni
/

llamaSMS

Sleeping

Tri4 committed on Aug 11, 2024

Commit

fb7b6d3

verified ·

1 Parent(s): a4db3e3

Rename chatv2.py to main.py

Files changed (1) hide show

chatv2.py → main.py RENAMED Viewed

@@ -1,5 +1,6 @@
 from flask import Flask, request, jsonify
 from huggingface_hub import InferenceClient
 # Initialize Flask app
 app = Flask(__name__)
@@ -47,11 +48,22 @@ def generate(prompt, history, temperature=0.9, max_new_tokens=256, top_p=0.95, r
     )
     output = ""
-    for token in response:
-        if hasattr(token, 'token') and hasattr(token.token, 'text'):
-            output += token.token.text
-        else:
-            print(f"Unexpected token structure: {token}", flush=True)
     # Print AI response
     print(f"\nSema AI: {output}\n", flush=True)

 from flask import Flask, request, jsonify
 from huggingface_hub import InferenceClient
+import time
 # Initialize Flask app
 app = Flask(__name__)
     )
     output = ""
+    complete = False
+    # Use a loop to ensure response is fully received
+    while not complete:
+        for token in response:
+            if hasattr(token, 'token') and hasattr(token.token, 'text'):
+                output += token.token.text
+            else:
+                print(f"Unexpected token structure: {token}", flush=True)
+            # Check if the response seems complete
+            if token.token.text.endswith('</s>'):
+                complete = True
+                break
+        # Introduce a delay to handle streaming responses more smoothly
+        time.sleep(0.1)
     # Print AI response
     print(f"\nSema AI: {output}\n", flush=True)