Spaces:

Organika
/

merlin-chat-demo

Runtime error

App Files Community

Colby committed on Mar 11, 2024

Commit

03c5aaf

verified ·

1 Parent(s): 54033c4

Upload app.py

Browse files

Files changed (1) hide show

app.py +19 -70

app.py CHANGED Viewed

@@ -29,74 +29,13 @@ wiki_wiki = wikipediaapi.Wikipedia('Organika ([email protected])', 'en')
 # Use a pipeline as a high-level helper
 from transformers import pipeline
-topic_model = pipeline("zero-shot-classification", model="valhalla/distilbart-mnli-12-9")
-#model = pipeline("text-generation", model="Colby/StarCoder-3B-WoW-JSON", device=0)
-import requests
-# function for Huggingface API calls
-def query(payload, model_path, headers):
-    API_URL = "https://api-inference.huggingface.co/models/" + model_path
-    for retry in range(3):
-        response = requests.post(API_URL, headers=headers, json=payload)
-        if response.status_code == requests.codes.ok:
-            try:
-                results = response.json()
-                return results
-            except:
-                print('Invalid response received from server')
-                print(response)
-                return None
-        else:
-            # Not connected to internet maybe?
-            if response.status_code==404:
-                print('Are you connected to the internet?')
-                print('URL attempted = '+API_URL)
-                break
-            if response.status_code==503:
-                print(response.json())
-                continue
-            if response.status_code==504:
-                print('504 Gateway Timeout')
-            else:
-                print('Unsuccessful request, status code '+ str(response.status_code))
-                # print(response.json()) #debug only
-                print(payload)
-def generate_text(prompt, model_path, text_generation_parameters, headers):
-    start_time = time.time()
-    options = {'use_cache': False, 'wait_for_model': True}
-    payload = {"inputs": prompt, "parameters": text_generation_parameters, "options": options}
-    output_list = query(payload, model_path, headers)
-    if not output_list:
-        print('Generation failed')
-    end_time = time.time()
-    duration = round(end_time - start_time, 1)
-    stringlist = []
-    if output_list and 'generated_text' in output_list[0].keys():
-        print(f'{len(output_list)} sample(s) of text generated in {duration} seconds.')
-        for gendict in output_list:
-            stringlist.append(gendict['generated_text'])
-    else:
-        print(output_list)
-    return(stringlist)
-model_path = "Colby/StarCoder-1B-WoW-JSON"
-parameters = {
-    "max_new_tokens": 250,
-    "return_full_text": False,
-    "do_sample": True,
-    "temperature": 0.8,
-    "top_p": 0.9,
-    "top_k": 50,
-    "repetition_penalty": 1.1
-}
-headers = {"Authorization": "Bearer " + os.environ['HF_TOKEN']}
 def merlin_chat(message, history):
     chat_text = ""
     chat_list = []
-    for turn in history:
         chat_text += f"{turn[0]}\n\n{turn[1]}\n\n"
         chat_list.append({"role": "user", "content": turn[0]})
         chat_list.append({"role": "assistant", "content": turn[1]})
@@ -111,9 +50,12 @@ def merlin_chat(message, history):
                 continue
             if ent.text in ents_found:
                 continue
-            ents_found.append(ent.text.title())
     r.extract_keywords_from_text(chat_text)
-    ents_found = ents_found + r.get_ranked_phrases()[:3]
     context = ""
     scores = topic_model(chat_text, ents_found, multi_label=True)['scores']
     if ents_found:
@@ -135,6 +77,8 @@ def merlin_chat(message, history):
                     continue
                 else:
                     context += entsum + '\n\n'
     system_msg = {
         'role': 'system', 'content': context
     }
@@ -142,10 +86,12 @@ def merlin_chat(message, history):
     user_msg = {'role': 'user', 'content': message}
     chat_list.append(user_msg)
     prompt = json.dumps(chat_list)[:-1] + ",{\"role\": \"assistant\", \"content\": \""
     for attempt in range(3):
-        result = generate_text(prompt, model_path, parameters, headers)
-        response = result[0]
-        print(response) # so we can see it in logs
         start = 0
         end = 0
         cleanStr = response.lstrip()
@@ -161,7 +107,10 @@ def merlin_chat(message, history):
         message = messages[-1]
         if message['role'] != 'assistant':
             continue
         return message['content']
     return "🤔"
-gr.ChatInterface(merlin_chat).launch()

 # Use a pipeline as a high-level helper
 from transformers import pipeline
+topic_model = pipeline("zero-shot-classification", model="valhalla/distilbart-mnli-12-9", device=0)
+model = pipeline("text-generation", model="Colby/StarCoder-1B-WoW-JSON", device=0)
 def merlin_chat(message, history):
     chat_text = ""
     chat_list = []
+    for turn in history[-3:]:
         chat_text += f"{turn[0]}\n\n{turn[1]}\n\n"
         chat_list.append({"role": "user", "content": turn[0]})
         chat_list.append({"role": "assistant", "content": turn[1]})
                 continue
             if ent.text in ents_found:
                 continue
+            ents_found.append(ent.text.title().lower())
     r.extract_keywords_from_text(chat_text)
+    for phrase in r.get_ranked_phrases()[:3]:
+        phrase = phrase.lower()
+        if phrase not in ents_found:
+            ents_found.append(phrase)
     context = ""
     scores = topic_model(chat_text, ents_found, multi_label=True)['scores']
     if ents_found:
                     continue
                 else:
                     context += entsum + '\n\n'
+            else:
+                print("not found.")
     system_msg = {
         'role': 'system', 'content': context
     }
     user_msg = {'role': 'user', 'content': message}
     chat_list.append(user_msg)
     prompt = json.dumps(chat_list)[:-1] + ",{\"role\": \"assistant\", \"content\": \""
+    print(f"PROMPT: {prompt}")
     for attempt in range(3):
+        #result = generate_text(prompt, model_path, parameters, headers)
+        result = model(prompt,return_full_text=False, max_new_tokens=256, temperature=0.8, repetition_penalty=1.1)
+        response = result[0]['generated_text']
+        print(f"COMPLETION: {response}") # so we can see it in logs
         start = 0
         end = 0
         cleanStr = response.lstrip()
         message = messages[-1]
         if message['role'] != 'assistant':
             continue
+        msg_text = message['content']
+        if chat_text.find(msg_text) >= 0:
+            continue
         return message['content']
     return "🤔"
+gr.ChatInterface(merlin_chat).launch(share=True)