Colby committed on
Commit
1a94aaa
·
verified ·
1 Parent(s): 675d880

Upload 2 files

Browse files

Add app and requirements

Files changed (2) hide show
  1. app.py +95 -0
  2. requirements.txt +5 -0
app.py ADDED
@@ -0,0 +1,95 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
import json

import gradio as gr
import spacy
import wikipediaapi
from rake_nltk import Rake
from transformers import pipeline

# spaCy English pipeline for named-entity extraction.
# BUG FIX: the original unconditionally ran spacy.cli.download on every
# startup; only download when the model is not already installed.
try:
    nlp = spacy.load('en_core_web_sm')
except OSError:
    spacy.cli.download('en_core_web_sm')
    nlp = spacy.load('en_core_web_sm')

# RAKE key-phrase extractor (NOTE(review): requires the NLTK stopwords
# corpus to be available — confirm it is provisioned in the Space).
r = Rake()

# Wikipedia API client (descriptive user agent, English wiki).
wiki_wiki = wikipediaapi.Wikipedia('Organika ([email protected])', 'en')

## ctransformers disabled for now
# from ctransformers import AutoModelForCausalLM
# model = AutoModelForCausalLM.from_pretrained(
#     "Colby/StarCoder-3B-WoW-JSON",
#     model_file="StarCoder-3B-WoW-JSON-ggml.bin",
#     model_type="gpt_bigcode"
# )

# Use a pipeline as a high-level helper:
# - topic_model scores candidate topics against the conversation text,
# - model generates the assistant reply in a JSON chat format.
topic_model = pipeline("zero-shot-classification", model="valhalla/distilbart-mnli-12-9")
model = pipeline("text-generation", model="Colby/StarCoder-3B-WoW-JSON")
+
def merlin_chat(message, history):
    """Gradio chat handler that grounds replies in Wikipedia context.

    Builds a plain-text and a JSON transcript of the conversation, extracts
    candidate topics (spaCy named entities plus RAKE key phrases), scores
    them with the zero-shot classifier, pulls Wikipedia summaries for
    relevant topics into a system message, and asks the generator for a
    JSON-formatted assistant turn.

    Parameters:
        message: the new user utterance (str).
        history: list of (user_text, assistant_text) pairs from Gradio.

    Returns:
        The assistant's reply text, or a fallback apology after 3 failed
        generation attempts.
    """
    chat_text = ""
    chat_json = ""
    for user_turn, assistant_turn in history:
        chat_text += f"USER: {user_turn}\n\nASSISTANT: {assistant_turn}\n\n"
        chat_json += json.dumps({"role": "user", "content": user_turn})
        chat_json += json.dumps({"role": "assistant", "content": assistant_turn})
    chat_text += f"USER: {message}\n"

    # Collect up to 3 distinct named entities, skipping purely numeric or
    # quantity-like ones.
    # BUG FIX: ent.label is an integer id; the string tag is ent.label_, so
    # the original membership test never matched and nothing was filtered.
    doc = nlp(chat_text)
    ents_found = []
    skip_labels = ["DATE", "TIME", "PERCENT", "MONEY", "QUANTITY", "ORDINAL", "CARDINAL"]
    for ent in doc.ents:
        if len(ents_found) == 3:
            break
        if ent.text.isnumeric() or ent.label_ in skip_labels:
            continue
        if ent.text in ents_found:
            continue
        ents_found.append(ent.text.title())

    # Add the top 3 RAKE key phrases as additional topic candidates.
    r.extract_keywords_from_text(chat_text)
    ents_found = ents_found + r.get_ranked_phrases()[:3]

    context = ""
    max_topic = None  # BUG FIX: was unbound when no candidate scored >= 0.5
    if ents_found:
        # BUG FIX: the original scored the candidates against an empty
        # string (and called the classifier even with no candidates);
        # score them against the conversation text instead.
        scores = topic_model(chat_text, ents_found, multi_label=True)['scores']
        max_score = 0
        for entity, score in zip(ents_found, scores):
            if score < 0.5:
                continue
            if score > max_score:
                max_score = score
                max_topic = entity
            print(f'# Looking up {entity} on Wikipedia... ', end='')
            wiki_page = wiki_wiki.page(entity)
            if wiki_page.exists():
                print("page found... ")
                entsum = wiki_page.summary
                # Skip disambiguation pages.
                if "may refer to" in entsum or "may also refer to" in entsum:
                    print(" ambiguous, skipping.")
                    continue
                context += entsum + '\n\n'

    # Only claim a conversation topic when one was actually selected.
    topic_note = f'\n\nThe following is a conversation about {max_topic}.' if max_topic else ''
    system_msg = {
        'role': 'system', 'content': context + topic_note
    }
    user_msg = {'role': 'user', 'content': message}
    # NOTE(review): the assistant stub below is malformed JSON ("'assistant,"
    # is missing a closing quote) but presumably matches the generator's
    # training format — confirm against the model card before changing it.
    prompt = "[" + json.dumps(system_msg) + chat_json + json.dumps(user_msg) + "{'role': 'assistant, 'content': '*recalls \""
    for attempt in range(3):
        # BUG FIX: a transformers text-generation pipeline returns
        # [{'generated_text': ...}], not a plain string (the original
        # response.lstrip() raised AttributeError), and `stop` is not a
        # valid kwarg (leftover from the disabled ctransformers path).
        # return_full_text=False yields only the newly generated tokens,
        # which is what the '{'-scanning below expects.
        outputs = model(prompt, max_new_tokens=250, return_full_text=False)
        cleanStr = outputs[0]['generated_text'].lstrip()
        # Skip over whatever it recalls to the JSON object it says next.
        start = cleanStr.find('{')
        if start <= 0:
            continue
        cleanStr = cleanStr[start:]
        end = cleanStr.find('}') + 1
        if end <= 0:
            continue
        cleanStr = cleanStr[:end]
        try:
            reply = json.loads(cleanStr)
        except json.JSONDecodeError:
            # BUG FIX: malformed generations used to crash the handler;
            # retry instead.
            continue
        if reply.get('role') != 'assistant':
            continue
        return reply['content']
    return "Sorry, I don't know what to say."
# Build the Gradio chat UI around merlin_chat and start serving it.
chat_ui = gr.ChatInterface(merlin_chat)
chat_ui.launch()
requirements.txt ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
transformers
torch
spacy
rake_nltk
wikipedia-api
gradio