nitinbhayana committed
Commit 4a30f37 · verified · Parent: 4785bef

Update app.py

Files changed (1):
  1. app.py  +21 -14
app.py CHANGED
@@ -2,11 +2,11 @@ import streamlit as st
 import re
 
 # Use a pipeline as a high-level helper
-# Load model directly
-from transformers import AutoTokenizer, AutoModelForCausalLM
+# # Load model directly
+# from transformers import AutoTokenizer, AutoModelForCausalLM
 
-tokenizer = AutoTokenizer.from_pretrained("shivanikerai/TinyLlama-1.1B-Chat-v1.0-sku-title-ner-generation-reversed-v1.0")
-model = AutoModelForCausalLM.from_pretrained("shivanikerai/TinyLlama-1.1B-Chat-v1.0-sku-title-ner-generation-reversed-v1.0")
+# tokenizer = AutoTokenizer.from_pretrained("shivanikerai/TinyLlama-1.1B-Chat-v1.0-sku-title-ner-generation-reversed-v1.0")
+# model = AutoModelForCausalLM.from_pretrained("shivanikerai/TinyLlama-1.1B-Chat-v1.0-sku-title-ner-generation-reversed-v1.0")
 # import requests
 # import os
 # token=os.environ.get("HUGGING_FACE_TOKEN")
@@ -19,6 +19,11 @@ model = AutoModelForCausalLM.from_pretrained("shivanikerai/TinyLlama-1.1B-Chat-v
 # response = requests.post(API_URL, headers=headers, json=payload)
 # return response.json()
 
+# Use a pipeline as a high-level helper
+from transformers import pipeline
+
+pipe = pipeline("text-generation", model="shivanikerai/TinyLlama-1.1B-Chat-v1.0-sku-title-ner-generation-reversed-v1.0")
+
 def convert_to_dictionary(input_string):
     try:
         input_string = input_string.replace('</s>', '')
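
The pipeline call added here bundles the tokenizer and model that were previously loaded separately, but with no device argument it runs on CPU, whereas the removed code moved inputs to "cuda:0" explicitly. A minimal sketch of a GPU-aware construction, assuming a CUDA device is available; the device argument is an illustration, not part of this commit:

    from transformers import pipeline

    # device=0 targets the first CUDA device, mirroring the removed .to("cuda:0");
    # omit it (or pass device=-1) to keep the app on CPU.
    pipe = pipeline(
        "text-generation",
        model="shivanikerai/TinyLlama-1.1B-Chat-v1.0-sku-title-ner-generation-reversed-v1.0",
        device=0,
    )
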
@@ -126,24 +131,26 @@ def ner_title(title):
     B_in, E_in = "[Title]", "[/Title]"
     # Format your prompt template
     prompt = f"""{B_INST} {B_SYS} You are a helpful assistant that provides accurate and concise responses. {E_SYS}\nExtract named entities from the given product title. Provide the output in JSON format.\n{B_in} {title.strip()} {E_in}\n{E_INST}\n\n### NER Response:\n{{"{title.split()[0].lower()}"""
+    output = pipe(prompt)
+
     # output = query({
     #     "inputs": prompt,
     # })
 
-    encoding = tokenizer(prompt, return_tensors="pt").to("cuda:0")
-    output = model.generate(input_ids=encoding.input_ids,
-                            attention_mask=encoding.attention_mask,
-                            max_new_tokens=512,
-                            do_sample=True,
-                            temperature=0.01,
-                            eos_token_id=tokenizer.eos_token_id,
-                            top_k=0)
+    # encoding = tokenizer(prompt, return_tensors="pt").to("cuda:0")
+    # output = model.generate(input_ids=encoding.input_ids,
+    #                         attention_mask=encoding.attention_mask,
+    #                         max_new_tokens=512,
+    #                         do_sample=True,
+    #                         temperature=0.01,
+    #                         eos_token_id=tokenizer.eos_token_id,
+    #                         top_k=0)
 
 
 
     # Subtract the length of input_ids from output to get only the model's response
-    output_text = tokenizer.decode(output[0, len(encoding.input_ids[0]):], skip_special_tokens=False)
-    output = re.sub('\n+', '\n', output_text)  # remove excessive newline characters
+    # output_text = tokenizer.decode(output[0, len(encoding.input_ids[0]):], skip_special_tokens=False)
+    # output = re.sub('\n+', '\n', output_text)  # remove excessive newline characters
     #output = f"""{{\"{title.split()[0].lower()} {output_text}"""
     #output = re.sub(' ": "', '": "', output)
 
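
The bare pipe(prompt) call relies on the pipeline's default generation settings and returns a list of dicts rather than a string, so the settings from the commented-out model.generate(...) call (max_new_tokens=512, do_sample=True, temperature=0.01, top_k=0) no longer apply, and the prompt is echoed back in the output by default. A minimal sketch of how the same settings could be passed through the pipeline and the response text isolated; the result variable below is illustrative and not part of app.py, and eos_token_id is omitted because the pipeline already takes it from the model's tokenizer:

    # Forward the old generate() settings through the pipeline call.
    # return_full_text=False asks the pipeline to drop the prompt from the output,
    # replacing the manual slice over encoding.input_ids that this commit removes.
    result = pipe(
        prompt,
        max_new_tokens=512,
        do_sample=True,
        temperature=0.01,
        top_k=0,
        return_full_text=False,
    )
    output_text = result[0]["generated_text"]
    output = re.sub('\n+', '\n', output_text)  # remove excessive newline characters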