nitinbhayana committed
Commit 4a30f37 · verified · Parent: 4785bef

Update app.py

Files changed (1):
  1. app.py  +21 -14
app.py CHANGED
@@ -2,11 +2,11 @@ import streamlit as st
 import re
 
 # Use a pipeline as a high-level helper
-# Load model directly
-from transformers import AutoTokenizer, AutoModelForCausalLM
+# # Load model directly
+# from transformers import AutoTokenizer, AutoModelForCausalLM
 
-tokenizer = AutoTokenizer.from_pretrained("shivanikerai/TinyLlama-1.1B-Chat-v1.0-sku-title-ner-generation-reversed-v1.0")
-model = AutoModelForCausalLM.from_pretrained("shivanikerai/TinyLlama-1.1B-Chat-v1.0-sku-title-ner-generation-reversed-v1.0")
+# tokenizer = AutoTokenizer.from_pretrained("shivanikerai/TinyLlama-1.1B-Chat-v1.0-sku-title-ner-generation-reversed-v1.0")
+# model = AutoModelForCausalLM.from_pretrained("shivanikerai/TinyLlama-1.1B-Chat-v1.0-sku-title-ner-generation-reversed-v1.0")
 # import requests
 # import os
 # token=os.environ.get("HUGGING_FACE_TOKEN")
@@ -19,6 +19,11 @@ model = AutoModelForCausalLM.from_pretrained("shivanikerai/TinyLlama-1.1B-Chat-v
 # response = requests.post(API_URL, headers=headers, json=payload)
 # return response.json()
 
+# Use a pipeline as a high-level helper
+from transformers import pipeline
+
+pipe = pipeline("text-generation", model="shivanikerai/TinyLlama-1.1B-Chat-v1.0-sku-title-ner-generation-reversed-v1.0")
+
 def convert_to_dictionary(input_string):
     try:
         input_string = input_string.replace('</s>', '')
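
The pipeline call added here bundles the tokenizer and model that were previously loaded separately, but with no device argument it runs on CPU, whereas the removed code moved inputs to "cuda:0" explicitly. A minimal sketch of a GPU-aware construction, assuming a CUDA device is available; the device argument is an illustration, not part of this commit:

    from transformers import pipeline

    # device=0 targets the first CUDA device, mirroring the removed .to("cuda:0");
    # omit it (or pass device=-1) to keep the app on CPU.
    pipe = pipeline(
        "text-generation",
        model="shivanikerai/TinyLlama-1.1B-Chat-v1.0-sku-title-ner-generation-reversed-v1.0",
        device=0,
    )
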
@@ -126,24 +131,26 @@ def ner_title(title):
     B_in, E_in = "[Title]", "[/Title]"
     # Format your prompt template
     prompt = f"""{B_INST} {B_SYS} You are a helpful assistant that provides accurate and concise responses. {E_SYS}\nExtract named entities from the given product title. Provide the output in JSON format.\n{B_in} {title.strip()} {E_in}\n{E_INST}\n\n### NER Response:\n{{"{title.split()[0].lower()}"""
+    output = pipe(prompt)
+
     # output = query({
     #     "inputs": prompt,
     # })
 
-    encoding = tokenizer(prompt, return_tensors="pt").to("cuda:0")
-    output = model.generate(input_ids=encoding.input_ids,
-                            attention_mask=encoding.attention_mask,
-                            max_new_tokens=512,
-                            do_sample=True,
-                            temperature=0.01,
-                            eos_token_id=tokenizer.eos_token_id,
-                            top_k=0)
+    # encoding = tokenizer(prompt, return_tensors="pt").to("cuda:0")
+    # output = model.generate(input_ids=encoding.input_ids,
+    #                         attention_mask=encoding.attention_mask,
+    #                         max_new_tokens=512,
+    #                         do_sample=True,
+    #                         temperature=0.01,
+    #                         eos_token_id=tokenizer.eos_token_id,
+    #                         top_k=0)
 
 
 
     # Subtract the length of input_ids from output to get only the model's response
-    output_text = tokenizer.decode(output[0, len(encoding.input_ids[0]):], skip_special_tokens=False)
-    output = re.sub('\n+', '\n', output_text)  # remove excessive newline characters
+    # output_text = tokenizer.decode(output[0, len(encoding.input_ids[0]):], skip_special_tokens=False)
+    # output = re.sub('\n+', '\n', output_text)  # remove excessive newline characters
     #output = f"""{{\"{title.split()[0].lower()} {output_text}"""
     #output = re.sub(' ": "', '": "', output)
 
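
The bare pipe(prompt) call relies on the pipeline's default generation settings and returns a list of dicts rather than a string, so the settings from the commented-out model.generate(...) call (max_new_tokens=512, do_sample=True, temperature=0.01, top_k=0) no longer apply, and the prompt is echoed back in the output by default. A minimal sketch of how the same settings could be passed through the pipeline and the response text isolated; the result variable below is illustrative and not part of app.py, and eos_token_id is omitted because the pipeline already takes it from the model's tokenizer:

    # Forward the old generate() settings through the pipeline call.
    # return_full_text=False asks the pipeline to drop the prompt from the output,
    # replacing the manual slice over encoding.input_ids that this commit removes.
    result = pipe(
        prompt,
        max_new_tokens=512,
        do_sample=True,
        temperature=0.01,
        top_k=0,
        return_full_text=False,
    )
    output_text = result[0]["generated_text"]
    output = re.sub('\n+', '\n', output_text)  # remove excessive newline characters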