Spaces:
Running
on
T4
Running
on
T4
Hugo
committed on
Commit
·
4e926ed
1
Parent(s):
506a8d3
content length
Browse files
app.py
CHANGED
@@ -5,10 +5,13 @@ import os
|
|
5 |
import os.path
|
6 |
import pickle
|
7 |
import torch
|
|
|
8 |
|
9 |
base_model_id = "meta-llama/Llama-3.2-3B-Instruct"
|
10 |
model_id = "HiGenius/Headline-Generation-Model"
|
|
|
11 |
hf_token = os.environ.get('HF_TOKEN')
|
|
|
12 |
|
13 |
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
|
14 |
|
@@ -19,9 +22,23 @@ def load_model():
|
|
19 |
tokenizer = AutoTokenizer.from_pretrained(base_model_id, use_auth_token=hf_token)
|
20 |
tokenizer.pad_token = tokenizer.eos_token
|
21 |
tokenizer.padding_side='left'
|
|
|
22 |
|
23 |
return tokenizer, model
|
24 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
25 |
tokenizer, model = load_model()
|
26 |
|
27 |
guideline_path = "./guidelines.txt"
|
@@ -29,6 +46,15 @@ with open(guideline_path, 'r', encoding='utf-8') as f:
|
|
29 |
guidelines = f.read()
|
30 |
|
31 |
def process_prompt(tokenizer, content, video_summary = '', guidelines = None):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
32 |
if guidelines:
|
33 |
system_prompt = "You are a helpful assistant that writes engaging headlines. To maximize engagement, you may follow these proven guidelines:\n" + guidelines
|
34 |
else:
|
|
|
5 |
import os.path
|
6 |
import pickle
|
7 |
import torch
|
8 |
+
from openai import OpenAI
|
9 |
|
10 |
base_model_id = "meta-llama/Llama-3.2-3B-Instruct"
|
11 |
model_id = "HiGenius/Headline-Generation-Model"
|
12 |
+
|
13 |
hf_token = os.environ.get('HF_TOKEN')
|
14 |
+
openai_api_key = os.environ.get('OPENAI_API_KEY')
|
15 |
|
16 |
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
|
17 |
|
|
|
22 |
tokenizer = AutoTokenizer.from_pretrained(base_model_id, use_auth_token=hf_token)
|
23 |
tokenizer.pad_token = tokenizer.eos_token
|
24 |
tokenizer.padding_side='left'
|
25 |
+
tokenizer.truncation_side="left"
|
26 |
|
27 |
return tokenizer, model
|
28 |
|
29 |
+
def summarize_content(content):
|
30 |
+
client = OpenAI(api_key=openai_api_key)
|
31 |
+
response = client.chat.completions.create(
|
32 |
+
model="gpt-4o",
|
33 |
+
messages=[
|
34 |
+
{"role": "system", "content": "Summarize the following article content concisely while preserving key information:"},
|
35 |
+
{"role": "user", "content": content}
|
36 |
+
],
|
37 |
+
max_tokens=600,
|
38 |
+
temperature=0.3
|
39 |
+
)
|
40 |
+
return response.choices[0].message.content
|
41 |
+
|
42 |
tokenizer, model = load_model()
|
43 |
|
44 |
guideline_path = "./guidelines.txt"
|
|
|
46 |
guidelines = f.read()
|
47 |
|
48 |
def process_prompt(tokenizer, content, video_summary = '', guidelines = None):
|
49 |
+
# Check token lengths
|
50 |
+
content_tokens = len(tokenizer.encode(content))
|
51 |
+
total_tokens = content_tokens
|
52 |
+
if video_summary:
|
53 |
+
total_tokens += len(tokenizer.encode(video_summary))
|
54 |
+
|
55 |
+
if content_tokens > 850 or total_tokens > 900:
|
56 |
+
content = summarize_content(content)
|
57 |
+
|
58 |
if guidelines:
|
59 |
system_prompt = "You are a helpful assistant that writes engaging headlines. To maximize engagement, you may follow these proven guidelines:\n" + guidelines
|
60 |
else:
|