Hugo committed on
Commit
4e926ed
·
1 Parent(s): 506a8d3

content length

Browse files
Files changed (1) hide show
  1. app.py +26 -0
app.py CHANGED
@@ -5,10 +5,13 @@ import os
5
  import os.path
6
  import pickle
7
  import torch
 
8
 
9
  base_model_id = "meta-llama/Llama-3.2-3B-Instruct"
10
  model_id = "HiGenius/Headline-Generation-Model"
 
11
  hf_token = os.environ.get('HF_TOKEN')
 
12
 
13
  device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
14
 
@@ -19,9 +22,23 @@ def load_model():
19
  tokenizer = AutoTokenizer.from_pretrained(base_model_id, use_auth_token=hf_token)
20
  tokenizer.pad_token = tokenizer.eos_token
21
  tokenizer.padding_side='left'
 
22
 
23
  return tokenizer, model
24
 
 
 
 
 
 
 
 
 
 
 
 
 
 
25
  tokenizer, model = load_model()
26
 
27
  guideline_path = "./guidelines.txt"
@@ -29,6 +46,15 @@ with open(guideline_path, 'r', encoding='utf-8') as f:
29
  guidelines = f.read()
30
 
31
  def process_prompt(tokenizer, content, video_summary = '', guidelines = None):
 
 
 
 
 
 
 
 
 
32
  if guidelines:
33
  system_prompt = "You are a helpful assistant that writes engaging headlines. To maximize engagement, you may follow these proven guidelines:\n" + guidelines
34
  else:
 
5
  import os.path
6
  import pickle
7
  import torch
8
+ from openai import OpenAI
9
 
10
  base_model_id = "meta-llama/Llama-3.2-3B-Instruct"
11
  model_id = "HiGenius/Headline-Generation-Model"
12
+
13
  hf_token = os.environ.get('HF_TOKEN')
14
+ openai_api_key = os.environ.get('OPENAI_API_KEY')
15
 
16
  device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
17
 
 
22
  tokenizer = AutoTokenizer.from_pretrained(base_model_id, use_auth_token=hf_token)
23
  tokenizer.pad_token = tokenizer.eos_token
24
  tokenizer.padding_side='left'
25
+ tokenizer.truncation_side="left"
26
 
27
  return tokenizer, model
28
 
29
def summarize_content(content):
    """Condense article text with the OpenAI chat completions API.

    Sends ``content`` to the ``gpt-4o`` model together with a summarization
    system prompt and returns the text of the first choice.

    Args:
        content: Article text to shorten.

    Returns:
        The summary text. If ``content`` is empty or whitespace-only, or if
        the API reply carries no text, ``content`` is returned unchanged so
        the caller always gets something usable for prompt building.
    """
    # Nothing to summarize: skip the API round-trip on blank input.
    if not content or not content.strip():
        return content

    client = OpenAI(api_key=openai_api_key)
    response = client.chat.completions.create(
        model="gpt-4o",
        messages=[
            {"role": "system", "content": "Summarize the following article content concisely while preserving key information:"},
            {"role": "user", "content": content},
        ],
        max_tokens=600,
        temperature=0.3,
    )
    summary = response.choices[0].message.content
    # The reply's content field can be None; fall back to the original text
    # rather than handing None to the caller.
    return summary if summary else content
41
+
42
  tokenizer, model = load_model()
43
 
44
  guideline_path = "./guidelines.txt"
 
46
  guidelines = f.read()
47
 
48
  def process_prompt(tokenizer, content, video_summary = '', guidelines = None):
49
+ # Check token lengths
50
+ content_tokens = len(tokenizer.encode(content))
51
+ total_tokens = content_tokens
52
+ if video_summary:
53
+ total_tokens += len(tokenizer.encode(video_summary))
54
+
55
+ if content_tokens > 850 or total_tokens > 900:
56
+ content = summarize_content(content)
57
+
58
  if guidelines:
59
  system_prompt = "You are a helpful assistant that writes engaging headlines. To maximize engagement, you may follow these proven guidelines:\n" + guidelines
60
  else: