tseronni committed on
Commit
09e191e
·
1 Parent(s): 1ab54ec

first commit

Browse files
Files changed (3) hide show
  1. app.py +51 -10
  2. chatgpt.py +99 -0
  3. requirements.txt +2 -1
app.py CHANGED
@@ -1,6 +1,9 @@
1
  import gradio as gr
2
  import pandas as pd
3
  from transformers import pipeline
 
 
 
4
 
5
  # theme = gr.themes.Monochrome(spacing_size=gr.themes.sizes.spacing_md,
6
  # radius_size=gr.themes.sizes.radius_sm,
@@ -16,16 +19,53 @@ from transformers import pipeline
16
  # df.to_csv('subsectors.csv', index=False)
17
 
18
  df = pd.read_csv('subsectors.csv')
19
- # df_column_width = [200, None, None]
 
 
 
 
 
 
 
 
 
 
20
 
21
 
def click_button(model, abstract):
    """Score ``abstract`` against every subsector name with a zero-shot classifier.

    ``model`` is accepted for the UI callback signature but is not used here.
    Returns a dict mapping each label reported by the classifier to its score
    rounded to four decimal places.
    """
    zero_shot = pipeline("zero-shot-classification")
    labels = df['Subsector'].tolist()
    prediction = zero_shot(abstract, labels)

    rounded_scores = {}
    for name, score in zip(prediction["labels"], prediction["scores"]):
        rounded_scores[name] = round(score, 4)
    return rounded_scores
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
29
 
30
 
31
  def on_select(evt: gr.SelectData): # SelectData is a subclass of EventData
@@ -48,9 +88,10 @@ with gr.Blocks() as startup_genome_demo:
48
  with gr.Row():
49
  btn_get_result = gr.Button("Show classification")
50
  with gr.Row():
51
- label_result = gr.Label(num_top_classes=None)
52
- with gr.Row():
53
- reasoning = gr.Textbox(label="Reasoning", lines=5)
 
54
  with gr.Tab("Sector definitions"):
55
  with gr.Row():
56
  with gr.Column(scale=4):
@@ -66,7 +107,7 @@ with gr.Blocks() as startup_genome_demo:
66
  with gr.Tab("Logs"):
67
  pass
68
 
69
- btn_get_result.click(fn=click_button, inputs=[dropdown_model, abstract_description], outputs=[label_result])
70
  df_subsectors.select(fn=on_select, outputs=[subsector_name, s1_definition, s1_keywords, does_include, does_not_include])
71
 
72
  if __name__ == "__main__":
 
1
  import gradio as gr
2
  import pandas as pd
3
  from transformers import pipeline
4
+ from openai import OpenAI
5
+
6
+ from chatgpt import MessageChatCompletion
7
 
8
  # theme = gr.themes.Monochrome(spacing_size=gr.themes.sizes.spacing_md,
9
  # radius_size=gr.themes.sizes.radius_sm,
 
19
  # df.to_csv('subsectors.csv', index=False)
20
 
21
  df = pd.read_csv('subsectors.csv')
22
+
23
+
def build_context(row):
    """Render one subsector row as a single newline-terminated context line.

    ``row`` is indexed by the keys 'Subsector', 'Definition', 'Keywords',
    'Does include' and 'Does not include'. Every value is interpolated as-is
    and each field is prefixed with the subsector name.
    """
    name = row['Subsector']
    fragments = [
        f"Subsector name: {name}. ",
        f"{name} Definition: {row['Definition']}. ",
        f"{name} keywords: {row['Keywords']}. ",
        f"{name} Does include: {row['Does include']}. ",
        f"{name} Does not include: {row['Does not include']}.\n",
    ]
    return "".join(fragments)
33
 
34
 
# Hugging Face checkpoint used for the zero-shot subsector scores.
_ZERO_SHOT_MODEL = "sileod/deberta-v3-base-tasksource-nli"

# Lazily created pipeline, reused across clicks (see _get_classifier).
_zero_shot_classifier = None


def _get_classifier():
    """Return the shared zero-shot pipeline, creating it on first use."""
    global _zero_shot_classifier
    if _zero_shot_classifier is None:
        # Building the pipeline loads the model, so do it once rather than
        # on every button click.
        _zero_shot_classifier = pipeline("zero-shot-classification", model=_ZERO_SHOT_MODEL)
    return _zero_shot_classifier


def _build_system_message(contexts):
    """Assemble the system prompt from the per-subsector context lines."""
    instructions = (
        'You are a system that will receive an patent abstract and needs to classify in one or more patent subsectors.',
        'You need to consider that each subsector has an name, definition, keywords, Does include and Does not included.',
        'Definition describe the subsector.',
        'The Keywords are important words for that subsector.',
        'Does include are words that can be included.',
        'Does not include are words that can not be in the patent abstract that is been classifying.',
        'nan will be consider as empty.',
        'Your answer will be subsector: the subsector result name and reasoning: The conclusion why you classify in that subsector specifying if has keywords and does include.',
        'Folow the subsectors:',
    )
    # Join sentences with a space (the adjacent literals used to run together)
    # and append the context text itself rather than the repr() of the list.
    return ' '.join(instructions) + '\n' + ''.join(contexts)


def click_button(model, abstract):
    """Classify a patent abstract and ask the chat model to explain the choice.

    Args:
        model: Value of the model dropdown; accepted for the callback
            signature but not used.
        abstract: Patent abstract text entered by the user.

    Returns:
        A ``(scores, reasoning)`` tuple: ``scores`` maps each subsector label
        to its zero-shot score rounded to four decimals, and ``reasoning`` is
        the content of the last message in the chat conversation after the
        request has been sent.
    """
    labels = df['Subsector'].tolist()
    result = _get_classifier()(abstract, labels, multi_label=True)
    scores = {label: round(prob, 4) for label, prob in zip(result["labels"], result["scores"])}

    # Every subsector is described to the chat model, not only the top-scoring ones.
    contexts = [build_context(row) for _, row in df.iterrows()]

    my_chatgpt = MessageChatCompletion(model='gpt-4')
    my_chatgpt.new_system_message(content=_build_system_message(contexts))
    my_chatgpt.new_user_message(content=f'Classify this patent abstract: {abstract}')
    my_chatgpt.send_message()

    # send_message() appends the reply to the conversation, so the last
    # message is the model's answer.
    reasoning = my_chatgpt.get_last_message()

    return scores, reasoning
68
+
69
 
70
 
71
  def on_select(evt: gr.SelectData): # SelectData is a subclass of EventData
 
88
  with gr.Row():
89
  btn_get_result = gr.Button("Show classification")
90
  with gr.Row():
91
+ with gr.Column(scale=4):
92
+ label_result = gr.Label(num_top_classes=None)
93
+ with gr.Column(scale=6):
94
+ reasoning = gr.Textbox(label="Reasoning", lines=5)
95
  with gr.Tab("Sector definitions"):
96
  with gr.Row():
97
  with gr.Column(scale=4):
 
107
  with gr.Tab("Logs"):
108
  pass
109
 
110
+ btn_get_result.click(fn=click_button, inputs=[dropdown_model, abstract_description], outputs=[label_result, reasoning])
111
  df_subsectors.select(fn=on_select, outputs=[subsector_name, s1_definition, s1_keywords, does_include, does_not_include])
112
 
113
  if __name__ == "__main__":
chatgpt.py ADDED
@@ -0,0 +1,99 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import openai
2
+
3
+
class MessageChatCompletion:
    """Conversation holder that sends its messages to the OpenAI chat API.

    The request settings and the running list of messages are kept in
    ``self.prompt``. ``send_message`` streams a completion for the current
    messages, appends the reply to the conversation and returns it.

    Args:
        model: Model name sent with each request.
        message: Optional first user message; added when non-empty.
        api_key: API key. When non-empty it is installed on the ``openai``
            module; when empty the library's own configuration is left
            untouched.
        temperature, top_p, presence_penalty, frequency_penalty: Sampling
            settings forwarded by ``send_message``.
        max_tokens: Completion length limit; ``None`` omits the limit.
        n, stream, stop, logit_bias, user: Stored in ``self.prompt`` but not
            forwarded by ``send_message`` (which always streams).
    """

    def __init__(self,
                 model: str = 'gpt-3.5-turbo',
                 message: str = '',
                 api_key: str = '',
                 temperature: float = 0.07,
                 top_p: float = 1.0,
                 n: int = 1,
                 stream: bool = False,
                 stop: str = "\n",
                 max_tokens: int = 256,
                 presence_penalty: float = 0.0,
                 frequency_penalty: float = 0.0,
                 logit_bias: dict = None,
                 user: str = ''):

        # Only override the library configuration when a key was actually
        # supplied; assigning '' would wipe credentials configured elsewhere.
        if api_key:
            openai.api_key = api_key

        if model in ["gpt-4", "gpt-4-turbo-preview", "gpt-3.5-turbo"]:
            self.endpoint = "https://api.openai.com/v1/chat/completions"
        else:
            self.endpoint = "https://api.openai.com/v1/completions"

        self.headers = {
            "Content-Type": "application/json",
            "Authorization": f"Bearer {api_key}",
        }

        self.prompt = {
            "model": model,
            "messages": [],
            "temperature": temperature,
            "top_p": top_p,
            "n": n,
            "stream": stream,
            "stop": stop,
            "presence_penalty": presence_penalty,
            "frequency_penalty": frequency_penalty,
        }

        # Optional settings are only stored when they carry a value.
        if max_tokens is not None:
            self.prompt["max_tokens"] = max_tokens

        if logit_bias is not None:
            self.prompt["logit_bias"] = logit_bias

        if user != '':
            self.prompt["user"] = user

        if message != '':
            self.new_user_message(content=message)

        # Text of the most recent reply; filled in by send_message().
        self.response = ''

    def new_message(self, role: str = 'user', content: str = '', name: str = ''):
        """Append a message with the given role to the conversation."""
        new_message = {"role": role, "content": f"{content}"}
        if name != '':
            new_message['name'] = name

        self.prompt['messages'].append(new_message)

    def new_user_message(self, content: str = '', name: str = ''):
        """Append a ``user`` message."""
        self.new_message(role='user', content=content, name=name)

    def new_system_message(self, content: str = '', name: str = ''):
        """Append a ``system`` message."""
        self.new_message(role='system', content=content, name=name)

    def new_assistant_message(self, content: str = '', name: str = ''):
        """Append an ``assistant`` message."""
        self.new_message(role='assistant', content=content, name=name)

    def get_last_message(self):
        """Return the content of the most recently appended message."""
        return self.prompt['messages'][-1]['content']

    def _request_kwargs(self):
        """Build the keyword arguments for the chat-completions request."""
        kwargs = {
            "model": self.prompt['model'],
            "messages": self.prompt['messages'],
            "frequency_penalty": self.prompt['frequency_penalty'],
            "temperature": self.prompt['temperature'],
            "top_p": self.prompt['top_p'],
            "presence_penalty": self.prompt['presence_penalty'],
            "stream": True,
        }
        # 'max_tokens' is absent from self.prompt when the caller passed None.
        if "max_tokens" in self.prompt:
            kwargs["max_tokens"] = self.prompt["max_tokens"]
        return kwargs

    @staticmethod
    def _collect_stream(chunks):
        """Concatenate the text deltas of a streamed completion."""
        parts = []
        for chunk in chunks:
            # Skip chunks that carry no choices instead of indexing into them.
            if not chunk.choices:
                continue
            piece = chunk.choices[0].delta.content
            if piece is not None:
                parts.append(piece)
        return "".join(parts)

    def send_message(self):
        """Send the conversation, record the reply and return its text.

        The reply is appended to the conversation as an ``assistant`` message
        and also stored in ``self.response``.
        """
        stream = openai.chat.completions.create(**self._request_kwargs())
        full_response = self._collect_stream(stream)

        self.new_assistant_message(content=full_response)
        self.response = full_response

        return self.response
requirements.txt CHANGED
@@ -1,3 +1,4 @@
1
  gradio
2
  plotly
3
- sentence-transformers
 
 
1
  gradio
2
  plotly
3
+ sentence-transformers
4
+ openai