Spaces:

tseronni
/

startup_genome

Sleeping

App Files Files Community

tseronni committed on Feb 15, 2024

Commit

09e191e

1 Parent(s): 1ab54ec

first commit

Browse files

Files changed (3) hide show

app.py +51 -10
chatgpt.py +99 -0
requirements.txt +2 -1

app.py CHANGED Viewed

@@ -1,6 +1,9 @@
 import gradio as gr
 import pandas as pd
 from transformers import pipeline
 # theme = gr.themes.Monochrome(spacing_size=gr.themes.sizes.spacing_md,
 #                              radius_size=gr.themes.sizes.radius_sm,
@@ -16,16 +19,53 @@ from transformers import pipeline
 # df.to_csv('subsectors.csv', index=False)
 df = pd.read_csv('subsectors.csv')
-# df_column_width = [200, None, None]
 def click_button(model, abstract):
-    # labels = {"Classificação 1": 0.75, "Classificação 2": 0.8, "Classificação 3": 0.2}
-    classifier = pipeline("zero-shot-classification")
     labels = df['Subsector'].tolist()
-    result = classifier(abstract, labels)
-    return {label: round(prob, 4) for label, prob in zip(result["labels"], result["scores"])}
-    # return labels
 def on_select(evt: gr.SelectData):  # SelectData is a subclass of EventData
@@ -48,9 +88,10 @@ with gr.Blocks() as startup_genome_demo:
         with gr.Row():
             btn_get_result = gr.Button("Show classification")
         with gr.Row():
-            label_result = gr.Label(num_top_classes=None)
-        with gr.Row():
-            reasoning = gr.Textbox(label="Reasoning", lines=5)
     with gr.Tab("Sector definitions"):
         with gr.Row():
             with gr.Column(scale=4):
@@ -66,7 +107,7 @@ with gr.Blocks() as startup_genome_demo:
     with gr.Tab("Logs"):
         pass
-    btn_get_result.click(fn=click_button, inputs=[dropdown_model, abstract_description], outputs=[label_result])
     df_subsectors.select(fn=on_select, outputs=[subsector_name, s1_definition, s1_keywords, does_include, does_not_include])
 if __name__ == "__main__":

 import gradio as gr
 import pandas as pd
 from transformers import pipeline
+from openai import OpenAI
+from chatgpt import MessageChatCompletion
 # theme = gr.themes.Monochrome(spacing_size=gr.themes.sizes.spacing_md,
 #                              radius_size=gr.themes.sizes.radius_sm,
 # df.to_csv('subsectors.csv', index=False)
 df = pd.read_csv('subsectors.csv')
def build_context(row):
    """Render one subsector row as a single line of prompt context.

    Args:
        row: Mapping-like object indexable by 'Subsector', 'Definition',
            'Keywords', 'Does include' and 'Does not include'.

    Returns:
        The five fields as sentences, each prefixed with the subsector name,
        separated by ". " and terminated by ".\n".
    """
    name = row['Subsector']
    # (label shown in the prompt, key read from the row)
    fields = (
        ('Definition', 'Definition'),
        ('keywords', 'Keywords'),
        ('Does include', 'Does include'),
        ('Does not include', 'Does not include'),
    )
    sentences = [f"Subsector name: {name}"]
    sentences.extend(f"{name} {label}: {row[key]}" for label, key in fields)
    return ". ".join(sentences) + ".\n"
 def click_button(model, abstract):
+    classifier = pipeline("zero-shot-classification", model="sileod/deberta-v3-base-tasksource-nli")
     labels = df['Subsector'].tolist()
+    result = classifier(abstract, labels, multi_label=True)
+    # best_x_labels = [label for label in result["labels"]][0:5]
+    # df_best = df[df.Subsector.isin(best_x_labels)]
+    # contexts = [build_context(row) for _, row in df_best.iterrows()]
+    contexts = [build_context(row) for _, row in df.iterrows()]
+    my_chatgpt = MessageChatCompletion(model='gpt-4')
+    system_message = ('You are a system that will receive an patent abstract and needs to classify in one or more patent subsectors.'
+                      'You need to consider that each subsector has an name, definition, keywords, Does include and Does not included.'
+                      'Definition describe the subsector. '
+                      'The Keywords are important words for that subsector. '
+                      'Does include are words that can be included.'
+                      'Does not include are words that can not be in the patent abstract that is been classifying.'
+                      'nan will be consider as empty.'
+                      'Your answer will be subsector: the subsector result name and reasoning: The conclusion why you classify in that subsector specifying if has keywords and does include.'
+                      'Folow the subsectors:'
+                      f'{contexts}')
+    user_message = f'Classify this patent abstract: {abstract}'
+    my_chatgpt.new_system_message(content=system_message)
+    my_chatgpt.new_user_message(content=user_message)
+    my_chatgpt.send_message()
+    reasoning = my_chatgpt.get_last_message()
+    return {label: round(prob, 4) for label, prob in zip(result["labels"], result["scores"])}, reasoning
 def on_select(evt: gr.SelectData):  # SelectData is a subclass of EventData
         with gr.Row():
             btn_get_result = gr.Button("Show classification")
         with gr.Row():
+            with gr.Column(scale=4):
+                label_result = gr.Label(num_top_classes=None)
+            with gr.Column(scale=6):
+                reasoning = gr.Textbox(label="Reasoning", lines=5)
     with gr.Tab("Sector definitions"):
         with gr.Row():
             with gr.Column(scale=4):
     with gr.Tab("Logs"):
         pass
+    btn_get_result.click(fn=click_button, inputs=[dropdown_model, abstract_description], outputs=[label_result, reasoning])
     df_subsectors.select(fn=on_select, outputs=[subsector_name, s1_definition, s1_keywords, does_include, does_not_include])
 if __name__ == "__main__":

chatgpt.py ADDED Viewed

	@@ -0,0 +1,99 @@

+import openai
class MessageChatCompletion:
    """Small conversation wrapper around the OpenAI chat-completions API.

    Messages are accumulated in ``self.prompt['messages']`` and the whole
    history is sent on every :meth:`send_message` call.
    """

    def __init__(self,
                 model: str = 'gpt-3.5-turbo',
                 message: str = '',
                 api_key: str = '',
                 temperature: float = 0.07,
                 top_p: float = 1.0,
                 n: int = 1,
                 stream: bool = False,
                 stop: str = "\n",
                 max_tokens: int = 256,
                 presence_penalty: float = 0.0,
                 frequency_penalty: float = 0.0,
                 logit_bias: dict = None,
                 user: str = ''):
        """Store the request parameters and optionally seed the conversation.

        Args:
            model: Model name; also selects ``self.endpoint``.
            message: If non-empty, added as the first user message.
            api_key: API key. When empty, the ``OPENAI_API_KEY`` environment
                variable is used if set.
            temperature, top_p, presence_penalty, frequency_penalty,
                max_tokens: Stored in ``self.prompt`` and forwarded by
                :meth:`send_message`. ``max_tokens=None`` omits the limit.
            n, stream, stop, logit_bias, user: Stored in ``self.prompt`` only;
                :meth:`send_message` does not forward them.
        """
        import os

        # Only configure the global client when a key is actually available.
        # Assigning an empty string (or an empty organization) would override
        # the library's own defaults with an unusable value.
        resolved_key = api_key or os.environ.get("OPENAI_API_KEY", "")
        if resolved_key:
            openai.api_key = resolved_key

        if model in ["gpt-4", "gpt-4-turbo-preview", "gpt-3.5-turbo"]:
            self.endpoint = "https://api.openai.com/v1/chat/completions"
        else:
            self.endpoint = "https://api.openai.com/v1/completions"

        self.headers = {
            "Content-Type": "application/json",
            "Authorization": f"Bearer {resolved_key}",
        }

        self.prompt = {
            "model": model,
            "messages": [],
            "temperature": temperature,
            "top_p": top_p,
            "n": n,
            "stream": stream,
            "stop": stop,
            "presence_penalty": presence_penalty,
            "frequency_penalty": frequency_penalty
        }

        if max_tokens is not None:
            self.prompt["max_tokens"] = max_tokens

        if logit_bias is not None:
            self.prompt["logit_bias"] = logit_bias

        if user != '':
            self.prompt["user"] = user

        if message != '':
            self.new_user_message(content=message)

        self.response = ''

    def new_message(self, role: str = 'user', content: str = '', name: str = ''):
        """Append a message with the given role; ``content`` is stringified."""
        new_message = {"role": role, "content": f"{content}"}
        if name != '':
            new_message['name'] = name

        self.prompt['messages'].append(new_message)

    def new_user_message(self, content: str = '', name: str = ''):
        """Append a message with role 'user'."""
        self.new_message(role='user', content=content, name=name)

    def new_system_message(self, content: str = '', name: str = ''):
        """Append a message with role 'system'."""
        self.new_message(role='system', content=content, name=name)

    def new_assistant_message(self, content: str = '', name: str = ''):
        """Append a message with role 'assistant'."""
        self.new_message(role='assistant', content=content, name=name)

    def get_last_message(self):
        """Return the content of the most recently appended message."""
        return self.prompt['messages'][-1]['content']

    def send_message(self):
        """Send the conversation and record the model's reply.

        The request is always streamed; the chunks are concatenated, appended
        to the history as an assistant message, stored in ``self.response``
        and returned.

        Returns:
            The full reply text.
        """
        request = {
            "model": self.prompt['model'],
            "messages": self.prompt['messages'],
            "frequency_penalty": self.prompt['frequency_penalty'],
            "temperature": self.prompt['temperature'],
            "top_p": self.prompt['top_p'],
            "presence_penalty": self.prompt['presence_penalty'],
            "stream": True,
        }
        # The key is absent when the instance was built with max_tokens=None.
        if "max_tokens" in self.prompt:
            request["max_tokens"] = self.prompt["max_tokens"]

        response = openai.chat.completions.create(**request)

        pieces = []
        for chunk in response:
            # Defensive: skip a chunk that carries no choices.
            if not chunk.choices:
                continue
            chunk_message = chunk.choices[0].delta.content
            if chunk_message is not None:
                pieces.append(chunk_message)
        full_response = "".join(pieces)

        # The reply comes from the model, so it belongs to the assistant role.
        self.new_assistant_message(content=full_response)
        self.response = full_response

        return self.response

requirements.txt CHANGED Viewed

@@ -1,3 +1,4 @@
 gradio
 plotly
-sentence-transformers

 gradio
 plotly
+sentence-transformers
+openai