initial commit
Browse files- few-shot.txt +24 -0
- tagging.py +7 -31
few-shot.txt
ADDED
|
@@ -0,0 +1,24 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
Example #96
|
| 2 |
+
Tokens: ['Public']
|
| 3 |
+
Skill Labels: ['O']
|
| 4 |
+
Knowledge Labels: ['O']
|
| 5 |
+
|
| 6 |
+
Example #97
|
| 7 |
+
Tokens: ['Technologies']
|
| 8 |
+
Skill Labels: ['O']
|
| 9 |
+
Knowledge Labels: ['O']
|
| 10 |
+
|
| 11 |
+
Example #98
|
| 12 |
+
Tokens: ['cloud', 'java', 'amazon-web-services']
|
| 13 |
+
Skill Labels: ['O', 'O', 'O']
|
| 14 |
+
Knowledge Labels: ['B', 'B', 'B']
|
| 15 |
+
|
| 16 |
+
Example #99
|
| 17 |
+
Tokens: ['Job', 'description']
|
| 18 |
+
Skill Labels: ['O', 'O']
|
| 19 |
+
Knowledge Labels: ['O', 'O']
|
| 20 |
+
|
| 21 |
+
Example #100
|
| 22 |
+
Tokens: ['As', 'a', 'member', 'of', 'our', 'Software', 'Engineering', 'Group', 'we', 'look', 'first', 'and', 'foremost', 'for', 'people', 'who', 'are', 'passionate', 'about', 'solving', 'business', 'problems', 'through', 'innovation', 'and', 'engineering', 'practices', '.']
|
| 23 |
+
Skill Labels: ['O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'B', 'I', 'I', 'I', 'I', 'I', 'I', 'I', 'O']
|
| 24 |
+
Knowledge Labels: ['O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O']
|
tagging.py
CHANGED
|
@@ -1,31 +1,3 @@
|
|
| 1 |
-
few_shot_examples = """
|
| 2 |
-
Example #96
|
| 3 |
-
Tokens: ['Public']
|
| 4 |
-
Skill Labels: ['O']
|
| 5 |
-
Knowledge Labels: ['O']
|
| 6 |
-
|
| 7 |
-
Example #97
|
| 8 |
-
Tokens: ['Technologies']
|
| 9 |
-
Skill Labels: ['O']
|
| 10 |
-
Knowledge Labels: ['O']
|
| 11 |
-
|
| 12 |
-
Example #98
|
| 13 |
-
Tokens: ['cloud', 'java', 'amazon-web-services']
|
| 14 |
-
Skill Labels: ['O', 'O', 'O']
|
| 15 |
-
Knowledge Labels: ['B', 'B', 'B']
|
| 16 |
-
|
| 17 |
-
Example #99
|
| 18 |
-
Tokens: ['Job', 'description']
|
| 19 |
-
Skill Labels: ['O', 'O']
|
| 20 |
-
Knowledge Labels: ['O', 'O']
|
| 21 |
-
|
| 22 |
-
Example #100
|
| 23 |
-
Tokens: ['As', 'a', 'member', 'of', 'our', 'Software', 'Engineering', 'Group', 'we', 'look', 'first', 'and', 'foremost', 'for', 'people', 'who', 'are', 'passionate', 'about', 'solving', 'business', 'problems', 'through', 'innovation', 'and', 'engineering', 'practices', '.']
|
| 24 |
-
Skill Labels: ['O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'B', 'I', 'I', 'I', 'I', 'I', 'I', 'I', 'O']
|
| 25 |
-
Knowledge Labels: ['O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O']
|
| 26 |
-
"""
|
| 27 |
-
|
| 28 |
-
|
| 29 |
import os
|
| 30 |
from langchain_openai import ChatOpenAI
|
| 31 |
from pydantic import BaseModel
|
|
@@ -41,9 +13,7 @@ from transformers import AutoTokenizer, AutoModelForTokenClassification
|
|
| 41 |
import torch
|
| 42 |
import sys
|
| 43 |
from tabulate import tabulate
|
| 44 |
-
|
| 45 |
load_dotenv(".env")
|
| 46 |
-
# ChatOpenAI.api_key = OPENAI_API_KEY
|
| 47 |
|
| 48 |
|
| 49 |
### LLM-based tag extraction with few-shot learning
|
|
@@ -60,6 +30,8 @@ model = ChatOpenAI(model_name="gpt-4o", temperature=0.0, api_key=os.getenv('OPEN
|
|
| 60 |
tokenizer = AutoTokenizer.from_pretrained("jjzha/jobbert_skill_extraction")
|
| 61 |
parser = JsonOutputParser(pydantic_object=TokenTaggingResult)
|
| 62 |
|
|
|
|
|
|
|
| 63 |
skill_definition = """
|
| 64 |
Skill means the ability to apply knowledge and use know-how to complete tasks and solve problems.
|
| 65 |
"""
|
|
@@ -68,6 +40,10 @@ knowledge_definition = """
|
|
| 68 |
Knowledge means the outcome of the assimilation of information through learning. Knowledge is the body of facts, principles, theories and practices that is related to a field of work or study.
|
| 69 |
"""
|
| 70 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 71 |
prompt = PromptTemplate(
|
| 72 |
template="""You are an expert in tagging tokens with skill and knowledge labels. Use the following definitions to tag the input tokens:
|
| 73 |
Skill definition:{skill_definition}
|
|
@@ -92,6 +68,7 @@ def extract_tags(text: str, tokenize = True) -> TokenTaggingResult:
|
|
| 92 |
output = parser.invoke(output)
|
| 93 |
return tokens, output
|
| 94 |
|
|
|
|
| 95 |
### Pre-trained model from Hugging Face
|
| 96 |
|
| 97 |
mapping = {0: 'B', 1: 'I', 2: 'O'}
|
|
@@ -114,7 +91,6 @@ def convert(text):
|
|
| 114 |
return skill_cls, knowledge_cls
|
| 115 |
|
| 116 |
|
| 117 |
-
|
| 118 |
if __name__ == "__main__":
|
| 119 |
text = input('Enter text: ')
|
| 120 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
import os
|
| 2 |
from langchain_openai import ChatOpenAI
|
| 3 |
from pydantic import BaseModel
|
|
|
|
| 13 |
import torch
|
| 14 |
import sys
|
| 15 |
from tabulate import tabulate
|
|
|
|
| 16 |
load_dotenv(".env")
|
|
|
|
| 17 |
|
| 18 |
|
| 19 |
### LLM-based tag extraction with few-shot learning
|
|
|
|
| 30 |
tokenizer = AutoTokenizer.from_pretrained("jjzha/jobbert_skill_extraction")
|
| 31 |
parser = JsonOutputParser(pydantic_object=TokenTaggingResult)
|
| 32 |
|
| 33 |
+
# Definitions
|
| 34 |
+
|
| 35 |
skill_definition = """
|
| 36 |
Skill means the ability to apply knowledge and use know-how to complete tasks and solve problems.
|
| 37 |
"""
|
|
|
|
| 40 |
Knowledge means the outcome of the assimilation of information through learning. Knowledge is the body of facts, principles, theories and practices that is related to a field of work or study.
|
| 41 |
"""
|
| 42 |
|
| 43 |
+
# Few-shot examples
|
| 44 |
+
with open('few-shot.txt', 'r') as file:
|
| 45 |
+
few_shot_examples = file.read()
|
| 46 |
+
|
| 47 |
prompt = PromptTemplate(
|
| 48 |
template="""You are an expert in tagging tokens with skill and knowledge labels. Use the following definitions to tag the input tokens:
|
| 49 |
Skill definition:{skill_definition}
|
|
|
|
| 68 |
output = parser.invoke(output)
|
| 69 |
return tokens, output
|
| 70 |
|
| 71 |
+
|
| 72 |
### Pre-trained model from Hugging Face
|
| 73 |
|
| 74 |
mapping = {0: 'B', 1: 'I', 2: 'O'}
|
|
|
|
| 91 |
return skill_cls, knowledge_cls
|
| 92 |
|
| 93 |
|
|
|
|
| 94 |
if __name__ == "__main__":
|
| 95 |
text = input('Enter text: ')
|
| 96 |
|