Spaces:
Runtime error
Runtime error
File size: 3,684 Bytes
208053f |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 |
from typing import List, Dict, Any
from easygoogletranslate import EasyGoogleTranslate
from langchain.prompts import PromptTemplate, FewShotPromptTemplate
# Maps the lowercase language names used in this module to the language
# codes passed to Google Translate.
# The original literal listed "english" twice; the later entry silently
# overwrote the earlier one, so the duplicate has been removed (the
# resulting mapping is unchanged).
LANGUAGE_TO_GOOGLE_TRANSLATE_MARK = {
    "english": "en",
    "bambara": "bm",
    "ewe": "ee",
    "hausa": "ha",
    "igbo": "ig",
    "kinyarwanda": "rw",
    "chichewa": "ny",
    "twi": "ak",
    "yoruba": "yo",
    "slovak": "sk",
    "serbian": "sr",
    "swedish": "sv",
    "vietnamese": "vi",
    "italian": "it",
    "portuguese": "pt",
    "chinese": "zh",
    "french": "fr",
}
# Maps lowercase language names to the short per-language prefix used by
# this module. NOTE(review): the African-language values look like
# three-letter dataset/ISO 639-3 style codes and the rest like two-letter
# codes -- confirm against the datasets that consume them.
# The misspelled constant name is kept as-is because other code refers to it.
LANGAUGE_TO_PREFIX = dict(
    bambara="bam",
    ewe="ewe",
    fon="fon",
    hausa="hau",
    igbo="ibo",
    kinyarwanda="kin",
    chichewa="nya",
    twi="twi",
    yoruba="yor",
    slovak="sk",
    serbian="sr",
    swedish="sv",
    vietnamese="vi",
    italian="it",
    portuguese="pt",
    chinese="zh",
    english="en",
    french="fr",
)
def _translate_instruction(basic_instruction: str, target_language: str) -> str:
    """Translate an English instruction into ``target_language``.

    Args:
        basic_instruction: Instruction text, written in English.
        target_language: Lowercase language name (e.g. ``"hausa"``) that is
            a key of ``LANGUAGE_TO_GOOGLE_TRANSLATE_MARK`` or
            ``LANGAUGE_TO_PREFIX``.

    Returns:
        The value returned by ``EasyGoogleTranslate.translate`` for the
        instruction.

    Raises:
        KeyError: If ``target_language`` is in neither lookup table.
    """
    # The translator needs a Google Translate language code ("bm", "ha",
    # "ig", ...), not the dataset prefix ("bam", "hau", "ibo", ...) that was
    # passed previously. Languages that only appear in the prefix table
    # (currently "fon") keep their previous behaviour via the fallback.
    try:
        language_code = LANGUAGE_TO_GOOGLE_TRANSLATE_MARK[target_language]
    except KeyError:
        language_code = LANGAUGE_TO_PREFIX[target_language]

    translator = EasyGoogleTranslate(
        source_language="en",
        target_language=language_code,
        timeout=10,
    )
    return translator.translate(basic_instruction)
def create_instruction(lang: str, expected_output: str):
    """Build the NER task instruction, translated when ``lang`` is not English.

    Args:
        lang: Lowercase language name the instruction should be written in.
            The exact string ``"english"`` returns the text untranslated.
        expected_output: Language name interpolated into the final sentence
            of the instruction ("The entities should be in ... language").

    Returns:
        The English instruction, or the result of ``_translate_instruction``
        for any other ``lang``.
    """
    english_prompt = f"""You are an NLP assistant whose
purpose is to perform Named Entity Recognition
(NER). You will need to give each entity a tag, from the following:
PER means a person, ORG means organization.
LOC means a location entity.
The output should be a list of tuples of the format:
['Tag: Entity', 'Tag: Entity'] for each entity in the sentence.
The entities should be in {expected_output} language"""

    # Anything other than English goes through the translator.
    if lang != "english":
        return _translate_instruction(english_prompt, target_language=lang)
    return english_prompt
def construct_prompt(
    instruction: str,
    test_example: dict,
    zero_shot: bool,
    dataset: str,
    num_examples: int,
    lang: str,
    config: Dict[str, str],
) -> str:
    """Render the final prompt text for one test example.

    Args:
        instruction: Task instruction used as the prompt prefix. When empty,
            one is generated with ``create_instruction(lang, config["prefix"])``.
        test_example: Mapping with at least a ``"text"`` entry; that text is
            substituted into the prompt.
        zero_shot: If true, build a plain instruction + input prompt;
            otherwise build a few-shot prompt with in-context examples.
        dataset: Not used by this function; kept for interface compatibility.
        num_examples: Number of in-context examples requested in few-shot mode.
        lang: Lowercase language name of the data.
        config: Settings read by key: ``"prefix"`` (only when ``instruction``
            is empty), ``"context"`` (few-shot only) and ``"input"``.

    Returns:
        The formatted prompt string.

    Raises:
        KeyError: If a required ``config`` key or ``test_example["text"]`` is
            missing.
    """
    import logging

    logger = logging.getLogger(__name__)

    if not instruction:
        logger.debug("No instruction supplied; creating one for lang=%s", lang)
        instruction = create_instruction(lang, config["prefix"])

    if zero_shot:
        # Zero-shot needs no dataset, so nothing is loaded on this path
        # (previously 100 examples were loaded and printed, then discarded).
        zero_shot_template = f"{instruction}" + "\n Input: {text} "
        prompt = PromptTemplate(
            input_variables=["text"], template=zero_shot_template
        )
    else:
        # NOTE(review): the in-context examples are drawn from the "test"
        # split and handed over as `train_dataset` -- confirm this is
        # intended and cannot leak the example being evaluated.
        test_data = load_xlsum_data(lang=lang, split="test", limit=100)
        logger.debug("Selecting %s few-shot examples for lang=%s", num_examples, lang)
        ic_examples = choose_few_shot_examples(
            train_dataset=test_data,
            few_shot_size=num_examples,
            context=[config["context"]] * num_examples,
            selection_criteria="random",
            lang=lang,
        )
        # NOTE(review): the example template still uses "summary"/"text"
        # fields although the instruction describes NER -- verify against
        # the schema returned by choose_few_shot_examples.
        example_prompt = PromptTemplate(
            input_variables=["summary", "text"],
            template="Text: {text}\nSummary: {summary}",
        )
        prompt = FewShotPromptTemplate(
            examples=ic_examples,
            prefix=instruction,
            example_prompt=example_prompt,
            suffix="<Text>: {text}",
            input_variables=["text"],
        )

    logger.debug("input language=%s, data language=%s", config["input"], lang)
    if config["input"] != lang:
        # The model is to be queried in a different language than the data.
        test_example = _translate_example(
            example=test_example, src_language=lang, target_language=config["input"]
        )

    logger.debug("prompt template: %s", prompt)
    return prompt.format(text=test_example["text"])
|