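# NOTE: the text below appears to be page-header residue from the web viewer
# this file was copied from, not part of the module:
#   Spaces: Runtime error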
from typing import List, Dict, Any | |
from easygoogletranslate import EasyGoogleTranslate | |
from langchain.prompts import PromptTemplate, FewShotPromptTemplate | |
# Maps a lower-case language name to the language code handed to Google
# Translate. Each name appears exactly once: a repeated key in a dict literal
# is silently overridden by its last occurrence, which hides typos.
LANGUAGE_TO_GOOGLE_TRANSLATE_MARK = {
    "english": "en",
    # African languages
    "bambara": "bm",
    "ewe": "ee",
    "hausa": "ha",
    "igbo": "ig",
    "kinyarwanda": "rw",
    "chichewa": "ny",
    "twi": "ak",
    "yoruba": "yo",
    # Other languages
    "slovak": "sk",
    "serbian": "sr",
    "swedish": "sv",
    "vietnamese": "vi",
    "italian": "it",
    "portuguese": "pt",
    "chinese": "zh",
    "french": "fr",
}
# Maps a lower-case language name to a short per-language prefix string.
# NOTE(review): the constant name is misspelled ("LANGAUGE"); it is kept as-is
# because other code refers to it by this exact name.
LANGAUGE_TO_PREFIX = {
    # Three-letter prefixes
    "bambara": "bam",
    "ewe": "ewe",
    "fon": "fon",
    "hausa": "hau",
    "igbo": "ibo",
    "kinyarwanda": "kin",
    "chichewa": "nya",
    "twi": "twi",
    "yoruba": "yor",
    # Two-letter prefixes
    "slovak": "sk",
    "serbian": "sr",
    "swedish": "sv",
    "vietnamese": "vi",
    "italian": "it",
    "portuguese": "pt",
    "chinese": "zh",
    "english": "en",
    "french": "fr",
}
def _translate_instruction(basic_instruction: str, target_language: str) -> str:
    """Translate an English instruction into ``target_language``.

    Args:
        basic_instruction: Instruction text written in English.
        target_language: Lower-case language name, e.g. ``"yoruba"``.

    Returns:
        The result of ``EasyGoogleTranslate.translate`` for the instruction.

    Raises:
        KeyError: If ``target_language`` appears in neither
            ``LANGUAGE_TO_GOOGLE_TRANSLATE_MARK`` nor ``LANGAUGE_TO_PREFIX``.
    """
    # The translator needs Google Translate language codes ("yo", "ha", "bm"),
    # not the prefixes stored in LANGAUGE_TO_PREFIX ("yor", "hau", "bam").
    try:
        language_code = LANGUAGE_TO_GOOGLE_TRANSLATE_MARK[target_language]
    except KeyError:
        # Names without a Google code entry (e.g. "fon") keep the earlier
        # behaviour of passing the prefix through unchanged.
        language_code = LANGAUGE_TO_PREFIX[target_language]

    translator = EasyGoogleTranslate(
        source_language="en",
        target_language=language_code,
        timeout=10,
    )
    return translator.translate(basic_instruction)
def create_instruction(lang: str, expected_output: str):
    """Build the NER task instruction, translated when ``lang`` is not English.

    Args:
        lang: Lower-case name of the language the instruction is written in.
            ``"english"`` returns the text untranslated.
        expected_output: Language name interpolated into the final sentence
            ("The entities should be in ... language").

    Returns:
        The English instruction when ``lang == "english"``; otherwise the
        result of ``_translate_instruction`` for that instruction.
    """
    instruction_lines = (
        "You are an NLP assistant whose",
        "purpose is to perform Named Entity Recognition",
        "(NER). You will need to give each entity a tag, from the following:",
        "PER means a person, ORG means organization.",
        "LOC means a location entity.",
        "The output should be a list of tuples of the format:",
        "['Tag: Entity', 'Tag: Entity'] for each entity in the sentence.",
        f"The entities should be in {expected_output} language",
    )
    english_instruction = "\n".join(instruction_lines)

    # English needs no translation round-trip.
    if lang == "english":
        return english_instruction
    return _translate_instruction(english_instruction, target_language=lang)
def construct_prompt(
    instruction: str,
    test_example: dict,
    zero_shot: bool,
    dataset: str,
    num_examples: int,
    lang: str,
    config: Dict[str, str],
):
    """Assemble the final prompt text for one test example.

    Args:
        instruction: Instruction text; when falsy, one is generated with
            ``create_instruction(lang, config['prefix'])``.
        test_example: Mapping with at least a ``"text"`` entry.
        zero_shot: When true, build a plain prompt with no in-context
            examples; otherwise build a few-shot prompt.
        dataset: Not referenced by this function.
        num_examples: Number of few-shot examples requested.
        lang: Lower-case language name used for data loading, example
            selection and instruction creation.
        config: Settings mapping; the keys ``"prefix"``, ``"context"`` and
            ``"input"`` are read here.

    Returns:
        The prompt formatted with ``test_example["text"]``.
    """
    if not instruction:
        print(lang)
        instruction = create_instruction(lang, config['prefix'])

    example_prompt = PromptTemplate(
        input_variables=["summary", "text"],
        template="Text: {text}\nSummary: {summary}",
    )
    # Doubled braces keep "{text}" as a literal template placeholder.
    zero_shot_template = f"{instruction}\n Input: {{text}} "

    test_data = load_xlsum_data(lang=lang, split="test", limit=100)
    print(test_data)
    print(num_examples)
    print(lang)

    if zero_shot:
        prompt = PromptTemplate(
            input_variables=["text"], template=zero_shot_template
        )
    else:
        # NOTE(review): few-shot examples are drawn from the data loaded with
        # split="test" above - confirm this is intended.
        few_shot_examples = choose_few_shot_examples(
            train_dataset=test_data,
            few_shot_size=num_examples,
            context=[config["context"]] * num_examples,
            selection_criteria="random",
            lang=lang,
        )
        prompt = FewShotPromptTemplate(
            examples=few_shot_examples,
            prefix=instruction,
            example_prompt=example_prompt,
            suffix="<Text>: {text}",
            input_variables=["text"],
        )

    print("lang", lang)
    print(config["input"], lang)

    # Translate the example when the configured input language differs.
    if config["input"] != lang:
        test_example = _translate_example(
            example=test_example,
            src_language=lang,
            target_language=config["input"],
        )

    print("test_example", prompt)
    return prompt.format(text=test_example["text"])