import subprocess
from langchain_community.llms import Ollama
def check_model_exists(model_name):
    try:
        # List locally available models; skip the header row of `ollama list`.
        output = subprocess.check_output("ollama list", shell=True, stderr=subprocess.STDOUT, universal_newlines=True)
        available_models = [line.split()[0] for line in output.strip().split('\n')[1:]]
        return any(model_name in model for model in available_models)
    except subprocess.CalledProcessError as e:
        print(f"Error checking models: {e.output}")
        return False
    except Exception as e:
        print(f"An unexpected error occurred: {str(e)}")
        return False
def download_model(model_name):
    remote_models = ['llama3', 'llama3:70b', 'phi3', 'mistral', 'neural-chat', 'starling-lm', 'codellama',
                     'llama2-uncensored', 'llava', 'gemma:2b', 'gemma:7b', 'solar']
    if model_name in remote_models:
        try:
            # Pull the model from the Ollama registry.
            print(f"Downloading model '{model_name}'...")
            subprocess.check_call(f"ollama pull {model_name}", shell=True)
            print(f"Model '{model_name}' downloaded successfully.")
        except subprocess.CalledProcessError as e:
            # check_call does not capture output, so report the exception itself.
            print(f"Error downloading model: {e}")
            raise
        except Exception as e:
            print(f"An unexpected error occurred: {str(e)}")
            raise
    else:
        print(f"Model '{model_name}' is not currently supported.")
def check_model(model_name):
    if not check_model_exists(model_name):
        try:
            download_model(model_name)
        except Exception as e:
            print(f"Failed to download model '{model_name}': {e}")
            return
    else:
        print("OK")
def make_prompt(user_input, messages, model):
    if model == "llama3":
        # Special tokens used with Meta Llama 3
        BEGIN_OF_TEXT = "<|begin_of_text|>"
        EOT_ID = "<|eot_id|>"
        START_HEADER_ID = "<|start_header_id|>"
        END_HEADER_ID = "<|end_header_id|>"
    elif model == "mistral":
        # Special tokens for Mistral
        BEGIN_OF_TEXT = "<s>"
        EOT_ID = "</s>"
        START_HEADER_ID = ""  # Not applicable to Mistral
        END_HEADER_ID = ""    # Not applicable to Mistral
    else:
        # No special tokens
        BEGIN_OF_TEXT = ""
        EOT_ID = ""
        START_HEADER_ID = ""
        END_HEADER_ID = ""
    if len(messages) == 1:
        prompt = f'''{BEGIN_OF_TEXT}{START_HEADER_ID}system{END_HEADER_ID}
You are a friendly AI companion.
{EOT_ID}{START_HEADER_ID}user{END_HEADER_ID}
{user_input}
{EOT_ID}'''
    else:
        # Render prior turns (excluding the current user message), newest first.
        conversation_history = '\n'.join(
            f"{START_HEADER_ID}{message['role']}{END_HEADER_ID}\n{message['content']}{EOT_ID}"
            for message in reversed(messages[:-1])
        )
        prompt = f'''{BEGIN_OF_TEXT}{START_HEADER_ID}system{END_HEADER_ID}
You are a friendly AI companion.
history:
{conversation_history}
{EOT_ID}{START_HEADER_ID}user{END_HEADER_ID}
{user_input}
{EOT_ID}'''
    return prompt
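# Example (illustrative) of what make_prompt() renders for model="llama3"
# with a single-message history, e.g.
# make_prompt("Hi!", [{"role": "user", "content": "Hi!"}], "llama3"):
#
#   <|begin_of_text|><|start_header_id|>system<|end_header_id|>
#   You are a friendly AI companion.
#   <|eot_id|><|start_header_id|>user<|end_header_id|>
#   Hi!
#   <|eot_id|>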
def inject_references_to_messages(messages, references):
    system = """You have been provided with a set of responses from various open-source models to the latest user query. Your task is to synthesize these responses into a single, high-quality response. It is crucial to critically evaluate the information provided in these responses, recognizing that some of it may be biased or incorrect. Your response should not simply replicate the given answers but should offer a refined, accurate, and comprehensive reply to the instruction. Ensure your response is well-structured, coherent, and adheres to the highest standards of accuracy and reliability.
Responses from models:"""
    for i, reference in enumerate(references):
        system += f"\n{i+1}. {reference}"
    if messages[0]["role"] == "system":
        messages[0]["content"] += "\n\n" + system
    else:
        messages = [{"role": "system", "content": system}] + messages
    return messages
def generate_with_references(model, messages, references=None):
    # Optionally prepend the aggregator system prompt containing the reference
    # responses, then run the model through LangChain's Ollama LLM.
    if references:
        messages = inject_references_to_messages(messages, references)
    llm = Ollama(model=model)
    response = llm.invoke(messages)
    return response
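# A minimal usage sketch of the Mixture-of-Agents (MoA) flow these helpers
# support: several "proposer" models answer the query independently, then an
# aggregator model synthesizes their answers via generate_with_references().
# The model names and the two-layer setup below are illustrative assumptions,
# not part of the original module; any locally pulled Ollama models work.
if __name__ == "__main__":
    proposers = ["llama3", "mistral", "phi3"]  # assumed proposer models
    aggregator = "llama3"                      # assumed aggregator model
    query = "Explain the difference between a process and a thread."
    messages = [{"role": "user", "content": query}]

    # Ensure every model is available locally, pulling it if necessary.
    for name in set(proposers + [aggregator]):
        check_model(name)

    # Layer 1: collect one independent response per proposer.
    references = [generate_with_references(name, messages) for name in proposers]

    # Layer 2: the aggregator synthesizes the proposers' answers.
    final = generate_with_references(aggregator, messages, references=references)
    print(final)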