|
import subprocess |
|
from langchain_community.llms import Ollama |
|
|
|
def check_model_exists(model_name):
    """Return True when ``model_name`` appears in the output of ``ollama list``.

    The first whitespace-separated column of every row after the header line
    is taken as an installed model name. The comparison is exact rather than
    a substring test, so ``"mistral"`` no longer matches an unrelated entry
    such as ``"mistral-openorca:latest"``. A name given without a ``:tag`` is
    also compared as ``"<name>:latest"`` (Ollama's default tag).

    Args:
        model_name: Model identifier, optionally with a ``:tag`` suffix.

    Returns:
        bool: True if a matching row is found. False if there is no match,
        if ``model_name`` is empty, or if the listing could not be obtained
        (the error is printed in that case).
    """
    if not model_name:
        # An empty name can never identify a model; skip the subprocess call.
        return False

    try:
        # Fixed argv list: no shell is needed to run a constant command.
        output = subprocess.check_output(
            ["ollama", "list"],
            stderr=subprocess.STDOUT,
            universal_newlines=True,
        )
    except subprocess.CalledProcessError as e:
        print(f"Error checking models: {e.output}")
        return False
    except Exception as e:
        # Includes OSError when the ``ollama`` executable cannot be started.
        print(f"An unexpected error occurred: {str(e)}")
        return False

    candidates = {model_name}
    if ":" not in model_name:
        candidates.add(f"{model_name}:latest")

    # Skip the header row; ignore blank rows instead of failing on them.
    for row in output.strip().splitlines()[1:]:
        columns = row.split()
        if columns and columns[0] in candidates:
            return True
    return False
|
|
|
def download_model(model_name):
    """Download ``model_name`` with ``ollama pull`` if it is a supported model.

    Only names on the built-in allow-list are pulled; for any other name a
    notice is printed and nothing else happens.

    Args:
        model_name: Name of the model to pull, e.g. ``"llama3"``.

    Returns:
        None.

    Raises:
        subprocess.CalledProcessError: ``ollama pull`` exited with a non-zero
            status. The error is printed, then re-raised.
        Exception: Any other failure while running the command (for example
            OSError when the executable cannot be started). Printed, then
            re-raised.
    """
    remote_models = (
        'llama3', 'llama3:70b', 'phi3', 'mistral', 'neural-chat',
        'starling-lm', 'codellama', 'llama2-uncensored', 'llava',
        'gemma:2b', 'gemma:7b', 'solar',
    )

    if model_name not in remote_models:
        print("Not supported model currently")
        return

    try:
        print(f"Downloading model '{model_name}'...")
        # argv list instead of a shell string: the name is passed as a single
        # argument and never interpreted by a shell.
        subprocess.check_call(["ollama", "pull", model_name])
        print(f"Model '{model_name}' downloaded successfully.")
    except subprocess.CalledProcessError as e:
        # check_call does not capture output, so e.output is always None;
        # print the exception itself (command and exit status) instead.
        print(f"Error downloading model: {e}")
        raise
    except Exception as e:
        print(f"An unexpected error occurred: {str(e)}")
        raise
|
|
|
def check_model(model_name):
    """Ensure ``model_name`` is available, attempting a download when it is not.

    Prints ``OK`` when ``check_model_exists`` reports the model as present.
    Otherwise ``download_model`` is called; any exception it raises is
    printed and swallowed.

    Args:
        model_name: Name of the model to check.

    Returns:
        None in every case.
    """
    if check_model_exists(model_name):
        print("OK")
        return

    try:
        download_model(model_name)
    except Exception as download_error:
        print(f"Failed to download model '{model_name}': {download_error}")
|
|
|
def make_prompt(input, messages, model):
    """Build a single prompt string for ``model`` from the user input and history.

    The prompt has a system section with a fixed persona line, an optional
    ``history:`` section rendered from every message except the last one,
    and a user section containing ``input``. Sections are separated by single
    newlines and wrapped in the special tokens of the selected model family.

    Args:
        input: Text of the current user turn. The parameter name shadows the
            builtin but is kept so existing keyword callers keep working.
        messages: Sequence of mappings with ``"role"`` and ``"content"`` keys.
            The last element is excluded from the history section.
        model: ``"llama3"`` or ``"mistral"`` select that family's special
            tokens; any other value produces a prompt without special tokens.

    Returns:
        str: The assembled prompt.
    """
    # (begin-of-text, end-of-turn, header-open, header-close) per model family.
    special_tokens = {
        "llama3": (
            "<|begin_of_text|>",
            "<|eot_id|>",
            "<|start_header_id|>",
            "<|end_header_id|>",
        ),
        "mistral": ("<s>", "</s>", "", ""),
    }
    begin_of_text, eot_id, header_open, header_close = special_tokens.get(
        model, ("", "", "", "")
    )

    sections = [
        f"{begin_of_text}{header_open}system{header_close}",
        "You are a friendly AI companion.",
    ]

    # Only emit a history section when there is something to put in it; an
    # empty ``messages`` therefore behaves like a single-message conversation
    # instead of producing a dangling "history:" header.
    earlier_messages = messages[:-1]
    if earlier_messages:
        # History is rendered newest-first, as before.
        # NOTE(review): confirm that reverse-chronological order is intended.
        conversation_history = "\n".join(
            f"{header_open}{message['role']}{header_close}\n"
            f"{message['content']}{eot_id}"
            for message in reversed(earlier_messages)
        )
        sections += ["history:", conversation_history]

    sections += [
        f"{eot_id}{header_open}user{header_close}",
        f"{input}",
        eot_id,
    ]
    return "\n".join(sections)
|
|
|
def inject_references_to_messages(messages, references):
    """Return a message list whose system message carries the reference answers.

    A synthesis instruction followed by a numbered list of ``references`` is
    appended to the leading system message, or inserted as a new system
    message when the conversation does not start with one (including when
    ``messages`` is empty).

    The input is not modified: the caller's list and its first message dict
    are left untouched, so calling this repeatedly on the same conversation
    does not accumulate references.

    Args:
        messages: List of mappings with ``"role"`` and ``"content"`` keys.
        references: Iterable of reference responses; each is formatted with
            ``str`` into a ``"<n>. <reference>"`` line, numbered from 1.

    Returns:
        list: A new list of messages beginning with the augmented (or newly
        created) system message, followed by the remaining original messages.
    """
    system = """You have been provided with a set of responses from various open-source models to the latest user query. Your task is to synthesize these responses into a single, high-quality response. It is crucial to critically evaluate the information provided in these responses, recognizing that some of it may be biased or incorrect. Your response should not simply replicate the given answers but should offer a refined, accurate, and comprehensive reply to the instruction. Ensure your response is well-structured, coherent, and adheres to the highest standards of accuracy and reliability.

Responses from models:"""

    system += "".join(
        f"\n{number}. {reference}"
        for number, reference in enumerate(references, start=1)
    )

    if messages and messages[0]["role"] == "system":
        # Copy the first message rather than editing the caller's dict.
        augmented = {
            **messages[0],
            "content": messages[0]["content"] + "\n\n" + system,
        }
        return [augmented, *messages[1:]]

    return [{"role": "system", "content": system}, *messages]
|
|
|
def generate_with_references(model, messages, references=None):
    """Invoke an Ollama model on ``messages``, optionally enriched with references.

    When ``references`` is truthy, the messages are first passed through
    ``inject_references_to_messages``; otherwise they are used as given.

    Args:
        model: Model name handed to ``Ollama(model=...)``.
        messages: Conversation messages passed to the model's ``invoke``.
        references: Optional reference responses to inject; ``None`` or an
            empty collection skips injection.

    Returns:
        Whatever ``Ollama.invoke`` returns for the prepared messages.
    """
    prepared_messages = (
        inject_references_to_messages(messages, references)
        if references
        else messages
    )
    return Ollama(model=model).invoke(prepared_messages)