# Source: Hugging Face Space "atlasia/Mohadata" (status: Running).
# File size: 5,351 bytes.

import gradio as gr
from openai import OpenAI
import os
from tqdm import tqdm
import pandas as pd
from pathlib import Path
from datasets import Dataset,load_dataset,concatenate_datasets
import asyncio
import threading
from dotenv import load_dotenv

# Pull variables from a local .env file (if present) into the environment.
load_dotenv()

# Hugging Face access tokens. A missing variable raises KeyError at import
# time, so a misconfigured deployment fails immediately.
HF_READ = os.environ["HF_READ"]
HF_WRITE = os.environ["HF_WRITE"]
# Never echo the token values themselves: anything written to stdout is
# typically captured in deployment logs, which would leak the credentials.
print("[INFO] Hugging Face tokens loaded")

# Maps a registered model name to the base URL passed to the OpenAI client;
# filled in by add_model() and read by model_init().
model_base_url = {}
# Language every generated question and answer must be written in.
LANGUAGE = "MOROCCAN Arabic"
# NOTE(review): not referenced by the code visible in this file; the upload
# helper uses a different, hard-coded repository id -- confirm which is intended.
HF_DATASET = "abdeljalilELmajjodi/Mohadata"
# System message placed first in both the questioner's and the answerer's
# message lists. It describes the two roles of the context-bound Q&A game and
# pins the conversation language to LANGUAGE.
SYSTEM_PROMPT = dict(
    role="system",
    content=f"""This is a context-based Q&A game where two AIs interact with a user-provided context. All interactions MUST be in {LANGUAGE}.

            QUESTIONER_AI:
            - Must only ask questions that can be answered from the provided context
            - Should identify key information gaps or unclear points
            - Cannot ask questions about information not present in the context
            - Must communicate exclusively in {LANGUAGE}

            ANSWERER_AI:
            - Must only answer using information explicitly stated in the context
            - Cannot add external information or assumptions
            - Must indicate if a question cannot be answered from the context alone
            - Must communicate exclusively in {LANGUAGE}""",
)

def add_model(model_name, base_url, api_key):
    """Register a model endpoint and rebuild both model dropdowns.

    Stores ``base_url`` under ``model_name`` in the module-level
    ``model_base_url`` mapping, then stores ``api_key`` in the process
    environment under the model's name.

    Returns:
        A ``(questioner, answerer)`` pair of ``gr.Dropdown`` components whose
        choices are all model names registered so far.
    """
    model_base_url[model_name] = base_url
    os.environ[model_name] = api_key

    labels = ("Questioner Model", "Answerer Model")
    # Each dropdown gets its own freshly built list of choices.
    questioner, answerer = (
        gr.Dropdown(label=label, choices=list(model_base_url))
        for label in labels
    )
    return questioner, answerer


def model_init(model):
    """Build an OpenAI client for a registered model.

    The API key is read from the environment variable named after ``model``
    and the endpoint from ``model_base_url[model]``.

    Args:
        model: Name under which the model was registered.

    Returns:
        An ``OpenAI`` client, or ``None`` when the client could not be built
        (for example because the model was never registered). Callers must
        check for ``None`` before using the result.
    """
    try:
        api_key = os.environ.get(model)
        base_url = model_base_url[model]
        return OpenAI(api_key=api_key, base_url=base_url)
    except Exception as e:
        # Deliberately best-effort: report the problem and hand back None
        # instead of raising. The underlying error is printed as well so an
        # unregistered model can be told apart from a client-construction
        # failure.
        print(f"You should add api key of {model}")
        print(f"[ERROR] could not initialise client for {model}: {e!r}")
        return None

# generate questions
def init_req_messages(sample_context):
    """Build the opening message list for the questioner model.

    Returns a new two-element list: the shared ``SYSTEM_PROMPT`` followed by a
    user message that embeds ``sample_context`` and asks for a question.
    """
    opening_turn = {
        "role": "user",
        "content": f"""Context for analysis:
            {sample_context}
            As QUESTIONER_AI, generate a question based on this context.
            """,
    }
    return [SYSTEM_PROMPT, opening_turn]
# generate Answers
def init_resp_messages(sample_context, question):
    """Build the opening message list for the answerer model.

    Returns a new two-element list: the shared ``SYSTEM_PROMPT`` followed by a
    user message that embeds ``sample_context`` and the first ``question``.
    """
    opening_turn = {
        "role": "user",
        "content": f"""
          Context for analysis:
          {sample_context}
          Question: {question}
          As ANSWERER_AI, answer this question using only information from the context.
          """,
    }
    return [SYSTEM_PROMPT, opening_turn]

def chat_generation(client, model_name, messages, *, temperature=0.5):
    """Request one chat completion and return the text of its first choice.

    Args:
        client: Object exposing ``chat.completions.create`` (an ``OpenAI``
            client in this file).
        model_name: Model identifier forwarded as ``model``.
        messages: Chat history forwarded unchanged as ``messages``.
        temperature: Sampling temperature forwarded to the API. Defaults to
            0.5, the value this function previously hard-coded.

    Returns:
        ``choices[0].message.content`` of the completion response.
    """
    completion = client.chat.completions.create(
        model=model_name,
        messages=messages,
        temperature=temperature,
    )
    return completion.choices[0].message.content

def generate_question(client, model_name, messages_quest):
    """Ask the questioner model for its next question.

    The generated text is appended to ``messages_quest`` (in place) as an
    assistant message and also returned.
    """
    generated = chat_generation(client, model_name, messages_quest)
    assistant_turn = {"role": "assistant", "content": generated}
    messages_quest.append(assistant_turn)
    return generated

def generate_answer(client, model_name, messages_answ):
    """Ask the answerer model for its reply to the latest question.

    The generated text is appended to ``messages_answ`` (in place) as an
    assistant message and also returned.
    """
    generated = chat_generation(client, model_name, messages_answ)
    assistant_turn = {"role": "assistant", "content": generated}
    messages_answ.append(assistant_turn)
    return generated

def load_upload_ds_hf(df):
    """Append the rows of ``df`` to the hosted dataset and push it back.

    Loads the ``train`` split of ``atlasia/Mohadata_Dataset`` with the read
    token, concatenates the rows of ``df`` (index dropped) after it, and pushes
    the combined dataset to the same repository with the write token.
    """
    repo_id = "atlasia/Mohadata_Dataset"

    existing_rows = load_dataset(repo_id, token=HF_READ, split="train")
    print("[INFO] dataset loaded successfully")

    incoming_rows = Dataset.from_pandas(df, preserve_index=False)
    # NOTE(review): load -> concatenate -> push is not guarded by any lock
    # here; concurrent uploads could presumably overwrite one another.
    combined = concatenate_datasets([existing_rows, incoming_rows])
    combined.push_to_hub(repo_id, token=HF_WRITE)
    print("[INFO] dataset uploaded successfully")

async def load_upload_ds_hf_async(df):
    """Awaitable wrapper that runs ``load_upload_ds_hf(df)`` via ``asyncio.to_thread``."""
    upload = asyncio.to_thread(load_upload_ds_hf, df)
    await upload
    


def save_conversation(conversation, context, num_rounds):
    """Write the conversation to ``data.csv`` and start a background upload.

    Args:
        conversation: Sequence of ``{"role", "content"}`` dicts whose roles
            are ``"user"`` (questions) or ``"assistant"`` (answers).
        context: Context text, repeated once per round in a ``context``
            column.
        num_rounds: Number of rounds; must equal the number of collected
            questions and answers for the DataFrame columns to line up.

    Returns:
        The CSV file name, ``"data.csv"``.
    """
    csv_name = "data.csv"

    # One column per role, filled in conversation order.
    columns = {"user": [], "assistant": []}
    for turn in conversation:
        columns[turn["role"]].append(turn["content"])
    columns["context"] = [context] * num_rounds

    frame = pd.DataFrame(columns)
    frame.to_csv(csv_name)
    print("[INFO] conversation saved successfully")
    print("[INFO] uploading dataset to huggingface")

    # Upload on a daemon thread so the caller does not wait for the push.
    uploader = threading.Thread(
        target=load_upload_ds_hf,
        args=(frame,),
        daemon=True,
    )
    uploader.start()

    return Path(csv_name).name

def user_input(context, model_a, model_b, num_rounds, conversation_history):
    """Run a questioner/answerer exchange over ``context`` and save it.

    Args:
        context: Text both models must restrict themselves to.
        model_a: Registered name of the questioner model.
        model_b: Registered name of the answerer model.
        num_rounds: Number of question/answer rounds to generate.
        conversation_history: List that is cleared and then refilled in place
            with ``{"role", "content"}`` dicts (questions as ``"user"``,
            answers as ``"assistant"``).

    Returns:
        ``(conversation_history, download_button)`` where the button is a
        visible ``gr.DownloadButton`` pointing at the saved CSV.

    Raises:
        gr.Error: If a client for either model could not be created.
    """
    conversation_history.clear()
    client_quest = model_init(model_a)
    client_ans = model_init(model_b)

    # model_init() returns None on failure. Fail here with a readable message
    # rather than with an AttributeError on the first completion request.
    for role, name, client in (
        ("questioner", model_a, client_quest),
        ("answerer", model_b, client_ans),
    ):
        if client is None:
            raise gr.Error(
                f"Could not initialise the {role} model {name!r}; "
                "add the model with its base URL and API key first."
            )

    # range() and list repetition both require an int; presumably a numeric UI
    # component may deliver a float such as 3.0 -- confirm against the UI wiring.
    num_rounds = int(num_rounds)

    messages_quest = init_req_messages(context)
    messages_answ = None  # built from the first question, then extended
    for _ in tqdm(range(num_rounds)):
        question = generate_question(client_quest, model_a, messages_quest)
        conversation_history.append({"role": "user", "content": question})

        if messages_answ is None:
            messages_answ = init_resp_messages(context, question)
        else:
            messages_answ.append({"role": "user", "content": question})

        answer = generate_answer(client_ans, model_b, messages_answ)
        # Feed the answer back to the questioner as the next user turn.
        messages_quest.append({"role": "user", "content": answer})
        conversation_history.append({"role": "assistant", "content": answer})

    file_path = save_conversation(conversation_history, context, num_rounds)
    download_button = gr.DownloadButton(
        label="Save Conversation",
        value=file_path,
        visible=True,
    )
    return conversation_history, download_button