import math

import gradio as gr
import spaces
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

# Prompt template for verifying each "Now Step" against the question and the previous steps
template = """You are an excellent math teacher. Please verify the correctness of the Now Step.

You first need to analyze the Now Step and the Previous Steps and then summarize based on your analysis.
Analysis:
You need to analyze the following aspects.
**Previous Steps Analysis**: You need to analyze the Previous Steps step by step. For each step, first explain what the step is doing, then try to find any errors in it.
**Now Step Analysis**: First explain what the Now Step is doing, and then point out which part of the Question it is trying to solve or which piece of information it states.
**Data Source Analysis**: First identify what data are used in the Now Step, and then determine whether the source of that data is reasonable and correct. When making this judgment, specify the exact source of each piece of data, such as which part of the question or which content of a previous step it comes from, and then check that the source and the current use are consistent; if they are, the data is used correctly in the Now Step.
**Consistency Analysis**: Check that the Now Step is consistent with the contents of the Previous Steps, and that all the information within the Now Step itself is consistent.
**Calculation Analysis**: If the Now Step involves any calculations, such as addition, subtraction, multiplication, division, equations, or modulo operations, first verify the calculation, for example by performing the reverse operation, and then analyze the result of your check to determine whether the calculation contains an error.
Conclusion:
Please verify the correctness of the Now Step based on your analysis: if there is any error in the Now Step, then the Now Step is wrong; otherwise, the Now Step is correct. At the end of the Conclusion, when you give your final answer, write it in the form "Verification: Is the step correct (Yes/No)? X", where X is either Yes or No.

Question: {}
Previous Steps: {}
Now Step: {}
Please carefully analyze the correctness of the Now Step.
Reply:"""

def split_string_into_max_six_chunks(input_str: str) -> list[str]:
    """
    Splits a string by newlines into a maximum of 6 chunks.

    For example, if the string has 12 lines, it will be split into 6 chunks,
    with each chunk containing 2 lines.

    Args:
        input_str: The input string with newline characters.

    Returns:
        A list of strings, where the list contains at most 6 elements.
    """
    # Split the string into individual lines
    lines = input_str.splitlines()
    num_lines = len(lines)

    # If there are no lines, return an empty list
    if num_lines == 0:
        return []

    # Maximum number of chunks desired (matches the function name and docstring)
    max_chunks = 6

    # If the number of lines is already within the limit, return the lines as they are
    if num_lines <= max_chunks:
        return lines
    
    # Calculate how many lines should be in each chunk, rounding up
    lines_per_chunk = math.ceil(num_lines / max_chunks)
    
    # Group the lines into chunks
    result_chunks = []
    for i in range(0, num_lines, lines_per_chunk):
        # Slice the lines list to get the current chunk
        chunk_lines = lines[i:i + lines_per_chunk]
        # Join the lines back together with newlines
        result_chunks.append('\n'.join(chunk_lines))
        
    return result_chunks
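
# Quick sanity check (illustrative, not executed by the app): a 12-line trace
#   split_string_into_max_six_chunks("\n".join(f"line {i}" for i in range(12)))
# yields 6 chunks of 2 lines each, e.g. ["line 0\nline 1", ..., "line 10\nline 11"].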
    
device = "cuda"
MODEL_NAME = "kevinpro/R-PRM-7B-DPO"

print("Start dowload")
def load_model():
    model = AutoModelForCausalLM.from_pretrained(MODEL_NAME,torch_dtype="bfloat16").to(device)
    print(f"Model loaded in {device}")
    return model


model = load_model()
print("Ednd dowload")
# Loading the tokenizer once, because re-loading it takes about 1.5 seconds each time
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)

    
# @spaces.GPU allocates a GPU for the duration of each call (Hugging Face ZeroGPU)
@spaces.GPU
def working(input_text):
    # Tokenize the prompt and move it to the model's device
    input_ids = tokenizer(input_text, return_tensors="pt").input_ids.to(device)
    output_ids = model.generate(
        input_ids=input_ids,
        max_new_tokens=2048,
        num_return_sequences=1,
    )
    # Decode only the newly generated tokens, dropping the echoed prompt
    generated_ids = output_ids[0][input_ids.shape[1]:]
    return tokenizer.decode(generated_ids, skip_special_tokens=True)
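
# Typical call (placeholder names; see Judge below for the real wiring):
#   verdict = working(template.format(question, previous_steps, now_step))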

def Judge(input_question, input_cot):
    all_judge = ""
    reasoning_chunks = split_string_into_max_six_chunks(input_cot)
    previous_step_string = ""
    for index, chunk in enumerate(reasoning_chunks, start=1):
        previous_step_string = previous_step_string.strip()
        cur_step = "Step {}: ".format(index) + chunk
        input_string = template.format(input_question, previous_step_string, cur_step)
        print(input_string)
        output = working(input_string)
        # Accumulate the verified step itself (not the full prompt) as context for the next iteration
        previous_step_string += "\n" + cur_step
        all_judge += "Step {}: ".format(index) + output + "\n\n"
        print(output)
        print("============================\n\n")
    return all_judge
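
# Example with hypothetical inputs: verify a short two-step solution.
#   report = Judge(
#       "What is 2 + 2?",
#       "Identify the operands 2 and 2.\nAdd them to get 4.",
#   )
# Each per-step verdict ends with "Verification: Is the step correct (Yes/No)? X".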



description = """
<div style="text-align: center;">
    <h1 style="color: #0077be; font-size: 4em;">R-PRM, powered by NJUNLP</h1>
    <h3 style="font-size: 1em;">🚀 We introduce Reasoning-Driven Process Reward Modeling (R-PRM), a novel approach that enhances LLMs' ability to evaluate mathematical reasoning step-by-step. By leveraging stronger LLMs to generate seed data, optimizing preferences without additional annotations, and scaling inference-time computation, R-PRM delivers comprehensive, transparent, and robust assessments of reasoning processes.</h3>
</div>
"""



with gr.Blocks() as demo:
    gr.Markdown(description)
    with gr.Row():
        input_question = gr.Textbox(label="Question", lines=4)
    with gr.Row():
        input_cot = gr.Textbox(label="Reasoning", lines=12)
    with gr.Row():
        btn = gr.Button("Start Analysis")
    with gr.Row():
        output = gr.Textbox(label="Output Text", lines=6)
    btn.click(
        Judge,
        inputs=[input_question, input_cot],
        outputs=output,
    )

print("Prepared")
demo.launch()