import math

import gradio as gr
import spaces
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

# Prompt template for verifying each "Now Step" against the question and the previous steps
template = """You are an excellent math teacher. Please verify the correctness of the Now Step.

You first need to analyze the Now Step and the Previous Steps and then summarize based on your analysis.
Analysis:
You need to analyze the following aspects.
**Previous Steps Analysis**: You need to analyze the Previous Steps step by step. For each step, first explain what the step is doing, then try to find any errors in it.
**Now Step Analysis**: First explain what the Now Step is doing, and then point out which part of the Question it is trying to solve or which piece of information it states.
**Data Source Analysis**: First identify what data are used in the Now Step, and then determine whether the source of that data is reasonable and correct. When making this judgment, specify the exact source of each piece of data, such as which part of the question or which content of a previous step it comes from, and then check that the source and the current use are consistent; if they are, the data is used correctly in the Now Step.
**Consistency Analysis**: Check that the Now Step is consistent with the contents of the Previous Steps, and that all the information within the Now Step itself is consistent.
**Calculation Analysis**: If the Now Step involves any calculations, such as addition, subtraction, multiplication, division, equations, or modulo operations, first verify the calculation, for example by performing the reverse operation, and then analyze the result of your check to determine whether the calculation contains an error.
Conclusion:
Please verify the correctness of the Now Step based on your analysis: if there is any error in the Now Step, then the Now Step is wrong; otherwise, the Now Step is correct. At the end of the Conclusion, when you give your final answer, write it in the form "Verification: Is the step correct (Yes/No)? X", where X is either Yes or No.

Question: {}
Previous Steps: {}
Now Step: {}
Please carefully analyze the correctness of the Now Step.
Reply:"""

def split_string_into_max_six_chunks(input_str: str) -> list[str]:
    """
    Splits a string by newlines into a maximum of 6 chunks.

    For example, if the string has 12 lines, it will be split into 6 chunks,
    with each chunk containing 2 lines.

    Args:
        input_str: The input string with newline characters.

    Returns:
        A list of strings, where the list contains at most 6 elements.
    """
    # Split the string into individual lines
    lines = input_str.splitlines()
    num_lines = len(lines)

    # If there are no lines, return an empty list
    if num_lines == 0:
        return []

    # Maximum number of chunks desired (matches the function name and docstring)
    max_chunks = 6

    # If the number of lines is already within the limit, return the lines as they are
    if num_lines <= max_chunks:
        return lines
    
    # Calculate how many lines should be in each chunk, rounding up
    lines_per_chunk = math.ceil(num_lines / max_chunks)
    
    # Group the lines into chunks
    result_chunks = []
    for i in range(0, num_lines, lines_per_chunk):
        # Slice the lines list to get the current chunk
        chunk_lines = lines[i:i + lines_per_chunk]
        # Join the lines back together with newlines
        result_chunks.append('\n'.join(chunk_lines))
        
    return result_chunks
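
# Quick sanity check (illustrative, not executed by the app): a 12-line trace
#   split_string_into_max_six_chunks("\n".join(f"line {i}" for i in range(12)))
# yields 6 chunks of 2 lines each, e.g. ["line 0\nline 1", ..., "line 10\nline 11"].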
    
device = "cuda"
MODEL_NAME = "kevinpro/R-PRM-7B-DPO"

print("Start dowload")
def load_model():
    model = AutoModelForCausalLM.from_pretrained(MODEL_NAME,torch_dtype="bfloat16").to(device)
    print(f"Model loaded in {device}")
    return model


model = load_model()
print("Ednd dowload")
# Loading the tokenizer once, because re-loading it takes about 1.5 seconds each time
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)

    
# @spaces.GPU allocates a GPU for the duration of each call (Hugging Face ZeroGPU)
@spaces.GPU
def working(input_text):
    # Tokenize the prompt and move it to the model's device
    input_ids = tokenizer(input_text, return_tensors="pt").input_ids.to(device)
    output_ids = model.generate(
        input_ids=input_ids,
        max_new_tokens=2048,
        num_return_sequences=1,
    )
    # Decode only the newly generated tokens, dropping the echoed prompt
    generated_ids = output_ids[0][input_ids.shape[1]:]
    return tokenizer.decode(generated_ids, skip_special_tokens=True)
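
# Typical call (placeholder names; see Judge below for the real wiring):
#   verdict = working(template.format(question, previous_steps, now_step))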

def Judge(input_question, input_cot):
    all_judge = ""
    reasoning_chunks = split_string_into_max_six_chunks(input_cot)
    previous_step_string = ""
    for index, chunk in enumerate(reasoning_chunks, start=1):
        previous_step_string = previous_step_string.strip()
        cur_step = "Step {}: ".format(index) + chunk
        input_string = template.format(input_question, previous_step_string, cur_step)
        print(input_string)
        output = working(input_string)
        # Accumulate the verified step itself (not the full prompt) as context for the next iteration
        previous_step_string += "\n" + cur_step
        all_judge += "Step {}: ".format(index) + output + "\n\n"
        print(output)
        print("============================\n\n")
    return all_judge
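
# Example with hypothetical inputs: verify a short two-step solution.
#   report = Judge(
#       "What is 2 + 2?",
#       "Identify the operands 2 and 2.\nAdd them to get 4.",
#   )
# Each per-step verdict ends with "Verification: Is the step correct (Yes/No)? X".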



description = """
<div style="text-align: center;">
    <h1 style="color: #0077be; font-size: 4em;">R-PRM, powered by NJUNLP</h1>
    <h3 style="font-size: 1em;">🚀 We introduce Reasoning-Driven Process Reward Modeling (R-PRM), a novel approach that enhances LLMs' ability to evaluate mathematical reasoning step-by-step. By leveraging stronger LLMs to generate seed data, optimizing preferences without additional annotations, and scaling inference-time computation, R-PRM delivers comprehensive, transparent, and robust assessments of reasoning processes.</h3>
</div>
"""



with gr.Blocks() as demo:
    gr.Markdown(description)
    with gr.Row():
        input_question = gr.Textbox(label="Question", lines=4)
    with gr.Row():
        input_cot = gr.Textbox(label="Reasoning", lines=12)
    with gr.Row():
        btn = gr.Button("Start Analysis")
    with gr.Row():
        output = gr.Textbox(label="Output Text", lines=6)
    btn.click(
        Judge,
        inputs=[input_question, input_cot],
        outputs=output,
    )

print("Prepared")
demo.launch()