kevinpro committed on
Commit 939f8b7 · verified · 1 Parent(s): 6b84df3

Update app.py

Files changed (1)
  1. app.py +96 -27
app.py CHANGED
@@ -7,7 +7,68 @@ from transformers import AutoTokenizer, AutoModel,AutoModelForCausalLM
 import torch
 
 # Assume openai_client is already defined, e.g.:
+template = """You are an excellent math teacher. Please verify the correctness of the Now Step.
+
+You first need to analyze the Now Step and the Previous Steps and then summarize based on your analysis.
+Analysis:
+You need to analyze the following aspects.
+**Previous Steps Analysis**: You need to analyze the Previous Steps step by step. For each step, you need to first explain what the current step is doing, then you try to find any error in the current step.
+**Now Step Analysis**: You first need to explain what the Now Step is doing, and then point out which part of the Question it is trying to solve or which part of the information it states.
+**Data Source Analysis**: First you need to find out what data are used in the Now Step, and then you need to determine whether the source of the data is reasonable and correct. When you judge whether the source of a piece of data is reasonable and correct, you need to specify its exact source, such as which part of the question or which content of a previous step, and then determine whether the source and the current use are consistent, i.e., whether the Now Step uses it correctly.
+**Consistency Analysis**: You need to check that the Now Step is consistent with the contents of the Previous Steps, and then you need to check that all the information inside the Now Step is consistent.
+**Calculation Analysis**: If the Now Step involves any calculations, such as addition, subtraction, multiplication, division, equations, modulo operations, etc., you will first need to perform a check on the calculation, such as a reverse operation, to see if the calculation was done correctly, and then analyze the results of your check to see if there was an error in the calculation.
+Conclusion:
+Please verify the correctness of the Now Step based on your analysis; if there is any error in the Now Step then the Now Step is wrong, and vice versa the Now Step is correct. At the end of the Conclusion, when you give your final answer, write it in the form "Verification: Is the step correct (Yes/No)? X", where X is either Yes or No.
+
+Question: {}
+Previous Steps: {}
+Now Step: {}
+Please carefully analyze the correctness of the Now Step.
+Reply:"""
+
+import math
+
+def split_string_into_max_six_chunks(input_str: str) -> list[str]:
+    """
+    Splits a string by newlines into a maximum of 6 chunks.
+
+    For example, if the string has 12 lines, it will be split into 6 chunks,
+    with each chunk containing 2 lines.
+
+    Args:
+        input_str: The input string with newline characters.
+
+    Returns:
+        A list of strings, where the list contains at most 6 elements.
+    """
+    # Split the string into individual lines
+    lines = input_str.splitlines()
+    num_lines = len(lines)
+
+    # If there are no lines, return an empty list
+    if num_lines == 0:
+        return []
+
+    # Define the maximum number of chunks desired
+    max_chunks = 6
+
+    # If the number of lines is already within the limit, return the lines as they are
+    if num_lines <= max_chunks:
+        return lines
+
+    # Calculate how many lines should be in each chunk, rounding up
+    lines_per_chunk = math.ceil(num_lines / max_chunks)
+
+    # Group the lines into chunks
+    result_chunks = []
+    for i in range(0, num_lines, lines_per_chunk):
+        # Slice the lines list to get the current chunk
+        chunk_lines = lines[i:i + lines_per_chunk]
+        # Join the lines back together with newlines
+        result_chunks.append('\n'.join(chunk_lines))
+
+    return result_chunks
+
 device = "cuda"
 MODEL_NAME = "kevinpro/R-PRM-7B-DPO"
 
@@ -24,54 +85,62 @@ print("Ednd dowload")
 tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
 
-@lru_cache(maxsize=100)
-def translate(text: str):
-    return _translate(text)
-
 # Only assign GPU if cache not used
 @spaces.GPU
-def _translate(text: str):
-    input_tokens = (
-        tokenizer(text, return_tensors="pt")
-        .input_ids[0]
-        .cpu()
-        .numpy()
-        .tolist()
-    )
-    translated_chunk = model.generate(
-        input_ids=torch.tensor([input_tokens]).to(device),
-        max_length=len(input_tokens) + 2048,
-        num_return_sequences=1,
-    )
-    full_output = tokenizer.decode(translated_chunk[0], skip_special_tokens=True).strip()
-    print(full_output)
-    return full_output
+def translate(input_question, input_cot):
+    all_judge = ""
+    reasoning_chunk = split_string_into_max_six_chunks(input_cot)
+    previous_step_string = ""
+    for index, r in enumerate(reasoning_chunk):
+        previous_step_string = previous_step_string.strip()
+        # Treat the current chunk as the "Now Step" to be verified
+        cur_step = "Step {}: ".format(index) + r
+        input_string = template.format(input_question, previous_step_string, cur_step)
+        print(input_string)
+        input_tokens = (
+            tokenizer(input_string, return_tensors="pt")
+            .input_ids[0]
+            .cpu()
+            .numpy()
+            .tolist()
+        )
+        translated_chunk = model.generate(
+            input_ids=torch.tensor([input_tokens]).to(device),
+            max_length=len(input_tokens) + 2048,
+            num_return_sequences=1,
+        )
+        full_output = tokenizer.decode(translated_chunk[0], skip_special_tokens=True)
+        # Strip the echoed prompt so only the model's judgement remains
+        full_output = full_output.replace(input_string, "")
+        # Carry forward only the verified step, not the whole prompt
+        previous_step_string += "\n" + cur_step
+        all_judge += "Step {}: ".format(index) + full_output + "\n\n"
+        print(full_output)
+    return all_judge
 
 
 description = """
 <div style="text-align: center;">
-    <h1 style="color: #0077be; font-size: 3em;">R-PRM, powered by NJUNLP</h1>
-    <h3 style="font-size: 3em;">🚀 We introduce Reasoning-Driven Process Reward Modeling (R-PRM), a novel approach that enhances LLMs' ability to evaluate mathematical reasoning step by step. By leveraging stronger LLMs to generate seed data, optimizing preferences without additional annotations, and scaling inference-time computation, R-PRM delivers comprehensive, transparent, and robust assessments of reasoning processes.</h3>
+    <h1 style="color: #0077be; font-size: 4em;">R-PRM, powered by NJUNLP</h1>
+    <h3 style="font-size: 1em;">🚀 We introduce Reasoning-Driven Process Reward Modeling (R-PRM), a novel approach that enhances LLMs' ability to evaluate mathematical reasoning step by step. By leveraging stronger LLMs to generate seed data, optimizing preferences without additional annotations, and scaling inference-time computation, R-PRM delivers comprehensive, transparent, and robust assessments of reasoning processes.</h3>
 </div>
 """
 
-examples_inputs = [["test"]]
+
 
 with gr.Blocks() as demo:
     gr.Markdown(description)
     with gr.Row():
-        input_text = gr.Textbox(label="Input Text", lines=6)
+        input_question = gr.Textbox(label="Question", lines=4)
+    with gr.Row():
+        input_cot = gr.Textbox(label="Reasoning", lines=12)
     with gr.Row():
-        btn = gr.Button("Translate text")
+        btn = gr.Button("Start Analysis")
    with gr.Row():
         output = gr.Textbox(label="Output Text", lines=6)
     btn.click(
         translate,
-        inputs=[input_text],
+        inputs=[input_question, input_cot],
         outputs=output,
     )
-    examples = gr.Examples(examples=examples_inputs, inputs=[input_text], fn=translate, outputs=output, cache_examples=True)
 
 print("Prepared")
 demo.launch()
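
For reference, a minimal sketch of how the new split_string_into_max_six_chunks helper behaves; the inputs below are invented for illustration and are not part of the commit:

    # A 12-line trace is split into 6 chunks of ceil(12 / 6) = 2 lines each.
    cot_12 = "\n".join("line {}".format(i) for i in range(1, 13))
    chunks = split_string_into_max_six_chunks(cot_12)
    assert len(chunks) == 6
    assert chunks[0] == "line 1\nline 2"

    # With 6 or fewer lines, each line becomes its own chunk.
    assert split_string_into_max_six_chunks("a\nb\nc") == ["a", "b", "c"]

Note that rounding up can yield fewer than 6 chunks: a 7-line input gets ceil(7 / 6) = 2 lines per chunk, hence 4 chunks.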
 
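The heart of the change is the per-step verification loop in translate: each chunk of the reasoning trace is presented as the "Now Step", with all earlier chunks accumulated as "Previous Steps". A model-free sketch of that prompt assembly, using the template and helper defined in this commit and a made-up question and trace:

    question = "What is 2 + 3 * 4?"
    cot = "Compute 3 * 4 = 12.\nAdd 2 to get 14."

    previous = ""
    for index, step in enumerate(split_string_into_max_six_chunks(cot)):
        previous = previous.strip()
        cur_step = "Step {}: ".format(index) + step
        prompt = template.format(question, previous, cur_step)
        # Step 0 sees an empty "Previous Steps:"; step 1 sees "Step 0: Compute 3 * 4 = 12."
        previous += "\n" + cur_step

Each prompt ends with "Reply:", and the model's answer is expected to close with "Verification: Is the step correct (Yes/No)? X", which is what the accumulated all_judge output surfaces for every step.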
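Because decoder-only models echo the prompt in the output of generate, the commit removes it with full_output.replace(input_string, ""). A somewhat more robust alternative (a sketch under the same tokenizer/model assumptions, not part of the commit) is to decode only the newly generated token ids:

    prompt_length = len(input_tokens)
    generated_ids = translated_chunk[0][prompt_length:]
    full_output = tokenizer.decode(generated_ids, skip_special_tokens=True)

This avoids relying on the decoded text matching the prompt string exactly, since tokenization round-trips do not always reproduce the input verbatim.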