kevinpro committed on
Commit 939f8b7 · verified · 1 Parent(s): 6b84df3

Update app.py

Files changed (1)
  1. app.py +96 -27
app.py CHANGED
@@ -7,7 +7,68 @@ from transformers import AutoTokenizer, AutoModel,AutoModelForCausalLM
 import torch
 
 # Assume openai_client is already defined, e.g.:
+template = """You are an excellent math teacher. Please verify the correctness of the Now Step.
+
+You first need to analyze the Now Step and the Previous Steps and then summarize based on your analysis.
+Analysis:
+You need to analyze the following aspects.
+**Previous Steps Analysis**: You need to analyze the Previous Steps step by step. For each step, you need to first explain what the current step is doing, then you try to find any error in the current step.
+**Now Step Analysis**: You first need to explain what the Now Step is doing, and then point out which part of the Question it is trying to solve or which part of the information it states.
+**Data Source Analysis**: First you need to find out what data are used in the Now Step, and then you need to determine whether the source of the data is reasonable and correct. When you judge whether the source of a piece of data is reasonable and correct, you need to specify its exact source, such as which part of the question or which content of a previous step, and then determine whether the source and the current use are consistent, i.e., whether the Now Step uses it correctly.
+**Consistency Analysis**: You need to check that the Now Step is consistent with the contents of the Previous Steps, and then you need to check that all the information inside the Now Step is consistent.
+**Calculation Analysis**: If the Now Step involves any calculations, such as addition, subtraction, multiplication, division, equations, modulo operations, etc., you will first need to perform a check on the calculation, such as a reverse operation, to see if the calculation was done correctly, and then analyze the results of your check to see if there was an error in the calculation.
+Conclusion:
+Please verify the correctness of the Now Step based on your analysis; if there is any error in the Now Step then the Now Step is wrong, and vice versa the Now Step is correct. At the end of the Conclusion, when you give your final answer, write it in the form "Verification: Is the step correct (Yes/No)? X", where X is either Yes or No.
+
+Question: {}
+Previous Steps: {}
+Now Step: {}
+Please carefully analyze the correctness of the Now Step.
+Reply:"""
+
+import math
+
+def split_string_into_max_six_chunks(input_str: str) -> list[str]:
+    """
+    Splits a string by newlines into a maximum of 6 chunks.
+
+    For example, if the string has 12 lines, it will be split into 6 chunks,
+    with each chunk containing 2 lines.
+
+    Args:
+        input_str: The input string with newline characters.
+
+    Returns:
+        A list of strings, where the list contains at most 6 elements.
+    """
+    # Split the string into individual lines
+    lines = input_str.splitlines()
+    num_lines = len(lines)
+
+    # If there are no lines, return an empty list
+    if num_lines == 0:
+        return []
+
+    # Define the maximum number of chunks desired
+    max_chunks = 6
+
+    # If the number of lines is already within the limit, return the lines as they are
+    if num_lines <= max_chunks:
+        return lines
+
+    # Calculate how many lines should be in each chunk, rounding up
+    lines_per_chunk = math.ceil(num_lines / max_chunks)
+
+    # Group the lines into chunks
+    result_chunks = []
+    for i in range(0, num_lines, lines_per_chunk):
+        # Slice the lines list to get the current chunk
+        chunk_lines = lines[i:i + lines_per_chunk]
+        # Join the lines back together with newlines
+        result_chunks.append('\n'.join(chunk_lines))
+
+    return result_chunks
+
 device = "cuda"
 MODEL_NAME = "kevinpro/R-PRM-7B-DPO"
 
@@ -24,54 +85,62 @@ print("Ednd dowload")
 tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
 
-@lru_cache(maxsize=100)
-def translate(text: str):
-    return _translate(text)
-
 # Only assign GPU if cache not used
 @spaces.GPU
-def _translate(text: str):
-    input_tokens = (
-        tokenizer(text, return_tensors="pt")
-        .input_ids[0]
-        .cpu()
-        .numpy()
-        .tolist()
-    )
-    translated_chunk = model.generate(
-        input_ids=torch.tensor([input_tokens]).to(device),
-        max_length=len(input_tokens) + 2048,
-        num_return_sequences=1,
-    )
-    full_output = tokenizer.decode(translated_chunk[0], skip_special_tokens=True).strip()
-    print(full_output)
-    return full_output
+def translate(input_question, input_cot):
+    all_judge = ""
+    reasoning_chunk = split_string_into_max_six_chunks(input_cot)
+    previous_step_string = ""
+    for index, r in enumerate(reasoning_chunk):
+        previous_step_string = previous_step_string.strip()
+        # Treat the current chunk as the "Now Step" to be verified
+        cur_step = "Step {}: ".format(index) + r
+        input_string = template.format(input_question, previous_step_string, cur_step)
+        print(input_string)
+        input_tokens = (
+            tokenizer(input_string, return_tensors="pt")
+            .input_ids[0]
+            .cpu()
+            .numpy()
+            .tolist()
+        )
+        translated_chunk = model.generate(
+            input_ids=torch.tensor([input_tokens]).to(device),
+            max_length=len(input_tokens) + 2048,
+            num_return_sequences=1,
+        )
+        full_output = tokenizer.decode(translated_chunk[0], skip_special_tokens=True)
+        # Strip the echoed prompt so only the model's judgement remains
+        full_output = full_output.replace(input_string, "")
+        # Carry forward only the verified step, not the whole prompt
+        previous_step_string += "\n" + cur_step
+        all_judge += "Step {}: ".format(index) + full_output + "\n\n"
+        print(full_output)
+    return all_judge
 
 
 description = """
 <div style="text-align: center;">
-    <h1 style="color: #0077be; font-size: 3em;">R-PRM, powered by NJUNLP</h1>
-    <h3 style="font-size: 3em;">🚀 We introduce Reasoning-Driven Process Reward Modeling (R-PRM), a novel approach that enhances LLMs' ability to evaluate mathematical reasoning step by step. By leveraging stronger LLMs to generate seed data, optimizing preferences without additional annotations, and scaling inference-time computation, R-PRM delivers comprehensive, transparent, and robust assessments of reasoning processes.</h3>
+    <h1 style="color: #0077be; font-size: 4em;">R-PRM, powered by NJUNLP</h1>
+    <h3 style="font-size: 1em;">🚀 We introduce Reasoning-Driven Process Reward Modeling (R-PRM), a novel approach that enhances LLMs' ability to evaluate mathematical reasoning step by step. By leveraging stronger LLMs to generate seed data, optimizing preferences without additional annotations, and scaling inference-time computation, R-PRM delivers comprehensive, transparent, and robust assessments of reasoning processes.</h3>
 </div>
 """
 
-examples_inputs = [["test"]]
+
 
 with gr.Blocks() as demo:
     gr.Markdown(description)
     with gr.Row():
-        input_text = gr.Textbox(label="Input Text", lines=6)
+        input_question = gr.Textbox(label="Question", lines=4)
+    with gr.Row():
+        input_cot = gr.Textbox(label="Reasoning", lines=12)
     with gr.Row():
-        btn = gr.Button("Translate text")
+        btn = gr.Button("Start Analysis")
    with gr.Row():
         output = gr.Textbox(label="Output Text", lines=6)
     btn.click(
         translate,
-        inputs=[input_text],
+        inputs=[input_question, input_cot],
         outputs=output,
     )
-    examples = gr.Examples(examples=examples_inputs, inputs=[input_text], fn=translate, outputs=output, cache_examples=True)
 
 print("Prepared")
 demo.launch()
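
For reference, a minimal sketch of how the new split_string_into_max_six_chunks helper behaves; the inputs below are invented for illustration and are not part of the commit:

    # A 12-line trace is split into 6 chunks of ceil(12 / 6) = 2 lines each.
    cot_12 = "\n".join("line {}".format(i) for i in range(1, 13))
    chunks = split_string_into_max_six_chunks(cot_12)
    assert len(chunks) == 6
    assert chunks[0] == "line 1\nline 2"

    # With 6 or fewer lines, each line becomes its own chunk.
    assert split_string_into_max_six_chunks("a\nb\nc") == ["a", "b", "c"]

Note that rounding up can yield fewer than 6 chunks: a 7-line input gets ceil(7 / 6) = 2 lines per chunk, hence 4 chunks.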
 
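The heart of the change is the per-step verification loop in translate: each chunk of the reasoning trace is presented as the "Now Step", with all earlier chunks accumulated as "Previous Steps". A model-free sketch of that prompt assembly, using the template and helper defined in this commit and a made-up question and trace:

    question = "What is 2 + 3 * 4?"
    cot = "Compute 3 * 4 = 12.\nAdd 2 to get 14."

    previous = ""
    for index, step in enumerate(split_string_into_max_six_chunks(cot)):
        previous = previous.strip()
        cur_step = "Step {}: ".format(index) + step
        prompt = template.format(question, previous, cur_step)
        # Step 0 sees an empty "Previous Steps:"; step 1 sees "Step 0: Compute 3 * 4 = 12."
        previous += "\n" + cur_step

Each prompt ends with "Reply:", and the model's answer is expected to close with "Verification: Is the step correct (Yes/No)? X", which is what the accumulated all_judge output surfaces for every step.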
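Because decoder-only models echo the prompt in the output of generate, the commit removes it with full_output.replace(input_string, ""). A somewhat more robust alternative (a sketch under the same tokenizer/model assumptions, not part of the commit) is to decode only the newly generated token ids:

    prompt_length = len(input_tokens)
    generated_ids = translated_chunk[0][prompt_length:]
    full_output = tokenizer.decode(generated_ids, skip_special_tokens=True)

This avoids relying on the decoded text matching the prompt string exactly, since tokenization round-trips do not always reproduce the input verbatim.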