Spaces:
				
			
			
	
			
			
		Runtime error
		
	
	
	
			
			
	
	
	
	
		
		
		Runtime error
		
	Merge branch 'master' into huggingface
Browse files
- crazy_functions/latex_utils.py +33 -18
- request_llm/bridge_all.py +10 -1
- request_llm/bridge_chatglm.py +3 -3
- toolbox.py +1 -1
    	
        crazy_functions/latex_utils.py
    CHANGED
    
    | @@ -27,6 +27,24 @@ def set_forbidden_text(text, mask, pattern, flags=0): | |
| 27 | 
             
                    mask[res.span()[0]:res.span()[1]] = PRESERVE
         | 
| 28 | 
             
                return text, mask
         | 
| 29 |  | 
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
| 30 | 
             
            def set_forbidden_text_careful_brace(text, mask, pattern, flags=0):
         | 
| 31 | 
             
                """
         | 
| 32 | 
             
                Add a preserve text area in this paper (text become untouchable for GPT).
         | 
| @@ -326,6 +344,7 @@ def split_subprocess(txt, project_folder, return_dict, opts): | |
| 326 | 
             
                # reverse 操作必须放在最后
         | 
| 327 | 
             
                text, mask = reverse_forbidden_text_careful_brace(text, mask, r"\\caption\{(.*?)\}", re.DOTALL, forbid_wrapper=True)
         | 
| 328 | 
             
                text, mask = reverse_forbidden_text_careful_brace(text, mask, r"\\abstract\{(.*?)\}", re.DOTALL, forbid_wrapper=True)
         | 
|  | |
| 329 | 
             
                root = convert_to_linklist(text, mask)
         | 
| 330 |  | 
| 331 | 
             
                # 修复括号
         | 
| @@ -672,10 +691,9 @@ def remove_buggy_lines(file_path, log_path, tex_name, tex_name_pure, n_fix, work | |
| 672 | 
             
                    print("Fatal error occurred, but we cannot identify error, please download zip, read latex log, and compile manually.")
         | 
| 673 | 
             
                    return False, -1, [-1]
         | 
| 674 |  | 
| 675 | 
            -
             | 
| 676 | 
            -
            def compile_latex_with_timeout(command, timeout=60):
         | 
| 677 | 
             
                import subprocess
         | 
| 678 | 
            -
                process = subprocess.Popen(command, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
         | 
| 679 | 
             
                try:
         | 
| 680 | 
             
                    stdout, stderr = process.communicate(timeout=timeout)
         | 
| 681 | 
             
                except subprocess.TimeoutExpired:
         | 
| @@ -699,24 +717,24 @@ def 编译Latex(chatbot, history, main_file_original, main_file_modified, work_f | |
| 699 |  | 
| 700 | 
             
                    # https://stackoverflow.com/questions/738755/dont-make-me-manually-abort-a-latex-compile-when-theres-an-error
         | 
| 701 | 
             
                    yield from update_ui_lastest_msg(f'尝试第 {n_fix}/{max_try} 次编译, 编译原始PDF ...', chatbot, history)   # 刷新Gradio前端界面
         | 
| 702 | 
            -
                     | 
| 703 |  | 
| 704 | 
             
                    yield from update_ui_lastest_msg(f'尝试第 {n_fix}/{max_try} 次编译, 编译转化后的PDF ...', chatbot, history)   # 刷新Gradio前端界面
         | 
| 705 | 
            -
                     | 
| 706 |  | 
| 707 | 
             
                    if ok and os.path.exists(pj(work_folder_modified, f'{main_file_modified}.pdf')):
         | 
| 708 | 
             
                        # 只有第二步成功,才能继续下面的步骤
         | 
| 709 | 
             
                        yield from update_ui_lastest_msg(f'尝试第 {n_fix}/{max_try} 次编译, 编译BibTex ...', chatbot, history)    # 刷新Gradio前端界面
         | 
| 710 | 
             
                        if not os.path.exists(pj(work_folder_original, f'{main_file_original}.bbl')):
         | 
| 711 | 
            -
                             | 
| 712 | 
             
                        if not os.path.exists(pj(work_folder_modified, f'{main_file_modified}.bbl')):
         | 
| 713 | 
            -
                             | 
| 714 |  | 
| 715 | 
             
                        yield from update_ui_lastest_msg(f'尝试第 {n_fix}/{max_try} 次编译, 编译文献交叉引用 ...', chatbot, history)  # 刷新Gradio前端界面
         | 
| 716 | 
            -
                         | 
| 717 | 
            -
                         | 
| 718 | 
            -
                         | 
| 719 | 
            -
                         | 
| 720 |  | 
| 721 | 
             
                        if mode!='translate_zh':
         | 
| 722 | 
             
                            yield from update_ui_lastest_msg(f'尝试第 {n_fix}/{max_try} 次编译, 使用latexdiff生成论文转化前后对比 ...', chatbot, history) # 刷新Gradio前端界面
         | 
| @@ -724,13 +742,11 @@ def 编译Latex(chatbot, history, main_file_original, main_file_modified, work_f | |
| 724 | 
             
                            ok = compile_latex_with_timeout(f'latexdiff --encoding=utf8 --append-safecmd=subfile {work_folder_original}/{main_file_original}.tex  {work_folder_modified}/{main_file_modified}.tex --flatten > {work_folder}/merge_diff.tex')
         | 
| 725 |  | 
| 726 | 
             
                            yield from update_ui_lastest_msg(f'尝试第 {n_fix}/{max_try} 次编译, 正在编译对比PDF ...', chatbot, history)   # 刷新Gradio前端界面
         | 
| 727 | 
            -
                             | 
| 728 | 
            -
                             | 
| 729 | 
            -
                             | 
| 730 | 
            -
                             | 
| 731 |  | 
| 732 | 
            -
                    # <--------------------->
         | 
| 733 | 
            -
                    os.chdir(current_dir)
         | 
| 734 |  | 
| 735 | 
             
                    # <---------- 检查结果 ----------->
         | 
| 736 | 
             
                    results_ = ""
         | 
| @@ -766,7 +782,6 @@ def 编译Latex(chatbot, history, main_file_original, main_file_modified, work_f | |
| 766 | 
             
                        yield from update_ui_lastest_msg(f'由于最为关键的转化PDF编译失败, 将根据报错信息修正tex源文件并重试, 当前报错的latex代码处于第{buggy_lines}行 ...', chatbot, history)   # 刷新Gradio前端界面
         | 
| 767 | 
             
                        if not can_retry: break
         | 
| 768 |  | 
| 769 | 
            -
                os.chdir(current_dir)
         | 
| 770 | 
             
                return False # 失败啦
         | 
| 771 |  | 
| 772 |  | 
|  | |
| 27 | 
             
                    mask[res.span()[0]:res.span()[1]] = PRESERVE
         | 
| 28 | 
             
                return text, mask
         | 
| 29 |  | 
| 30 | 
            +
            def reverse_forbidden_text(text, mask, pattern, flags=0, forbid_wrapper=True):
         | 
| 31 | 
            +
                """
         | 
| 32 | 
            +
                Move area out of preserve area (make text editable for GPT)
         | 
| 33 | 
            +
                count the number of the braces so as to catch complete text area. 
         | 
| 34 | 
            +
                e.g.
         | 
| 35 | 
            +
                \begin{abstract} blablablablablabla. \end{abstract} 
         | 
| 36 | 
            +
                """
         | 
| 37 | 
            +
                if isinstance(pattern, list): pattern = '|'.join(pattern)
         | 
| 38 | 
            +
                pattern_compile = re.compile(pattern, flags)
         | 
| 39 | 
            +
                for res in pattern_compile.finditer(text):
         | 
| 40 | 
            +
                    if not forbid_wrapper:
         | 
| 41 | 
            +
                        mask[res.span()[0]:res.span()[1]] = TRANSFORM
         | 
| 42 | 
            +
                    else:
         | 
| 43 | 
            +
                        mask[res.regs[0][0]: res.regs[1][0]] = PRESERVE   # '\\begin{abstract}'
         | 
| 44 | 
            +
                        mask[res.regs[1][0]: res.regs[1][1]] = TRANSFORM   # abstract
         | 
| 45 | 
            +
                        mask[res.regs[1][1]: res.regs[0][1]] = PRESERVE   # '\\end{abstract}'
         | 
| 46 | 
            +
                return text, mask
         | 
| 47 | 
            +
             | 
| 48 | 
             
            def set_forbidden_text_careful_brace(text, mask, pattern, flags=0):
         | 
| 49 | 
             
                """
         | 
| 50 | 
             
                Add a preserve text area in this paper (text become untouchable for GPT).
         | 
|  | |
| 344 | 
             
                # reverse 操作必须放在最后
         | 
| 345 | 
             
                text, mask = reverse_forbidden_text_careful_brace(text, mask, r"\\caption\{(.*?)\}", re.DOTALL, forbid_wrapper=True)
         | 
| 346 | 
             
                text, mask = reverse_forbidden_text_careful_brace(text, mask, r"\\abstract\{(.*?)\}", re.DOTALL, forbid_wrapper=True)
         | 
| 347 | 
            +
                text, mask = reverse_forbidden_text(text, mask, r"\\begin\{abstract\}(.*?)\\end\{abstract\}", re.DOTALL, forbid_wrapper=True)
         | 
| 348 | 
             
                root = convert_to_linklist(text, mask)
         | 
| 349 |  | 
| 350 | 
             
                # 修复括号
         | 
|  | |
| 691 | 
             
                    print("Fatal error occurred, but we cannot identify error, please download zip, read latex log, and compile manually.")
         | 
| 692 | 
             
                    return False, -1, [-1]
         | 
| 693 |  | 
| 694 | 
            +
            def compile_latex_with_timeout(command, cwd, timeout=60):
         | 
|  | |
| 695 | 
             
                import subprocess
         | 
| 696 | 
            +
                process = subprocess.Popen(command, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE, cwd=cwd)
         | 
| 697 | 
             
                try:
         | 
| 698 | 
             
                    stdout, stderr = process.communicate(timeout=timeout)
         | 
| 699 | 
             
                except subprocess.TimeoutExpired:
         | 
|  | |
| 717 |  | 
| 718 | 
             
                    # https://stackoverflow.com/questions/738755/dont-make-me-manually-abort-a-latex-compile-when-theres-an-error
         | 
| 719 | 
             
                    yield from update_ui_lastest_msg(f'尝试第 {n_fix}/{max_try} 次编译, 编译原始PDF ...', chatbot, history)   # 刷新Gradio前端界面
         | 
| 720 | 
            +
                    ok = compile_latex_with_timeout(f'pdflatex -interaction=batchmode -file-line-error {main_file_original}.tex', work_folder_original)
         | 
| 721 |  | 
| 722 | 
             
                    yield from update_ui_lastest_msg(f'尝试第 {n_fix}/{max_try} 次编译, 编译转化后的PDF ...', chatbot, history)   # 刷新Gradio前端界面
         | 
| 723 | 
            +
                    ok = compile_latex_with_timeout(f'pdflatex -interaction=batchmode -file-line-error {main_file_modified}.tex', work_folder_modified)
         | 
| 724 |  | 
| 725 | 
             
                    if ok and os.path.exists(pj(work_folder_modified, f'{main_file_modified}.pdf')):
         | 
| 726 | 
             
                        # 只有第二步成功,才能继续下面的步骤
         | 
| 727 | 
             
                        yield from update_ui_lastest_msg(f'尝试第 {n_fix}/{max_try} 次编译, 编译BibTex ...', chatbot, history)    # 刷新Gradio前端界面
         | 
| 728 | 
             
                        if not os.path.exists(pj(work_folder_original, f'{main_file_original}.bbl')):
         | 
| 729 | 
            +
                            ok = compile_latex_with_timeout(f'bibtex  {main_file_original}.aux', work_folder_original)
         | 
| 730 | 
             
                        if not os.path.exists(pj(work_folder_modified, f'{main_file_modified}.bbl')):
         | 
| 731 | 
            +
                            ok = compile_latex_with_timeout(f'bibtex  {main_file_modified}.aux', work_folder_modified)
         | 
| 732 |  | 
| 733 | 
             
                        yield from update_ui_lastest_msg(f'尝试第 {n_fix}/{max_try} 次编译, 编译文献交叉引用 ...', chatbot, history)  # 刷新Gradio前端界面
         | 
| 734 | 
            +
                        ok = compile_latex_with_timeout(f'pdflatex -interaction=batchmode -file-line-error {main_file_original}.tex', work_folder_original)
         | 
| 735 | 
            +
                        ok = compile_latex_with_timeout(f'pdflatex -interaction=batchmode -file-line-error {main_file_modified}.tex', work_folder_modified)
         | 
| 736 | 
            +
                        ok = compile_latex_with_timeout(f'pdflatex -interaction=batchmode -file-line-error {main_file_original}.tex', work_folder_original)
         | 
| 737 | 
            +
                        ok = compile_latex_with_timeout(f'pdflatex -interaction=batchmode -file-line-error {main_file_modified}.tex', work_folder_modified)
         | 
| 738 |  | 
| 739 | 
             
                        if mode!='translate_zh':
         | 
| 740 | 
             
                            yield from update_ui_lastest_msg(f'尝试第 {n_fix}/{max_try} 次编译, 使用latexdiff生成论文转化前后对比 ...', chatbot, history) # 刷新Gradio前端界面
         | 
|  | |
| 742 | 
             
                            ok = compile_latex_with_timeout(f'latexdiff --encoding=utf8 --append-safecmd=subfile {work_folder_original}/{main_file_original}.tex  {work_folder_modified}/{main_file_modified}.tex --flatten > {work_folder}/merge_diff.tex')
         | 
| 743 |  | 
| 744 | 
             
                            yield from update_ui_lastest_msg(f'尝试第 {n_fix}/{max_try} 次编译, 正在编译对比PDF ...', chatbot, history)   # 刷新Gradio前端界面
         | 
| 745 | 
            +
                            ok = compile_latex_with_timeout(f'pdflatex  -interaction=batchmode -file-line-error merge_diff.tex', work_folder)
         | 
| 746 | 
            +
                            ok = compile_latex_with_timeout(f'bibtex    merge_diff.aux', work_folder)
         | 
| 747 | 
            +
                            ok = compile_latex_with_timeout(f'pdflatex  -interaction=batchmode -file-line-error merge_diff.tex', work_folder)
         | 
| 748 | 
            +
                            ok = compile_latex_with_timeout(f'pdflatex  -interaction=batchmode -file-line-error merge_diff.tex', work_folder)
         | 
| 749 |  | 
|  | |
|  | |
| 750 |  | 
| 751 | 
             
                    # <---------- 检查结果 ----------->
         | 
| 752 | 
             
                    results_ = ""
         | 
|  | |
| 782 | 
             
                        yield from update_ui_lastest_msg(f'由于最为关键的转化PDF编译失败, 将根据报错信息修正tex源文件并重试, 当前报错的latex代码处于第{buggy_lines}行 ...', chatbot, history)   # 刷新Gradio前端界面
         | 
| 783 | 
             
                        if not can_retry: break
         | 
| 784 |  | 
|  | |
| 785 | 
             
                return False # 失败啦
         | 
| 786 |  | 
| 787 |  | 
    	
        request_llm/bridge_all.py
    CHANGED
    
    | @@ -152,7 +152,7 @@ model_info = { | |
| 152 | 
             
                    "token_cnt": get_token_num_gpt4,
         | 
| 153 | 
             
                },
         | 
| 154 |  | 
| 155 | 
            -
                # chatglm
         | 
| 156 | 
             
                "chatglm": {
         | 
| 157 | 
             
                    "fn_with_ui": chatglm_ui,
         | 
| 158 | 
             
                    "fn_without_ui": chatglm_noui,
         | 
| @@ -161,6 +161,15 @@ model_info = { | |
| 161 | 
             
                    "tokenizer": tokenizer_gpt35,
         | 
| 162 | 
             
                    "token_cnt": get_token_num_gpt35,
         | 
| 163 | 
             
                },
         | 
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
| 164 | 
             
                # newbing
         | 
| 165 | 
             
                "newbing": {
         | 
| 166 | 
             
                    "fn_with_ui": newbing_ui,
         | 
|  | |
| 152 | 
             
                    "token_cnt": get_token_num_gpt4,
         | 
| 153 | 
             
                },
         | 
| 154 |  | 
| 155 | 
            +
                # 将 chatglm 直接对齐到 chatglm2
         | 
| 156 | 
             
                "chatglm": {
         | 
| 157 | 
             
                    "fn_with_ui": chatglm_ui,
         | 
| 158 | 
             
                    "fn_without_ui": chatglm_noui,
         | 
|  | |
| 161 | 
             
                    "tokenizer": tokenizer_gpt35,
         | 
| 162 | 
             
                    "token_cnt": get_token_num_gpt35,
         | 
| 163 | 
             
                },
         | 
| 164 | 
            +
                "chatglm2": {
         | 
| 165 | 
            +
                    "fn_with_ui": chatglm_ui,
         | 
| 166 | 
            +
                    "fn_without_ui": chatglm_noui,
         | 
| 167 | 
            +
                    "endpoint": None,
         | 
| 168 | 
            +
                    "max_token": 1024,
         | 
| 169 | 
            +
                    "tokenizer": tokenizer_gpt35,
         | 
| 170 | 
            +
                    "token_cnt": get_token_num_gpt35,
         | 
| 171 | 
            +
                },
         | 
| 172 | 
            +
                
         | 
| 173 | 
             
                # newbing
         | 
| 174 | 
             
                "newbing": {
         | 
| 175 | 
             
                    "fn_with_ui": newbing_ui,
         | 
    	
        request_llm/bridge_chatglm.py
    CHANGED
    
    | @@ -40,12 +40,12 @@ class GetGLMHandle(Process): | |
| 40 | 
             
                    while True:
         | 
| 41 | 
             
                        try:
         | 
| 42 | 
             
                            if self.chatglm_model is None:
         | 
| 43 | 
            -
                                self.chatglm_tokenizer = AutoTokenizer.from_pretrained("THUDM/ | 
| 44 | 
             
                                device, = get_conf('LOCAL_MODEL_DEVICE')
         | 
| 45 | 
             
                                if device=='cpu':
         | 
| 46 | 
            -
                                    self.chatglm_model = AutoModel.from_pretrained("THUDM/ | 
| 47 | 
             
                                else:
         | 
| 48 | 
            -
                                    self.chatglm_model = AutoModel.from_pretrained("THUDM/ | 
| 49 | 
             
                                self.chatglm_model = self.chatglm_model.eval()
         | 
| 50 | 
             
                                break
         | 
| 51 | 
             
                            else:
         | 
|  | |
| 40 | 
             
                    while True:
         | 
| 41 | 
             
                        try:
         | 
| 42 | 
             
                            if self.chatglm_model is None:
         | 
| 43 | 
            +
                                self.chatglm_tokenizer = AutoTokenizer.from_pretrained("THUDM/chatglm2-6b", trust_remote_code=True)
         | 
| 44 | 
             
                                device, = get_conf('LOCAL_MODEL_DEVICE')
         | 
| 45 | 
             
                                if device=='cpu':
         | 
| 46 | 
            +
                                    self.chatglm_model = AutoModel.from_pretrained("THUDM/chatglm2-6b", trust_remote_code=True).float()
         | 
| 47 | 
             
                                else:
         | 
| 48 | 
            +
                                    self.chatglm_model = AutoModel.from_pretrained("THUDM/chatglm2-6b", trust_remote_code=True).half().cuda()
         | 
| 49 | 
             
                                self.chatglm_model = self.chatglm_model.eval()
         | 
| 50 | 
             
                                break
         | 
| 51 | 
             
                            else:
         | 
    	
        toolbox.py
    CHANGED
    
    | @@ -498,7 +498,7 @@ def on_report_generated(cookies, files, chatbot): | |
| 498 | 
             
                else:
         | 
| 499 | 
             
                    report_files = find_recent_files('gpt_log')
         | 
| 500 | 
             
                if len(report_files) == 0:
         | 
| 501 | 
            -
                    return None, chatbot
         | 
| 502 | 
             
                # files.extend(report_files)
         | 
| 503 | 
             
                file_links = ''
         | 
| 504 | 
             
                for f in report_files: file_links += f'<br/><a href="file={os.path.abspath(f)}" target="_blank">{f}</a>'
         | 
|  | |
| 498 | 
             
                else:
         | 
| 499 | 
             
                    report_files = find_recent_files('gpt_log')
         | 
| 500 | 
             
                if len(report_files) == 0:
         | 
| 501 | 
            +
                    return cookies, None, chatbot
         | 
| 502 | 
             
                # files.extend(report_files)
         | 
| 503 | 
             
                file_links = ''
         | 
| 504 | 
             
                for f in report_files: file_links += f'<br/><a href="file={os.path.abspath(f)}" target="_blank">{f}</a>'
         |