Spaces:
				
			
			
	
			
			
		Runtime error
		
	
	
	
			
			
	
	
	
	
		
		
		Runtime error
		
	Update app.py
Browse files
    	
        app.py
    CHANGED
    
    | @@ -5,8 +5,7 @@ import torch | |
| 5 | 
             
            import spaces
         | 
| 6 | 
             
            import json
         | 
| 7 | 
             
            import re
         | 
| 8 | 
            -
             | 
| 9 | 
            -
            from googletrans import Translator
         | 
| 10 |  | 
| 11 | 
             
            # Load the processor and model
         | 
| 12 | 
             
            processor = AutoProcessor.from_pretrained(
         | 
| @@ -96,52 +95,76 @@ def decode_unicode_sequences(unicode_seq): | |
| 96 |  | 
| 97 | 
             
            def is_mandarin(text):
         | 
| 98 | 
             
                """
         | 
| 99 | 
            -
                Detects if the given text is in Mandarin.
         | 
| 100 |  | 
| 101 | 
             
                Args:
         | 
| 102 | 
             
                    text (str): The text to check.
         | 
| 103 |  | 
| 104 | 
             
                Returns:
         | 
| 105 | 
            -
                    bool: True if the text  | 
| 106 | 
             
                """
         | 
| 107 | 
            -
                 | 
| 108 | 
            -
             | 
| 109 | 
            -
                     | 
| 110 | 
            -
             | 
| 111 | 
            -
             | 
| 112 |  | 
| 113 | 
            -
            def  | 
| 114 | 
             
                """
         | 
| 115 | 
            -
                Translates  | 
| 116 |  | 
| 117 | 
             
                Args:
         | 
| 118 | 
             
                    text (str): The Mandarin text to translate.
         | 
| 119 | 
            -
                     | 
| 120 |  | 
| 121 | 
             
                Returns:
         | 
| 122 | 
             
                    str: The translated English text.
         | 
| 123 | 
             
                """
         | 
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
| 124 | 
             
                try:
         | 
| 125 | 
            -
                     | 
| 126 | 
            -
                     | 
| 127 | 
            -
             | 
| 128 | 
            -
                    print( | 
|  | |
|  | |
|  | |
| 129 | 
             
                    return text  # Return the original text if translation fails
         | 
| 130 |  | 
| 131 | 
            -
            def  | 
| 132 | 
             
                """
         | 
| 133 | 
             
                Processes the input string to find Unicode escape sequences representing Mandarin words,
         | 
| 134 | 
            -
                translates them to English, and replaces them accordingly.
         | 
| 135 |  | 
| 136 | 
             
                Args:
         | 
| 137 | 
             
                    input_string (str): The original string containing Unicode escape sequences.
         | 
|  | |
| 138 |  | 
| 139 | 
             
                Returns:
         | 
| 140 | 
             
                    str: The processed string with translations where applicable.
         | 
| 141 | 
             
                """
         | 
| 142 | 
            -
                # Initialize the translator
         | 
| 143 | 
            -
                translator = Translator()
         | 
| 144 | 
            -
                
         | 
| 145 | 
             
                # Regular expression to find groups of consecutive \uXXXX sequences
         | 
| 146 | 
             
                unicode_word_pattern = re.compile(r'(?:\\u[0-9a-fA-F]{4})+')
         | 
| 147 |  | 
| @@ -151,7 +174,7 @@ def process_text_for_mandarin_unicode(input_string): | |
| 151 | 
             
                    decoded_word = decode_unicode_sequences(unicode_seq)
         | 
| 152 |  | 
| 153 | 
             
                    if is_mandarin(decoded_word):
         | 
| 154 | 
            -
                        translated =  | 
| 155 | 
             
                        return f"{translated} ({decoded_word})"
         | 
| 156 | 
             
                    else:
         | 
| 157 | 
             
                        # If not Mandarin, return the original sequence
         | 
| @@ -183,7 +206,7 @@ def process_image_and_text(image, text): | |
| 183 | 
             
                generated_tokens = output[0, inputs['input_ids'].size(1):]
         | 
| 184 | 
             
                generated_text = processor.tokenizer.decode(generated_tokens, skip_special_tokens=True)
         | 
| 185 | 
             
                generated_text_w_json_wrapper = wrap_json_in_markdown(generated_text)
         | 
| 186 | 
            -
                generated_text_w_unicode_mdn =  | 
| 187 |  | 
| 188 | 
             
                return generated_text_w_unicode_mdn
         | 
| 189 |  | 
|  | |
| 5 | 
             
            import spaces
         | 
| 6 | 
             
            import json
         | 
| 7 | 
             
            import re
         | 
| 8 | 
            +
            import deepl
         | 
|  | |
| 9 |  | 
| 10 | 
             
            # Load the processor and model
         | 
| 11 | 
             
            processor = AutoProcessor.from_pretrained(
         | 
|  | |
| 95 |  | 
# Inclusive (first, last) code points of the Han ideograph blocks in the
# Basic Multilingual Plane. Only BMP blocks are listed because the callers
# decode four-hex-digit \uXXXX escapes.
_HAN_RANGES = (
    ('\u3400', '\u4dbf'),  # CJK Unified Ideographs Extension A
    ('\u4e00', '\u9fff'),  # CJK Unified Ideographs
    ('\uf900', '\ufaff'),  # CJK Compatibility Ideographs
)


def is_mandarin(text):
    """
    Detects if the given text contains Chinese (Han) characters using Unicode ranges.

    This is a script check, not language detection: Han ideographs are also
    used in Japanese text, so any string containing at least one of them is
    reported as Mandarin.

    Args:
        text (str): The text to check.

    Returns:
        bool: True if at least one character falls in a Han ideograph block,
            False otherwise (including for an empty string).
    """
    return any(
        first <= char <= last
        for char in text
        for first, last in _HAN_RANGES
    )
| 111 |  | 
def translate_to_english_deepl(text, api_key):
    """
    Translates Mandarin text to English using the DeepL client library.

    Translation is best-effort: if the DeepL call fails, the error is printed
    and the untranslated input is returned so the caller can still produce
    output.

    Args:
        text (str): The Mandarin text to translate.
        api_key (str): Your DeepL API authentication key.

    Returns:
        str: The translated English text, or the original ``text`` if the
            DeepL request fails.
    """
    try:
        translator = deepl.Translator(api_key)
        result = translator.translate_text(text, source_lang="ZH", target_lang="EN-US")
    except deepl.DeepLException as e:
        # The deepl client reports failures via DeepLException, not
        # requests exceptions, so this is the handler that makes the
        # fallback reachable.
        print(f"DeepL Translation error: {e}")
        return text  # Return the original text if translation fails
    return result.text
| 155 |  | 
| 156 | 
            +
            def process_text_deepl(input_string, api_key):
         | 
| 157 | 
             
                """
         | 
| 158 | 
             
                Processes the input string to find Unicode escape sequences representing Mandarin words,
         | 
| 159 | 
            +
                translates them to English using DeepL, and replaces them accordingly.
         | 
| 160 |  | 
| 161 | 
             
                Args:
         | 
| 162 | 
             
                    input_string (str): The original string containing Unicode escape sequences.
         | 
| 163 | 
            +
                    api_key (str): Your DeepL API authentication key.
         | 
| 164 |  | 
| 165 | 
             
                Returns:
         | 
| 166 | 
             
                    str: The processed string with translations where applicable.
         | 
| 167 | 
             
                """
         | 
|  | |
|  | |
|  | |
| 168 | 
             
                # Regular expression to find groups of consecutive \uXXXX sequences
         | 
| 169 | 
             
                unicode_word_pattern = re.compile(r'(?:\\u[0-9a-fA-F]{4})+')
         | 
| 170 |  | 
|  | |
| 174 | 
             
                    decoded_word = decode_unicode_sequences(unicode_seq)
         | 
| 175 |  | 
| 176 | 
             
                    if is_mandarin(decoded_word):
         | 
| 177 | 
            +
                        translated = translate_to_english_deepl(decoded_word, api_key)
         | 
| 178 | 
             
                        return f"{translated} ({decoded_word})"
         | 
| 179 | 
             
                    else:
         | 
| 180 | 
             
                        # If not Mandarin, return the original sequence
         | 
|  | |
| 206 | 
             
                generated_tokens = output[0, inputs['input_ids'].size(1):]
         | 
| 207 | 
             
                generated_text = processor.tokenizer.decode(generated_tokens, skip_special_tokens=True)
         | 
| 208 | 
             
                generated_text_w_json_wrapper = wrap_json_in_markdown(generated_text)
         | 
| 209 | 
            +
                generated_text_w_unicode_mdn = process_text_deepl(generated_text_w_json_wrapper, "a5b1749b-7112-4c2d-81a3-33ea18478bb4:fx")
         | 
| 210 |  | 
| 211 | 
             
                return generated_text_w_unicode_mdn
         | 
| 212 |  |