Spaces:
				
			
			
	
			
			
		Runtime error
		
	
	
	
			
			
	
	
	
	
		
		
		Runtime error
		
	Update app.py
Browse files
    	
        app.py
    CHANGED
    
    | @@ -11,38 +11,13 @@ MAX_MAX_NEW_TOKENS = 2048 | |
| 11 | 
             
            DEFAULT_MAX_NEW_TOKENS = 1024
         | 
| 12 | 
             
            MAX_INPUT_TOKEN_LENGTH = int(os.getenv("MAX_INPUT_TOKEN_LENGTH", "4096"))
         | 
| 13 |  | 
| 14 | 
            -
            DESCRIPTION = """\
         | 
| 15 | 
            -
            # Llama-2 13B Chat
         | 
| 16 |  | 
| 17 | 
            -
             | 
|  | |
|  | |
|  | |
| 18 |  | 
| 19 | 
            -
            🔎 For more details about the Llama 2 family of models and how to use them with `transformers`, take a look [at our blog post](https://huggingface.co/blog/llama2).
         | 
| 20 |  | 
| 21 | 
            -
            🔨 Looking for an even more powerful model? Check out the large [**70B** model demo](https://huggingface.co/spaces/ysharma/Explore_llamav2_with_TGI).
         | 
| 22 | 
            -
            🐇 For a smaller model that you can run on many GPUs, check our [7B model demo](https://huggingface.co/spaces/huggingface-projects/llama-2-7b-chat).
         | 
| 23 | 
            -
             | 
| 24 | 
            -
            """
         | 
| 25 | 
            -
             | 
| 26 | 
            -
            LICENSE = """
         | 
| 27 | 
            -
            <p/>
         | 
| 28 | 
            -
             | 
| 29 | 
            -
            ---
         | 
| 30 | 
            -
            As a derivate work of [Llama-2-13b-chat](https://huggingface.co/meta-llama/Llama-2-13b-chat) by Meta,
         | 
| 31 | 
            -
            this demo is governed by the original [license](https://huggingface.co/spaces/huggingface-projects/llama-2-13b-chat/blob/main/LICENSE.txt) and [acceptable use policy](https://huggingface.co/spaces/huggingface-projects/llama-2-13b-chat/blob/main/USE_POLICY.md).
         | 
| 32 | 
            -
            """
         | 
| 33 | 
            -
             | 
| 34 | 
            -
            if not torch.cuda.is_available():
         | 
| 35 | 
            -
                DESCRIPTION += "\n<p>Running on CPU 🥶 This demo does not work on CPU.</p>"
         | 
| 36 | 
            -
             | 
| 37 | 
            -
             | 
| 38 | 
            -
            if torch.cuda.is_available():
         | 
| 39 | 
            -
                model_id = "meta-llama/Llama-2-13b-chat-hf"
         | 
| 40 | 
            -
                model = AutoModelForCausalLM.from_pretrained(model_id, device_map="auto", load_in_4bit=True)
         | 
| 41 | 
            -
                tokenizer = AutoTokenizer.from_pretrained(model_id)
         | 
| 42 | 
            -
                tokenizer.use_default_system_prompt = False
         | 
| 43 | 
            -
             | 
| 44 | 
            -
             | 
| 45 | 
            -
            @spaces.GPU
         | 
| 46 | 
             
            def generate(
         | 
| 47 | 
             
                message: str,
         | 
| 48 | 
             
                chat_history: list[dict],
         | 
| @@ -128,11 +103,8 @@ chat_interface = gr.ChatInterface( | |
| 128 | 
             
                ],
         | 
| 129 | 
             
                stop_btn=None,
         | 
| 130 | 
             
                examples=[
         | 
| 131 | 
            -
                    [" | 
| 132 | 
            -
                    [" | 
| 133 | 
            -
                    ["Explain the plot of Cinderella in a sentence."],
         | 
| 134 | 
            -
                    ["How many hours does it take a man to eat a Helicopter?"],
         | 
| 135 | 
            -
                    ["Write a 100-word article on 'Benefits of Open-Source in AI research'"],
         | 
| 136 | 
             
                ],
         | 
| 137 | 
             
                cache_examples=False,
         | 
| 138 | 
             
                type="messages",
         | 
|  | |
| 11 | 
             
            DEFAULT_MAX_NEW_TOKENS = 1024
         | 
| 12 | 
             
            MAX_INPUT_TOKEN_LENGTH = int(os.getenv("MAX_INPUT_TOKEN_LENGTH", "4096"))
         | 
| 13 |  | 
|  | |
|  | |
| 14 |  | 
| 15 | 
            +
            model_id = "https://huggingface.co/stabilityai/ar-stablelm-2-chat"
         | 
| 16 | 
            +
            model = AutoModelForCausalLM.from_pretrained(model_id, device_map="auto", load_in_4bit=True)
         | 
| 17 | 
            +
            tokenizer = AutoTokenizer.from_pretrained(model_id)
         | 
| 18 | 
            +
            tokenizer.use_default_system_prompt = False
         | 
| 19 |  | 
|  | |
| 20 |  | 
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
| 21 | 
             
            def generate(
         | 
| 22 | 
             
                message: str,
         | 
| 23 | 
             
                chat_history: list[dict],
         | 
|  | |
| 103 | 
             
                ],
         | 
| 104 | 
             
                stop_btn=None,
         | 
| 105 | 
             
                examples=[
         | 
| 106 | 
            +
                    ["السلام عليكم"],
         | 
| 107 | 
            +
                    ["اعرب الجملة التالية: ذهبت الى السوق"]
         | 
|  | |
|  | |
|  | |
| 108 | 
             
                ],
         | 
| 109 | 
             
                cache_examples=False,
         | 
| 110 | 
             
                type="messages",
         | 
 
			
