	Update app.py
app.py CHANGED
@@ -11,34 +11,46 @@ class ModelInput(BaseModel):
 app = FastAPI()
 
 # Load your model and tokenizer
-model_path = "khurrameycon/SmolLM-135M-Instruct-qa_pairs_converted.json-25epochs"
+model_path = "khurrameycon/SmolLM-135M-Instruct-qa_pairs_converted.json-25epochs"
 tokenizer = AutoTokenizer.from_pretrained(model_path)
 model = AutoModelForCausalLM.from_pretrained(model_path)
 
 # Initialize the pipeline
 generator = pipeline("text-generation", model=model, tokenizer=tokenizer)
 
-
-
-def generate_response(model, tokenizer, instruction):
+# Helper function to generate a response
+def generate_response(model, tokenizer, instruction, max_new_tokens=128):
     """Generate a response from the model based on an instruction."""
+    try:
+        # Format the input as chat messages if necessary
+        messages = [{"role": "user", "content": instruction}]
+        input_text = tokenizer.apply_chat_template(
+            messages, tokenize=False, add_generation_prompt=True
+        )
+        # Tokenize and generate the output
+        inputs = tokenizer.encode(input_text, return_tensors="pt")
+        outputs = model.generate(
+            inputs,
+            max_new_tokens=max_new_tokens,
+            temperature=0.2,
+            top_p=0.9,
+            do_sample=True,
+        )
+        # Decode the output
+        response = tokenizer.decode(outputs[0], skip_special_tokens=True)
+        return response
+    except Exception as e:
+        raise ValueError(f"Error generating response: {e}")
 
+@app.post("/generate")
 def generate_text(input: ModelInput):
+    """API endpoint to generate text."""
     try:
+        # Call the helper function
+        response = generate_response(
+            model=model, tokenizer=tokenizer, instruction=input.prompt, max_new_tokens=input.max_new_tokens
+        )
+        return {"generated_text": response}
     except Exception as e:
         raise HTTPException(status_code=500, detail=str(e))
 
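For context, the new /generate handler reads input.prompt and input.max_new_tokens from the ModelInput schema declared earlier in the file (above the hunk shown). A minimal sketch of that schema and of a client call against the endpoint follows; the field default, the example prompt, and the localhost URL are assumptions for illustration, not part of this commit.

# Hypothetical sketch of the ModelInput schema implied by the handler above;
# the real definition sits outside the shown hunk, and the default is assumed.
from pydantic import BaseModel

class ModelInput(BaseModel):
    prompt: str
    max_new_tokens: int = 128

# Example client call, assuming the app is served locally,
# e.g. with `uvicorn app:app --host 0.0.0.0 --port 8000`.
import requests

resp = requests.post(
    "http://localhost:8000/generate",
    json={"prompt": "What is the capital of France?", "max_new_tokens": 64},
)
print(resp.json()["generated_text"])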