Update apis/chat_api.py
apis/chat_api.py (+30 -20)
```diff
@@ -151,26 +151,36 @@ class ChatAPIApp:
             default="Hello, who are you?",
             description="(str) Prompt",
         )
-        temperature: Union[float, None] = Field(
-            default=0.5,
-            description="(float) Temperature",
-        )
-        top_p: Union[float, None] = Field(
-            default=0.95,
-            description="(float) top p",
-        )
-        max_tokens: Union[int, None] = Field(
-            default=-1,
-            description="(int) Max tokens",
-        )
-        use_cache: bool = Field(
-            default=False,
-            description="(bool) Use cache",
-        )
         stream: bool = Field(
             default=True,
             description="(bool) Stream",
         )
+        options: dict = Field(
+            default={
+                "temperature": 0.5,
+                "top_p": 0.95,
+                "max_tokens": -1,
+                "use_cache": False
+            },
+            description="(dict) Options"
+        )
+        # temperature: Union[float, None] = Field(
+        #     default=0.5,
+        #     description="(float) Temperature",
+        # )
+        # top_p: Union[float, None] = Field(
+        #     default=0.95,
+        #     description="(float) top p",
+        # )
+        # max_tokens: Union[int, None] = Field(
+        #     default=-1,
+        #     description="(int) Max tokens",
+        # )
+        # use_cache: bool = Field(
+        #     default=False,
+        #     description="(bool) Use cache",
+        # )
+
 
     def generate_text(
         self, item: GenerateRequest, api_key: str = Depends(extract_api_key)
@@ -190,11 +200,11 @@ class ChatAPIApp:
         streamer = HuggingfaceStreamer(model=item.model)
         stream_response = streamer.chat_response(
             prompt=item.prompt,
-            temperature=item.temperature,
-            top_p=item.top_p,
-            max_new_tokens=item.max_tokens,
+            temperature=item.options.get('temperature'),
+            top_p=item.options.get('top_p'),
+            max_new_tokens=item.options.get('max_new_tokens'),
             api_key=api_key,
-            use_cache=item.use_cache,
+            use_cache=item.options.get('use_cache'),
         )
 
         if item.stream:
```
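Net effect of the commit: the four generation parameters (`temperature`, `top_p`, `max_tokens`, `use_cache`) are no longer top-level fields on `GenerateRequest`; callers now pass a single `options` dict. A minimal sketch of a request against the new schema follows — the base URL, endpoint path, and model id are assumptions for illustration; only the field names (`model`, `prompt`, `stream`, `options`) come from the diff above:

```python
# Sketch of a request body for the updated GenerateRequest schema.
# Base URL, endpoint path, and model id are hypothetical; the field
# names (model, prompt, stream, options) are taken from the diff.
import requests

payload = {
    "model": "some-model-id",  # hypothetical; generate_text reads item.model
    "prompt": "Hello, who are you?",
    "stream": False,
    "options": {
        "temperature": 0.5,
        "top_p": 0.95,
        "max_tokens": -1,
        "use_cache": False,
    },
}

resp = requests.post("http://localhost:8000/generate", json=payload)  # assumed endpoint
print(resp.json())
```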
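One catch worth flagging in the new `generate_text`: the default `options` dict defines the key `"max_tokens"`, but the handler reads `item.options.get('max_new_tokens')`, so that argument arrives as `None` under the default payload; likewise the other `.get()` calls return `None` for any key a caller omits, since no fallback values are passed. A hedged sketch of one way to restore the old defaults under the new schema — a hypothetical helper, not what the commit does; the fallback values mirror the defaults of the removed fields:

```python
from typing import Union

def resolve_options(options: Union[dict, None]) -> dict:
    """Merge caller-supplied options over the pre-commit defaults.

    Hypothetical helper for illustration; the fallbacks mirror the
    defaults of the removed top-level fields.
    """
    defaults = {
        "temperature": 0.5,
        "top_p": 0.95,
        "max_tokens": -1,
        "use_cache": False,
    }
    return {**defaults, **(options or {})}


# generate_text could then read one consistent key, e.g.:
#   opts = resolve_options(item.options)
#   ... max_new_tokens=opts["max_tokens"], ...
```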