hf-llm-api

Runtime error

App Files Community

jonathanjordan21 committed on Jul 16, 2024

Commit

f9ac435

verified ·

1 Parent(s): 198a4f7

Update apis/chat_api.py

Browse files

Files changed (1) hide show

apis/chat_api.py +32 -30

apis/chat_api.py CHANGED Viewed

@@ -155,33 +155,33 @@ class ChatAPIApp:
             default=False,
             description="(bool) Stream",
         )
-        # options: dict = Field(
-        #     default={
-        #         "temperature":0.5,
-        #         "top_p":0.95,
-        #         "max_tokens":-1,
-        #         "use_cache":False
-        #     },
-        #     description="(dict) Options"
-        # )
-        temperature: Union[float, None] = Field(
-            default=0.5,
-            description="(float) Temperature",
-        )
-        top_p: Union[float, None] = Field(
-            default=0.95,
-            description="(float) top p",
-        )
-        max_tokens: Union[int, None] = Field(
-            default=-1,
-            description="(int) Max tokens",
-        )
-        use_cache: bool = Field(
-            default=False,
-            description="(bool) Use cache",
         )
     def generate_text(
         self, item: GenerateRequest, api_key: str = Depends(extract_api_key)
@@ -199,13 +199,15 @@ class ChatAPIApp:
                 )
             else:
                 streamer = HuggingfaceStreamer(model=item.model)
                 stream_response = streamer.chat_response(
                     prompt=item.prompt,
-                    temperature=item.temperature,
-                    top_p=item.top_p,
-                    max_new_tokens=item.max_tokens,
-                    api_key=api_key,
-                    use_cache=item.use_cache,
                     # temperature=item.options.get('temperature', 0.6),
                     # top_p=item.options.get('top_p', 0.95),
                     # max_new_tokens=item.options.get('max_new_tokens', -1),

             default=False,
             description="(bool) Stream",
         )
+        options: dict = Field(
+            default={
+                "temperature":0.6,
+                "top_p":0.9,
+                "max_tokens":-1,
+                "use_cache":False
+            },
+            description="(dict) Options"
         )
+        # temperature: Union[float, None] = Field(
+        #     default=0.5,
+        #     description="(float) Temperature",
+        # )
+        # top_p: Union[float, None] = Field(
+        #     default=0.95,
+        #     description="(float) top p",
+        # )
+        # max_tokens: Union[int, None] = Field(
+        #     default=-1,
+        #     description="(int) Max tokens",
+        # )
+        # use_cache: bool = Field(
+        #     default=False,
+        #     description="(bool) Use cache",
+        # )
     def generate_text(
         self, item: GenerateRequest, api_key: str = Depends(extract_api_key)
                 )
             else:
                 streamer = HuggingfaceStreamer(model=item.model)
+                options = {k:v for k,v in item.options.items() if v is not None}
                 stream_response = streamer.chat_response(
                     prompt=item.prompt,
+                    **options,
+                    # temperature=item.temperature,
+                    # top_p=item.top_p,
+                    # max_new_tokens=item.max_tokens,
+                    # api_key=api_key,
+                    # use_cache=item.use_cache,
                     # temperature=item.options.get('temperature', 0.6),
                     # top_p=item.options.get('top_p', 0.95),
                     # max_new_tokens=item.options.get('max_new_tokens', -1),