from __future__ import annotations

from typing import Iterable, Literal, Optional
import os

from huggingface_hub import InferenceClient, logout as hf_logout

from llm.llm_login import login_huggingface, is_loggedin_huggingface
from utils.logger import get_logger

## Module-level logger instance
logger = get_logger(__name__)


class HFChatClient:
    """
    Provider-agnostic LLM client interface.
    Encapsulates `huggingface_hub.InferenceClient` setup and chat calls.

    Backends:
    - model: plain HF model id (e.g., "HuggingFaceH4/zephyr-7b-beta")
    - provider: provider-routed id (e.g., "openai/gpt-oss-120b:fireworks-ai")
    - endpoint: full inference endpoint URL (e.g., "http://localhost:1234").
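
    Example (minimal sketch; the model id, provider route, and token below are
    illustrative, not defaults):

        client = HFChatClient(
            backend_choice="provider",
            model_id="openai/gpt-oss-120b",
            hf_provider="fireworks-ai",
            api_token="hf_xxx",
        )
        for text in client.chat_fn("Hello!"):
            print(text)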
    """

    def __init__(self,
        provider: str = "huggingface",  ## client family, e.g. "huggingface", "openai"
        model_id: str = "openai/gpt-oss-120b",  ## default model
        hf_provider: str = "huggingface",
        endpoint_url: Optional[str] = None,
        backend_choice: Optional[str] = None,  ## one of "model-id", "provider", "endpoint"
        system_message: str = "",
        max_tokens: int = 4096,
        temperature: float = 0.0,
        top_p: float = 0.1,
        stream: bool = False,
        api_token: Optional[str] = None,
        ) -> None:
        
        try:
            self.model_id = model_id
            self.provider = provider.lower()
            self.hf_provider = hf_provider.lower()
            self.endpoint_url = endpoint_url
            ## Map the Gradio backend_choice dropdown value onto the backend literal
            self.backend: Literal["model", "provider", "endpoint"] = (
                "model" if backend_choice == "model-id" else (
                    "provider" if backend_choice == "provider" else "endpoint")
                )
            self.system_message = system_message
            self.max_tokens = max_tokens
            self.temperature = temperature
            self.top_p = top_p
            self.stream = stream
            self.token = api_token or None

            self.base_url = "https://router.huggingface.co/v1"  ## HF API router/proxy
        except Exception as exc:
            logger.exception("client_init_failed", extra={"error": str(exc)})
            raise RuntimeError(f"Failed to initialise client: {exc}") from exc

        ## Login handling (CLI-first with env/arg token fallback) has moved to llm.llm_login.
        
        ## Attempt login if not already logged in. NB: the HF CLI login prompt
        ## will not display inside a worker process, so this must be non-interactive.
        if not is_loggedin_huggingface():
            login_huggingface(self.token)
        else:
            logger.info("You are already logged in to HF Hub")
        ##SMY: TODO: map with openai_client.py

    @staticmethod
    def _normalise_history(history: list, system_message: str, latest_user_message: str) -> list[dict]:
        """ 
        `prompt` prefixed by system_message if set
        Normalise chat history to list of {"role": role, "content": content} dicts.
        Supports both dict and tuple formats for history items.
        """
        messages: list[dict] = []
        if system_message:
            messages.append({"role": "system", "content": system_message})
        for item in history or []:
            if isinstance(item, dict) and "role" in item and "content" in item:
                if item["role"] in ("user", "assistant"):
                    messages.append({"role": item["role"], "content": item["content"]})
            elif isinstance(item, (list, tuple)) and len(item) == 2:
                usr, asst = item
                if usr:
                    messages.append({"role": "user", "content": usr})
                if asst:
                    messages.append({"role": "assistant", "content": asst})
        messages.append({"role": "user", "content": latest_user_message})
        return messages
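
    ## Illustrative example of the normalisation (mixed history formats):
    ##   _normalise_history([("hi", "hello")], "Be brief.", "bye") ->
    ##     [{"role": "system", "content": "Be brief."},
    ##      {"role": "user", "content": "hi"},
    ##      {"role": "assistant", "content": "hello"},
    ##      {"role": "user", "content": "bye"}]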

    def _initialise_client(self,
        backend: Literal["model", "provider", "endpoint"],
        model_id: Optional[str] = None,
        hf_provider: Optional[str] = None,
        endpoint_url: Optional[str] = None,
        token: Optional[str] = None) -> InferenceClient:
        """Build an `InferenceClient` for the selected backend."""

        try:
            match backend:
                case "model":
                    logger.debug("_initialise_client: initialising", extra={"model": model_id})
                    hf_client = InferenceClient(model=model_id, token=token)
                case "endpoint":
                    logger.debug("_initialise_client: initialising", extra={"endpoint": endpoint_url})
                    hf_client = InferenceClient(model=endpoint_url, token=token)
                case "provider":
                    logger.info("_initialise_client: initialising", extra={"provider": hf_provider})
                    hf_client = InferenceClient(provider=hf_provider, model=model_id, token=token)
                case _:
                    raise ValueError(f"Invalid backend: {backend!r}")
            return hf_client
        except Exception as exc:
            logger.error("_initialise_client: client_init_failed", extra={"error": str(exc)})
            raise RuntimeError(f"_initialise_client: failed to initialise client: {exc}") from exc

    ## Wrap the HF client chat call for marker
    def chat_fn(
        self,
        message: str,
        history: Optional[list] = None,
        ) -> Iterable[str]:
        """Yield the assistant response for `message`, given prior `history`.

        When `self.stream` is True, yields the accumulated text after each
        streamed delta; otherwise yields the single final response.
        """
        ## Build the prompt messages: system message first, latest user message last
        messages = self._normalise_history(history or [], self.system_message, message)

        logger.log(20,"chat: initialising client", extra={
            "backend": self.backend, "model": self.model_id, "provider": self.hf_provider, "endpoint": self.endpoint_url,
            "stream": self.stream, "max_tokens": self.max_tokens, "temperature": self.temperature, "top_p": self.top_p,
        })

        ## Initialise the client for the configured backend
        try:
            client = self._initialise_client(self.backend, self.model_id, self.hf_provider, self.endpoint_url, self.token)
            logger.info("chat: client initialised")
        except Exception as exc:
            logger.error("chat: client_init_failed", extra={"error": str(exc)})
            raise RuntimeError(f"chat: failed to initialise client: {exc}") from exc

        logger.info("chat_start", extra={
            "backend": self.backend, "model": self.model_id, "provider": self.hf_provider, "endpoint": self.endpoint_url,
            "stream": self.stream, "max_tokens": self.max_tokens, "temperature": self.temperature, "top_p": self.top_p,
            })
        
        if self.stream:
            ## Accumulate streamed deltas and yield the running text
            acc = ""
            for chunk in client.chat_completion(
                messages=messages,
                max_tokens=self.max_tokens,
                stream=True,
                temperature=self.temperature,
                top_p=self.top_p,
            ):
                delta = getattr(chunk.choices[0].delta, "content", None) or ""
                if delta:
                    acc += delta
                    yield acc
            return

        result = client.chat_completion(
            messages=messages,
            max_tokens=self.max_tokens,
            stream=False,
            temperature=self.temperature,
            top_p=self.top_p,
            )
        yield result.choices[0].message.content

        '''
        ## Future consideration (sketch): huggingface_hub's text_generation API
        ## takes the prompt positionally and generation kwargs directly, and
        ## returns the generated text as a string.
        response = client.text_generation(
            prompt,
            max_new_tokens=max_new_tokens,
            temperature=temperature,
        )
        return response
        '''

    def logout(self) -> bool:
        """Logout from Hugging Face and clear in-process tokens.

        Returns True on success, False otherwise.
        """
        try:
            hf_logout()
        except Exception as exc:
            logger.error("hf_logout_failed", extra={"error": str(exc)})
            return False
        ## Clear process environment tokens (pop with default handles missing keys)
        for key in ("HF_TOKEN", "HUGGINGFACEHUB_API_TOKEN"):
            os.environ.pop(key, None)
        self.token = None
        logger.info("hf_logout_success")
        return True
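

if __name__ == "__main__":
    ## Minimal smoke test (illustrative sketch; assumes a valid HF token via
    ## `api_token`/environment or a prior CLI login, plus network access).
    demo = HFChatClient(
        backend_choice="provider",
        model_id="openai/gpt-oss-120b",
        hf_provider="fireworks-ai",
        system_message="You are a terse assistant.",
        stream=False,
    )
    for reply in demo.chat_fn("Say hello in one sentence."):
        print(reply)
    demo.logout()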