llm_types.py
python
| 1 | """Typed structures for OpenAI-format chat messages and API boundaries. |
| 2 | |
| 3 | Every shape used by ``LLMClient`` is defined here as a typed TypedDict so |
| 4 | mypy can verify all field access statically. No ``Any`` lives in this file. |
| 5 | |
| 6 | Organisation: |
| 7 | Chat messages → ``SystemMessage``, ``UserMessage``, |
| 8 | ``AssistantMessage``, ``ToolResultMessage``, |
| 9 | ``ChatMessage`` (union) |
| 10 | Tool schemas → ``OpenAIPropertyDef``, ``ToolParametersDict``, |
| 11 | ``ToolFunctionDict``, ``ToolSchemaDict``, |
| 12 | ``OpenAIToolChoiceDict``, ``ToolCallFunction``, |
| 13 | ``ToolCallEntry`` |
| 14 | Token usage → ``PromptTokenDetails``, ``UsageStats`` |
| 15 | Request payload → ``ProviderConfig``, ``ReasoningConfig``, |
| 16 | ``OpenAIRequestPayload`` |
| 17 | Non-streaming response → ``ResponseFunction``, ``ResponseToolCall``, |
| 18 | ``ResponseMessage``, ``ResponseChoice``, |
| 19 | ``OpenAIResponse`` |
| 20 | Streaming chunks → ``ReasoningDetail``, ``ToolCallFunctionDelta``, |
| 21 | ``ToolCallDelta``, ``StreamDelta``, |
| 22 | ``StreamChoice``, ``OpenAIStreamChunk`` |
| 23 | Stream events → ``ReasoningDeltaEvent``, ``ContentDeltaEvent``, |
| 24 | ``DoneStreamEvent``, ``StreamEvent`` (union) |
| 25 | """ |
| 26 | from __future__ import annotations |
| 27 | |
| 28 | from typing import Literal, Union |
| 29 | |
| 30 | from typing_extensions import NotRequired, Required, TypedDict |
| 31 | |
| 32 | from musehub.contracts.json_types import JSONValue |
| 33 | |
| 34 | |
| 35 | # ── Chat message shapes ──────────────────────────────────────────────────────── |
| 36 | |
| 37 | |
class ToolCallFunction(TypedDict):
    """The ``function`` field inside an OpenAI tool call.

    ``arguments`` is a JSON-encoded string — callers must ``json.loads`` it.
    """

    name: str  # tool name, as declared in the corresponding tool schema
    arguments: str  # JSON-encoded argument object (a string, never a parsed dict)
| 46 | |
| 47 | |
class ToolCallEntry(TypedDict):
    """One tool call in an assistant message (streaming accumulator or response)."""

    id: str  # provider-assigned call id; a tool result refers back to it via tool_call_id
    type: str  # presumably always "function" — TODO confirm against live responses
    function: ToolCallFunction  # name + JSON-encoded arguments
| 54 | |
| 55 | |
class SystemMessage(TypedDict):
    """A system-role prompt message (instructions, not conversation content)."""

    role: Literal["system"]  # discriminator for the ChatMessage union
    content: str
| 61 | |
| 62 | |
class UserMessage(TypedDict):
    """A user-role message."""

    role: Literal["user"]  # discriminator for the ChatMessage union
    content: str
| 68 | |
| 69 | |
class AssistantMessage(TypedDict, total=False):
    """An assistant reply — may be text-only or contain tool calls.

    ``role`` is the only required key (``total=False`` makes the rest optional).
    """

    role: Required[Literal["assistant"]]
    content: str | None  # may be None — presumably for tool-call-only replies; confirm against LLMClient
    tool_calls: list[ToolCallEntry]  # absent when the reply is plain text
| 76 | |
| 77 | |
class ToolResultMessage(TypedDict):
    """A tool result message returned to the LLM after a tool call."""

    role: Literal["tool"]  # discriminator for the ChatMessage union
    tool_call_id: str  # matches the ``id`` of the ToolCallEntry being answered
    content: str  # tool output, serialised to a string
| 84 | |
| 85 | |
ChatMessage = Union[SystemMessage, UserMessage, AssistantMessage, ToolResultMessage]
"""Union of all OpenAI chat message shapes, discriminated on the ``role`` literal."""
| 88 | |
| 89 | |
| 90 | # ── Tool schema shapes (OpenAI function-calling format) ─────────────────────── |
| 91 | |
| 92 | |
| 93 | class OpenAIPropertyDef(TypedDict, total=False): |
| 94 | """JSON Schema definition for a single OpenAI function parameter. |
| 95 | |
| 96 | Covers the subset of JSON Schema used in Muse tool definitions. |
| 97 | All constraint fields are optional. |
| 98 | """ |
| 99 | |
| 100 | type: Required[str] # "string", "number", "integer", "boolean", "array", "object" |
| 101 | description: str |
| 102 | enum: list[str] |
| 103 | minimum: float |
| 104 | maximum: float |
| 105 | default: JSONValue |
| 106 | items: dict[str, JSONValue] # array item schema (simplified) |
| 107 | properties: dict[str, "OpenAIPropertyDef"] # nested object schema |
| 108 | |
| 109 | |
class ToolParametersDict(TypedDict, total=False):
    """JSON Schema ``parameters`` block inside an OpenAI tool definition."""

    type: str  # presumably always "object" for a parameters block — not constrained here
    properties: dict[str, OpenAIPropertyDef]  # parameter name -> schema
    required: list[str]  # names of required parameters
| 116 | |
| 117 | |
class OpenAIToolChoiceDict(TypedDict):
    """Structured ``tool_choice`` when forcing a specific tool call.

    The OpenAI API accepts either the string ``"auto"`` / ``"none"`` /
    ``"required"`` or this dict to force a specific function.
    """

    type: str  # always "function" when forcing a specific tool
    function: dict[str, str]  # {"name": "<tool_name>"}
| 127 | |
| 128 | |
class ToolFunctionDict(TypedDict):
    """The ``function`` field of an OpenAI tool definition."""

    name: str
    description: str
    parameters: NotRequired[ToolParametersDict]  # omitted for zero-argument tools
| 135 | |
| 136 | |
class ToolSchemaDict(TypedDict):
    """A single OpenAI-format tool definition (``{type: function, function: {...}}``)."""

    type: str  # presumably always "function" — mirrors OpenAIToolChoiceDict.type
    function: ToolFunctionDict
| 142 | |
| 143 | |
class CacheControlDict(TypedDict):
    """Anthropic prompt-caching marker added to the last tool definition."""

    type: str  # always "ephemeral"
| 148 | |
| 149 | |
class CachedToolSchemaDict(TypedDict, total=False):
    """A tool definition with optional Anthropic prompt-caching annotation.

    Identical to ``ToolSchemaDict`` plus an optional ``cache_control`` field.
    The ``llm_client`` adds this field to the last tool in the list before
    sending to OpenRouter/Anthropic when prompt-caching is enabled.
    """

    type: Required[str]  # same required key as ToolSchemaDict.type
    function: Required[ToolFunctionDict]  # same required key as ToolSchemaDict.function
    cache_control: CacheControlDict  # only set on the final tool entry
| 161 | |
| 162 | |
| 163 | # ── Token usage shapes ──────────────────────────────────────────────────────── |
| 164 | |
| 165 | |
class PromptTokenDetails(TypedDict, total=False):
    """Nested token-detail block inside ``UsageStats``.

    OpenRouter surfaces cache data in at least two field names depending on
    model and API version — both are included here.
    """

    cached_tokens: int  # cache read hits (OR standard)
    cache_write_tokens: int  # cache write/creation (OR standard)
| 175 | |
| 176 | |
class UsageStats(TypedDict, total=False):
    """Token usage and cost stats returned by OpenAI/Anthropic/OpenRouter.

    All fields are optional because the exact set varies by model and API
    version. ``_extract_cache_stats`` normalises all known field names.
    """

    prompt_tokens: int
    completion_tokens: int
    total_tokens: int
    prompt_tokens_details: PromptTokenDetails  # nested cache read/write counts
    # OpenRouter / Anthropic direct cache fields — alternative spellings of
    # the same cache-hit / cache-write counts, varying by provider.
    native_tokens_cached: int
    cache_read_input_tokens: int
    prompt_cache_hit_tokens: int
    cache_creation_input_tokens: int
    prompt_cache_miss_tokens: int
    cache_discount: float  # NOTE(review): units/semantics not visible here — provider-reported
| 195 | |
| 196 | |
| 197 | # ── Request payload shapes ──────────────────────────────────────────────────── |
| 198 | |
| 199 | |
class ProviderConfig(TypedDict, total=False):
    """OpenRouter provider-routing config sent in ``payload["provider"]``.

    Used to lock generation to a specific provider (e.g. direct Anthropic)
    for reliable prompt caching and reasoning token support.
    """

    order: list[str]  # providers to try, in preference order
    allow_fallbacks: bool  # False pins generation to the listed providers only
| 209 | |
| 210 | |
class ReasoningConfig(TypedDict, total=False):
    """OpenRouter extended-reasoning config sent in ``payload["reasoning"]``."""

    max_tokens: int  # reasoning-token budget for extended-thinking models
| 215 | |
| 216 | |
class OpenAIRequestPayload(TypedDict, total=False):
    """Full request body sent to OpenRouter's chat completions endpoint.

    ``tools`` is typed as ``list[CachedToolSchemaDict]``: that shape has the
    same required fields as ``ToolSchemaDict`` plus an optional
    ``cache_control``, so plain tool definitions are valid entries and the
    LLM client can annotate the last entry in place when prompt-caching is
    active — no type widening needed.
    """

    model: Required[str]
    messages: Required[list[ChatMessage]]
    temperature: float
    max_tokens: int
    stream: bool  # True selects the SSE streaming response path
    tools: list[CachedToolSchemaDict]
    tool_choice: str | OpenAIToolChoiceDict  # "auto"/"none"/"required" or a forced function
    provider: ProviderConfig  # OpenRouter provider routing
    reasoning: ReasoningConfig  # extended-reasoning budget
| 235 | |
| 236 | |
| 237 | # ── Non-streaming response shapes ───────────────────────────────────────────── |
| 238 | |
| 239 | |
class ResponseFunction(TypedDict, total=False):
    """The ``function`` field of a tool call in a non-streaming response.

    Same shape as ``ToolCallFunction`` but fully optional, since response
    bodies are parsed from the wire and fields may be missing.
    """

    name: str
    arguments: str  # JSON-encoded argument object
| 245 | |
| 246 | |
class ResponseToolCall(TypedDict, total=False):
    """One tool call in a non-streaming assistant response choice."""

    id: str  # provider-assigned call id
    type: str  # presumably "function" — TODO confirm against live responses
    function: ResponseFunction
| 253 | |
| 254 | |
class ResponseMessage(TypedDict, total=False):
    """The ``message`` field inside a non-streaming response choice."""

    content: str | None  # may be null on the wire
    tool_calls: list[ResponseToolCall]  # absent for text-only replies
| 260 | |
| 261 | |
class ResponseChoice(TypedDict, total=False):
    """One choice in a non-streaming API response."""

    message: ResponseMessage
    finish_reason: str | None  # e.g. "stop"/"tool_calls" — NOTE(review): exact values not shown here
| 267 | |
| 268 | |
class OpenAIResponse(TypedDict, total=False):
    """Full (non-streaming) response body from an OpenAI-compatible API."""

    choices: list[ResponseChoice]
    usage: UsageStats  # token/cost accounting, when the provider returns it
| 274 | |
| 275 | |
| 276 | # ── Streaming chunk shapes ──────────────────────────────────────────────────── |
| 277 | |
| 278 | |
class ReasoningDetail(TypedDict, total=False):
    """One element of ``delta.reasoning_details`` in a stream chunk.

    OpenRouter uses ``type="reasoning.text"`` for incremental text and
    ``type="reasoning.summary"`` for the final consolidated summary.
    """

    type: str  # "reasoning.text" or "reasoning.summary"
    text: str  # populated for type "reasoning.text"
    summary: str  # populated for type "reasoning.summary"
| 289 | |
| 290 | |
class ToolCallFunctionDelta(TypedDict, total=False):
    """Incremental function info in a streaming tool call delta.

    ``arguments`` arrives as JSON fragments that must be concatenated
    across chunks before parsing.
    """

    name: str
    arguments: str  # partial JSON fragment, not a complete document
| 296 | |
| 297 | |
class ToolCallDelta(TypedDict, total=False):
    """One tool call fragment in a streaming delta."""

    index: int  # position of the tool call this fragment belongs to
    id: str  # present only on the first fragment of a call — presumably; confirm
    type: str
    function: ToolCallFunctionDelta
| 305 | |
| 306 | |
class StreamDelta(TypedDict, total=False):
    """The ``delta`` field inside a streaming choice."""

    reasoning_details: list[ReasoningDetail]  # extended-thinking fragments
    content: str  # incremental assistant text
    tool_calls: list[ToolCallDelta]  # incremental tool-call fragments
| 313 | |
| 314 | |
class StreamChoice(TypedDict, total=False):
    """One choice in a streaming SSE chunk."""

    delta: StreamDelta
    finish_reason: str | None  # non-null only on the terminal chunk — presumably; confirm
| 320 | |
| 321 | |
class OpenAIStreamChunk(TypedDict, total=False):
    """One SSE data chunk from the OpenRouter streaming API."""

    choices: list[StreamChoice]
    usage: UsageStats  # typically only on the final chunk — NOTE(review): confirm
| 327 | |
| 328 | |
| 329 | # ── Stream event shapes (yielded by LLMClient.chat_completion_stream) ───────── |
| 330 | |
| 331 | |
class ReasoningDeltaEvent(TypedDict):
    """Incremental reasoning text from an extended-thinking model."""

    type: Literal["reasoning_delta"]  # discriminator for the StreamEvent union
    text: str
| 337 | |
| 338 | |
class ContentDeltaEvent(TypedDict):
    """Incremental content text from the model."""

    type: Literal["content_delta"]  # discriminator for the StreamEvent union
    text: str
| 344 | |
| 345 | |
class DoneStreamEvent(TypedDict):
    """Terminal event yielded when streaming completes.

    ``tool_calls`` holds the fully-accumulated list of tool calls built up
    from the streaming deltas — consumers should not read ``ToolCallEntry``
    fields before this event arrives.
    """

    type: Literal["done"]  # discriminator for the StreamEvent union
    content: str | None  # full accumulated assistant text, or None
    tool_calls: list[ToolCallEntry]  # complete calls, deltas already merged
    finish_reason: str | None
    usage: UsageStats
| 359 | |
| 360 | |
StreamEvent = Union[ReasoningDeltaEvent, ContentDeltaEvent, DoneStreamEvent]
"""Union of all events yielded by ``LLMClient.chat_completion_stream``,
discriminated on the ``type`` literal."""
| 363 | |
# Kept as a type alias: either a string shorthand ("auto", "none", "required")
# or an explicit tool-selector dict. The dict form is rarely used but
# specified by the OpenAI API.
#
# Spelled with ``typing.Union`` rather than ``str | OpenAIToolChoiceDict``:
# unlike annotations (which ``from __future__ import annotations`` makes
# lazy), this module-level alias is evaluated at import time, and the
# module's other runtime aliases (``ChatMessage``, ``StreamEvent``) already
# use ``Union`` — this also avoids requiring Python >= 3.10 just to import
# the module.
OpenAIToolChoice = Union[str, OpenAIToolChoiceDict]
"""Either a string shorthand (``"auto"``, ``"none"``, ``"required"``) or an
explicit tool-selector dict forcing a specific function call."""