193 lines
5.7 KiB
Python
193 lines
5.7 KiB
Python
from __future__ import annotations
|
|
|
|
from typing import Any, Literal
|
|
from urllib.parse import urlparse, urlunparse
|
|
|
|
# Request/response dialects handled by the text helpers in this module.
# ``detect_text_endpoint_kind`` chooses one from the URL path and
# ``build_text_payload`` shapes the request body for it.
EndpointKind = Literal[
    "chat_completions",  # path ends with /v1/chat/completions
    "responses",  # path ends with /v1/responses
    "lmstudio_chat",  # path ends with /api/v1/chat
    "completions",  # path ends with /v1/completions
    "legacy_prompt",  # fallback for every other path
]
|
|
|
|
|
|
def _ensure_scheme(url: str) -> str:
    """Strip *url* and prepend ``http://`` when it has no HTTP(S) scheme.

    Args:
        url: Raw URL text, possibly surrounded by whitespace and possibly
            missing its scheme (for example ``"localhost:1234"``).

    Returns:
        The stripped URL. An empty or whitespace-only input yields ``""``.
        A value already starting with ``http://`` or ``https://`` is returned
        as-is (original letter case preserved); anything else is prefixed
        with ``http://``.
    """
    value = url.strip()
    if not value:
        return value
    # URL schemes are case-insensitive, so compare on a lowercased copy;
    # otherwise "HTTPS://host" would be mangled into "http://HTTPS://host".
    if value.lower().startswith(("http://", "https://")):
        return value
    return f"http://{value}"
|
|
|
|
|
|
def normalize_endpoint_url(raw_url: str, default_path: str) -> str:
    """Normalize a user-supplied endpoint URL.

    The scheme is filled in by ``_ensure_scheme``. When the parsed URL has no
    path (or only ``/``), *default_path* is substituted; a path that lacks a
    leading slash gets one. All other URL components are kept as parsed.

    Args:
        raw_url: URL text as entered by the user.
        default_path: Path to use when *raw_url* does not carry one.

    Returns:
        The rebuilt URL, or an empty string when *raw_url* is blank.
    """
    with_scheme = _ensure_scheme(raw_url)
    if not with_scheme:
        return with_scheme

    pieces = urlparse(with_scheme)
    current_path = pieces.path or ""
    if current_path in ("", "/"):
        new_path = default_path
    elif current_path.startswith("/"):
        new_path = current_path
    else:
        new_path = "/" + current_path
    return urlunparse(pieces._replace(path=new_path))
|
|
|
|
|
|
def detect_text_endpoint_kind(url: str) -> EndpointKind:
    """Classify a text-generation endpoint by the suffix of its URL path.

    Only the path component is inspected (query string and fragment are
    ignored). Matching is case-insensitive and tolerates trailing slashes,
    so ``.../v1/responses/`` is recognized the same as ``.../v1/responses``.

    Args:
        url: Endpoint URL.

    Returns:
        The matching kind, or ``"legacy_prompt"`` when no known suffix
        matches.
    """
    # Drop trailing slashes first: without this, ".../v1/chat/completions/"
    # fails every endswith() check and is misclassified as "legacy_prompt".
    path = urlparse(url).path.lower().rstrip("/")
    if path.endswith("/v1/chat/completions"):
        return "chat_completions"
    if path.endswith("/v1/responses"):
        return "responses"
    if path.endswith("/api/v1/chat"):
        return "lmstudio_chat"
    if path.endswith("/v1/completions"):
        return "completions"
    return "legacy_prompt"
|
|
|
|
|
|
def build_text_payload(
    prompt: str,
    model: str,
    endpoint_kind: EndpointKind,
    temperature: float | None = None,
    max_tokens: int | None = None,
) -> dict[str, Any]:
    """Build the JSON request body for a text-generation call.

    Args:
        prompt: User prompt text.
        model: Model identifier. For the fallback (legacy) shape it is only
            included when non-empty.
        endpoint_kind: Which request dialect to produce.
        temperature: Optional sampling temperature; omitted when ``None``.
        max_tokens: Optional output-token cap; omitted when ``None``. It is
            sent as ``max_output_tokens`` for ``"responses"`` and as
            ``max_tokens`` for every other kind.

    Returns:
        A new dict holding the request body.
    """
    limit_key = "max_tokens"

    # Step 1: the kind-specific skeleton.
    if endpoint_kind in {"chat_completions", "lmstudio_chat"}:
        body: dict[str, Any] = {
            "model": model,
            "messages": [{"role": "user", "content": prompt}],
            "stream": False,
        }
    elif endpoint_kind == "responses":
        body = {"model": model, "input": prompt}
        limit_key = "max_output_tokens"
    elif endpoint_kind == "completions":
        body = {"model": model, "prompt": prompt, "stream": False}
    else:
        # Any other kind uses the bare-prompt shape.
        body = {"prompt": prompt}
        if model:
            body["model"] = model

    # Step 2: optional sampling knobs, shared by every shape.
    if temperature is not None:
        body["temperature"] = temperature
    if max_tokens is not None:
        body[limit_key] = max_tokens
    return body
|
|
|
|
|
|
def _content_to_text(content: Any) -> str:
    """Flatten a message ``content`` value into plain text.

    A string is returned stripped. A list contributes its string items and
    the string ``"text"`` value of its dict items; each fragment is stripped,
    empty fragments are dropped, and the rest are joined with newlines.
    Any other input yields an empty string.
    """
    if isinstance(content, str):
        return content.strip()
    if not isinstance(content, list):
        return ""

    fragments: list[str] = []
    for entry in content:
        # Dict items carry their text under "text"; string items are the text.
        candidate = entry.get("text") if isinstance(entry, dict) else entry
        if not isinstance(candidate, str):
            continue
        cleaned = candidate.strip()
        if cleaned:
            fragments.append(cleaned)
    return "\n".join(fragments)
|
|
|
|
|
|
def extract_text_response(data: Any) -> str:
    """Pull the generated text out of a decoded text-endpoint response.

    A plain string is returned stripped; anything that is neither a string
    nor a dict yields ``""``. For a dict, the first non-empty candidate wins,
    probing in this order:

    1. ``data["response"]``
    2. ``data["output_text"]``
    3. ``data["message"]["content"]``
    4. ``data["choices"][0]["text"]``
    5. ``data["choices"][0]["message"]["content"]``
    6. ``data["choices"][0]["delta"]["content"]``
    7. every ``data["output"][i]["content"][j]["text"]``, newline-joined

    Returns:
        The stripped text, or ``""`` when nothing usable is found.
    """
    if isinstance(data, str):
        return data.strip()
    if not isinstance(data, dict):
        return ""

    # Flat string fields at the top level.
    for flat_key in ("response", "output_text"):
        flat_value = data.get(flat_key)
        if isinstance(flat_value, str):
            trimmed = flat_value.strip()
            if trimmed:
                return trimmed

    # A single top-level message object.
    top_message = data.get("message")
    if isinstance(top_message, dict):
        rendered = _content_to_text(top_message.get("content"))
        if rendered:
            return rendered

    # Only the first entry of "choices" is consulted.
    choice_list = data.get("choices")
    head = choice_list[0] if isinstance(choice_list, list) and choice_list else None
    if isinstance(head, dict):
        completion = head.get("text")
        if isinstance(completion, str) and completion.strip():
            return completion.strip()
        for holder_key in ("message", "delta"):
            holder = head.get(holder_key)
            if isinstance(holder, dict):
                rendered = _content_to_text(holder.get("content"))
                if rendered:
                    return rendered

    # "output" is a list of items, each with a list of content parts.
    output_items = data.get("output")
    if isinstance(output_items, list):
        collected: list[str] = []
        for item in output_items:
            inner = item.get("content") if isinstance(item, dict) else None
            if not isinstance(inner, list):
                continue
            for piece in inner:
                snippet = piece.get("text") if isinstance(piece, dict) else None
                if isinstance(snippet, str) and snippet.strip():
                    collected.append(snippet.strip())
        if collected:
            return "\n".join(collected)

    return ""
|
|
|
|
|
|
def normalize_embedding_url(raw_url: str) -> str:
    """Normalize an embeddings endpoint URL, defaulting to ``/v1/embeddings``.

    Delegates to ``normalize_endpoint_url`` with the embeddings path as the
    default path.
    """
    embeddings_path = "/v1/embeddings"
    return normalize_endpoint_url(raw_url, embeddings_path)
|
|
|
|
|
|
def extract_embedding_response(data: Any) -> list[float]:
    """Extract one embedding vector from a decoded embeddings response.

    Two shapes are recognized: a top-level ``"embedding"`` list, or a
    ``"data"`` list whose first entry is a dict holding an ``"embedding"``
    list. The top-level form takes precedence when it is a list.

    Returns:
        The vector with every element passed through ``float()``, or an
        empty list when neither shape is present.

    Raises:
        TypeError, ValueError: Propagated from ``float()`` when an element
            cannot be converted.
    """
    if not isinstance(data, dict):
        return []

    vector = data.get("embedding")
    if not isinstance(vector, list):
        # Fall back to the first entry of the "data" list.
        rows = data.get("data")
        first_row = rows[0] if isinstance(rows, list) and rows else None
        vector = first_row.get("embedding") if isinstance(first_row, dict) else None

    if isinstance(vector, list):
        return list(map(float, vector))
    return []
|