""" services/llm_client.py 极简 OpenAI 兼容 Chat Completions 客户端(仅用于生成章节声明,可选)。 """ from __future__ import annotations import json import logging import re import requests from config import settings logger = logging.getLogger(__name__) def llm_configured() -> bool: return bool( str(settings.LLM_API_BASE or "").strip() and str(settings.LLM_API_KEY or "").strip() and str(settings.LLM_MODEL_NAME or "").strip() ) _THINK_BLOCK_RE = re.compile(r".*?", re.DOTALL | re.IGNORECASE) def _strip_reasoning(text: str) -> str: """去掉思考模型的思维链:成对 ,以及截断/前导的残留标签。""" s = text or "" s = _THINK_BLOCK_RE.sub("", s) # 仅剩结束标签时,说明前面是未配对的思考段,取最后一个 之后的正文 if "" in s: s = s.rsplit("", 1)[-1] s = re.sub(r"", "", s, flags=re.IGNORECASE) return s.strip() def chat_completion_text( *, system_prompt: str, user_prompt: str, temperature: float = 0.2, max_tokens: int = 512, timeout_sec: int | None = None, ) -> str: """调用 LLM 返回纯文本。失败抛出异常,由调用方决定是否兜底。""" base = str(settings.LLM_API_BASE or "").strip().rstrip("/") url = f"{base}/chat/completions" headers = { "Authorization": f"Bearer {settings.LLM_API_KEY}", "Content-Type": "application/json", } payload = { "model": settings.LLM_MODEL_NAME, "messages": [ {"role": "system", "content": system_prompt}, {"role": "user", "content": user_prompt}, ], "temperature": temperature, "max_tokens": max_tokens, } # 关闭思考模型的思维链(vLLM/Qwen3 等支持该扩展字段;不支持的服务会忽略) if bool(getattr(settings, "LLM_DISABLE_THINKING", False)): payload["chat_template_kwargs"] = {"enable_thinking": False} resp = requests.post( url, headers=headers, data=json.dumps(payload, ensure_ascii=False).encode("utf-8"), timeout=timeout_sec or int(settings.LLM_HTTP_TIMEOUT_SEC or 120), ) resp.raise_for_status() data = resp.json() return _strip_reasoning((data["choices"][0]["message"]["content"] or "").strip()) def _extract_json(text: str) -> dict: """从模型输出中解析 JSON object(容忍 ```json``` 代码块包裹)。""" s = (text or "").strip() if s.startswith("```"): s = re.sub(r"^```[a-zA-Z]*\s*", "", s) s = re.sub(r"\s*```$", "", s).strip() try: obj = json.loads(s) except json.JSONDecodeError: m = re.search(r"\{.*\}", s, flags=re.DOTALL) if not m: return {} try: obj = json.loads(m.group(0)) except json.JSONDecodeError: return {} return obj if isinstance(obj, dict) else {} def chat_completions_json( *, system_prompt: str, user_prompt: str, temperature: float = 0.1, max_tokens: int = 4096, timeout_sec: int | None = None, ) -> dict: """调用 LLM 并将返回解析为 JSON object(dict)。失败返回 {}。""" try: text = chat_completion_text( system_prompt=system_prompt, user_prompt=user_prompt, temperature=temperature, max_tokens=max_tokens, timeout_sec=timeout_sec, ) except Exception as e: # noqa: BLE001 logger.warning("chat_completions_json 调用失败: %s", e) return {} return _extract_json(text)