"""
services/declaration_service.py

Generate a "declaration" for every table-of-contents entry (section).

A declaration is a short writing guide that states what the section should
cover, how it should be structured and which constraints apply. According to
the original module notes it is stored in
``report_template_sections.section_prompt``.

The guide is produced by an LLM (from the section title plus the section's
sample text) when one is configured; when the LLM is disabled, not configured
or fails, a deterministic template is used instead so the surrounding flow
keeps working.
"""
from __future__ import annotations

import logging
import re
from concurrent.futures import ThreadPoolExecutor, as_completed

from config import settings
from services.llm_client import chat_completions_json, llm_configured

logger = logging.getLogger(__name__)

# Maximum number of sample-text characters forwarded to the LLM per section.
_MAX_BODY_CHARS = 2500

# Worker count used when settings.TEMPLATE_UPLOAD_LLM_MAX_WORKERS is unset/falsy.
_DEFAULT_MAX_WORKERS = 8

# Leading section number: "1", "1.2.3" (optionally followed by one separator)
# or a Chinese numeral that must be followed by a separator. Accepted
# separators: ideographic comma, ASCII dot, fullwidth dot (U+FF0E).
_NUMBER_PREFIX_RE = re.compile(
    r"^(?:\d+(?:\.\d+)*[、.\uFF0E]?|[一二三四五六七八九十]+[、.\uFF0E])\s*"
)

# Dotted Arabic number at the very start of a title, e.g. "3.1" in "3.1 概述".
_LEADING_NUMBER_RE = re.compile(r"^(\d+(?:\.\d+)*)")

_SYSTEM_PROMPT = (
    "你是报告模板专家。任务:阅读给定章节的范文(参考正文),总结这一章应该怎么写,"
    "作为后续报告撰写该章节的写作指引。需提炼:①内容要点(写哪些事项);"
    "②组织结构(应有的小节/条目顺序);③数据与口径要求(需引用的对比/指标/表格等);"
    "④写作约束(先事实后评价、缺失写「待补充」、不得编造)。"
    "严格要求:不要输出任何思考过程或解释;只输出 JSON 对象 {\"guide\": \"...\"};"
    "guide 为 300 字以内的写作指引纯文本(不含 markdown 标题);"
    "范文缺失或过短时,按章节标题给出通用写作指引。"
)


def _llm_enabled() -> bool:
    """Return True when the LLM path is switched on and the client is configured.

    ``settings.DECLARATION_USE_LLM`` defaults to True when the attribute is
    absent; ``llm_configured()`` is only consulted when the switch is truthy.
    """
    return bool(getattr(settings, "DECLARATION_USE_LLM", True)) and llm_configured()


def _strip_number_prefix(title: str) -> str:
    """Return *title* without its leading section number.

    Handles dotted Arabic numbers ("1.2 Scope") and Chinese numerals followed
    by a separator. A single separator directly after an Arabic number is
    removed as well, so "1. 概述" yields "概述" rather than ". 概述".
    A falsy *title* yields an empty string.
    """
    text = str(title or "").strip()
    return _NUMBER_PREFIX_RE.sub("", text).strip()


def _fallback_declaration(section_title: str) -> str:
    """Build the deterministic writing guide used when no LLM result is available.

    The section title (number prefix removed) is embedded as the label; a
    generic label is used when nothing remains after stripping.
    """
    label = _strip_number_prefix(section_title) or "本章节"
    return (
        f"本章节为「{label}」。撰写时应紧扣标题主题,先陈述事实与数据,再给出分析与评价;"
        f"结构需与标题保持一致,条理清晰、用语规范;"
        f"所有结论须有依据,缺失信息写「待补充」,禁止编造。"
    )


def _build_user_prompt(section_title: str, content: str) -> str:
    """Compose the user message sent to the LLM for one section.

    The sample text is stripped, truncated to ``_MAX_BODY_CHARS`` characters
    and wrapped in a fenced block; the block is omitted entirely when the
    sample text is empty.
    """
    body = (content or "").strip()[:_MAX_BODY_CHARS]
    body_block = f"\n\n该章节范文(参考正文,节选):\n```\n{body}\n```" if body else ""
    return (
        f"章节标题:{section_title}{body_block}\n\n"
        f"请根据上述范文,总结该章节应该怎么写,并只返回 JSON:{{\"guide\": \"300字以内的写作指引\"}}。"
    )


def generate_declaration(section_title: str, content: str = "") -> str:
    """Generate the writing guide for a single section.

    Args:
        section_title: Section title, possibly carrying a number prefix.
        content: Sample text of the section, used as context for the LLM.

    Returns:
        The ``guide`` value of the LLM's JSON answer, stripped. The
        deterministic fallback text is returned instead when the LLM path is
        disabled, the call raises, or the answer has no usable ``guide``.
        This function does not propagate exceptions from the LLM call.
    """
    if not _llm_enabled():
        return _fallback_declaration(section_title)

    try:
        data = chat_completions_json(
            system_prompt=_SYSTEM_PROMPT,
            user_prompt=_build_user_prompt(section_title, content),
            temperature=0.2,
            max_tokens=2048,
        )
        # Kept inside the try: a non-dict answer makes .get() raise, which
        # must also end in the fallback rather than in the caller.
        guide = str((data or {}).get("guide") or "").strip()
    except Exception as e:  # noqa: BLE001 - safety net: the LLM must never break the main flow
        logger.warning("生成章节声明失败,使用兜底模板 | title=%s | err=%s", section_title, e)
        return _fallback_declaration(section_title)

    if guide:
        return guide

    # Make the silent degradation visible: the call succeeded but was unusable.
    logger.warning("章节声明返回空 guide,使用兜底模板 | title=%s", section_title)
    return _fallback_declaration(section_title)


def _content_for_section(s: dict, content_by_key: dict[str, str]) -> str:
    """Look up the sample text belonging to section *s*.

    The number prefix of ``sectionTitle`` is tried first as the key; when that
    yields nothing (or an empty string), ``sectionKey`` is tried. Returns an
    empty string when neither lookup succeeds.
    """
    title = str(s.get("sectionTitle") or "")
    match = _LEADING_NUMBER_RE.match(title.strip())
    number = match.group(1) if match else ""
    by_number = content_by_key.get(number, "")
    if by_number:
        return by_number
    return content_by_key.get(str(s.get("sectionKey") or ""), "")


def generate_declarations(
    sections: list[dict],
    content_by_key: dict[str, str] | None = None,
) -> list[str]:
    """Generate writing guides for all sections, one LLM call per section.

    Args:
        sections: Items shaped like ``{"sectionKey": ..., "sectionTitle": ...}``.
        content_by_key: Optional mapping from section number / key to sample
            text, used as LLM context.

    Returns:
        One guide per input section, in the same order as *sections*. An empty
        input yields an empty list.

    Calls are fanned out over a thread pool whose size is
    ``settings.TEMPLATE_UPLOAD_LLM_MAX_WORKERS`` (default 8, minimum 1), capped
    at the number of sections. The original author's note states that the LLM
    call is network I/O, which is why threads are used.
    """
    content_by_key = content_by_key or {}
    tasks = [
        (str(s.get("sectionTitle") or ""), _content_for_section(s, content_by_key))
        for s in sections
    ]
    if not tasks:
        return []

    if not _llm_enabled():
        return [_fallback_declaration(title) for title, _ in tasks]

    configured = getattr(settings, "TEMPLATE_UPLOAD_LLM_MAX_WORKERS", _DEFAULT_MAX_WORKERS)
    max_workers = max(int(configured or _DEFAULT_MAX_WORKERS), 1)

    results: list[str] = [""] * len(tasks)

    # A single section does not need a pool.
    if len(tasks) == 1:
        results[0] = generate_declaration(*tasks[0])
        return results

    workers = min(max_workers, len(tasks))
    with ThreadPoolExecutor(max_workers=workers) as executor:
        # Remember each future's position so results keep the input order
        # even though futures complete in arbitrary order.
        future_to_idx = {
            executor.submit(generate_declaration, title, content): i
            for i, (title, content) in enumerate(tasks)
        }
        for fut in as_completed(future_to_idx):
            idx = future_to_idx[fut]
            try:
                results[idx] = fut.result()
            except Exception as e:  # noqa: BLE001 - one failed section must not fail the batch
                logger.warning("章节声明并发生成失败,使用兜底 | idx=%s | err=%s", idx, e)
                results[idx] = _fallback_declaration(tasks[idx][0])

    logger.info("章节声明生成 | 章节=%s | 线程=%s", len(tasks), workers)
    return results