""" services/template_service.py 复刻自 eval_report:report_template_sections 数据的获取方式。 - DEFAULT_TEMPLATE_SECTIONS:系统默认后评价报告章节目录(key, title) - default_section_prompt / default_section_output_contract / default_section_examples: 按章节标题/编号取对应提示词、输出合同、示例 - build_default_template_catalog:默认目录 + 提示词/合同(供上传模版匹配) 说明:eval_report 会额外从《编制细则》与《模版》Word 文档抽取更细的提示词/示例; 本项目默认不含这两个 .doc 文件与 DocParser,故相关函数在缺文件时优雅降级, 回退到 SECTION_PROMPT_RULES / SECTION_EXAMPLE_RULES。 """ from __future__ import annotations import re import uuid from datetime import datetime from functools import lru_cache from pathlib import Path from sqlalchemy.orm import Session from database.models import ReportTemplate, ReportTemplateSection from prompts.report_generation.section_output_contracts import ( DEFAULT_SECTION_OUTPUT_CONTRACT, SECTION_OUTPUT_CONTRACTS, ) from prompts.report_generation.template_prompt_rules import ( DEFAULT_SECTION_PROMPT, SECTION_EXAMPLE_RULES, SECTION_PROMPT_RULES, ) SYSTEM_DEFAULT_TEMPLATE_NAME = "后评价默认模板" GUIDELINE_BASENAME = "炼油化工建设项目后评价报告编制细则(修订)" PROJECT_EXAMPLE_BASENAME = "模版" MAX_SECTION_EXAMPLE_CHARS = 12000 DEFAULT_TEMPLATE_SECTIONS: list[tuple[str, str]] = [ ("1", "1 项目概况"), ("1-1", "1.1 项目基本情况"), ("1-2", "1.2 项目决策要点"), ("1-3", "1.3 项目实施情况"), ("1-4", "1.4 项目运行情况"), ("2", "2 前期工作评价"), ("2-1", "2.1 项目要素评价"), ("2-1-1", "2.1.1 资源与原料评价"), ("2-1-2", "2.1.2 产品方案及市场评价"), ("2-1-2-1", "2.1.2.1 产品方案评价"), ("2-1-2-2", "2.1.2.2 产品市场评价"), ("2-1-3", "2.1.3 工艺方案评价"), ("2-1-3-1", "2.1.3.1 总加工方案评价"), ("2-1-3-2", "2.1.3.2 建设规模及工艺技术方案评价"), ("2-1-3-3", "2.1.3.3 主要设备方案评价"), ("2-1-4", "2.1.4 厂址选择及外部条件评价"), ("2-1-5", "2.1.5 总图及系统配套工程评价"), ("2-1-6", "2.1.6 主要技术指标评价"), ("2-1-7", "2.1.7 风险分析评价"), ("2-2", "2.2 工作程序评价"), ("2-2-1", "2.2.1 编制单位资质及选择方式评价"), ("2-2-2", "2.2.2 编制进度评价"), ("2-2-3", "2.2.3 与专项评价的结合情况"), ("2-2-4", "2.2.4 可行性研究报告的质量评价"), ("2-3", "2.3 前评估工作评价"), ("2-4", "2.4 初步设计评价"), ("2-4-1", "2.4.1 设计单位资质及选择方式评价"), ("2-4-2", "2.4.2 初步设计进度评价"), ("2-4-3", "2.4.3 初步设计质量评价"), ("2-4-4", "2.4.4 初步设计审查工作评价"), ("2-5", "2.5 前期决策程序评价"), ("2-6", "2.6 前期工作评价结论"), ("3", "3 建设实施评价"), ("3-1", "3.1 工程建设管理模式评价"), ("3-2", "3.2 招投标评价"), ("3-3", "3.3 施工图设计评价"), ("3-3-1", "3.3.1 与批复后初步设计符合性评价"), ("3-3-2", "3.3.2 设计进度评价"), ("3-3-3", "3.3.3 施工图设计水平及质量评价"), ("3-3-4", "3.3.4 施工图设计变更管理评价"), ("3-4", "3.4 工程承包商或施工单位评价"), ("3-4-1", "3.4.1 施工准备评价"), ("3-4-2", "3.4.2 施工计划的执行情况"), ("3-5", "3.5 采购工作评价"), ("3-6", "3.6 工程监理评价"), ("3-7", "3.7 工程质量评价"), ("3-8", "3.8 HSE管理评价"), ("3-9", "3.9 三查四定及中间交接"), ("3-10", "3.10 工程竣工验收评价"), ("3-11", "3.11 建设实施评价结论"), ("4", "4 生产运行评价"), ("4-1", "4.1 生产准备评价"), ("4-2", "4.2 联合试运与试生产情况评价"), ("4-3", "4.3 生产运行评价"), ("4-3-1", "4.3.1 原料供应评价"), ("4-3-2", "4.3.2 生产运行总体情况评价"), ("4-3-3", "4.3.3 达标评价"), ("4-3-4", "4.3.4 生产工艺技术评价"), ("4-3-5", "4.3.5 设备运行评价"), ("4-3-6", "4.3.6 公用工程及辅助设施合理性评价"), ("4-4", "4.4 生产运行评价结论"), ("5", "5 投资与经济效益评价"), ("5-1", "5.1 主要经济指标实现程度评价"), ("5-2", "5.2 投资和执行情况评价"), ("5-2-1", "5.2.1 投资控制及变动原因分析"), ("5-2-2", "5.2.2 投资水平分析"), ("5-2-3", "5.2.3 资金来源及到位评价"), ("5-2-4", "5.2.4 投资控制的经验和教训"), ("5-3", "5.3 经济效益分析"), ("5-3-1", "5.3.1 项目投产以来生产经营及效益状况"), ("5-3-2", "5.3.2 项目经济效益后评价"), ("5-4", "5.4 不确定性分析"), ("5-5", "5.5 投资与经济效益评价结论"), ("6", "6 影响与持续性评价"), ("6-1", "6.1 影响评价"), ("6-1-1", "6.1.1 环境影响评价"), ("6-1-2", "6.1.2 安全影响评价"), ("6-1-3", "6.1.3 科技进步影响"), ("6-1-4", "6.1.4 项目社会影响评价"), ("6-1-5", "6.1.5 项目影响评价结论"), ("6-2", "6.2 持续性评价"), ("6-2-1", "6.2.1 资源分析"), ("6-2-2", "6.2.2 产品分析"), ("6-2-3", "6.2.3 主要技术及经济指标对比"), ("6-2-4", "6.2.4 项目持续性评价结论"), ("7", "7 综合评价结论"), ("7-1", "7.1 综合评价结论"), ("7-1-1", "7.1.1 总体评价结论"), ("7-1-2", "7.1.2 成功度评价"), ("7-2", "7.2 主要经验"), ("7-3", "7.3 问题与建议"), ] def default_section_output_contract(section_title: str, section_key: str | None = None) -> str: section_no = _extract_number_prefix(section_title) or _section_key_to_number(section_key) if section_no and section_no in SECTION_OUTPUT_CONTRACTS: return SECTION_OUTPUT_CONTRACTS[section_no] return DEFAULT_SECTION_OUTPUT_CONTRACT def default_section_prompt(section_title: str, section_key: str | None = None) -> str: guideline_prompt = _guideline_prompt_for(section_title, section_key) if guideline_prompt: return guideline_prompt title = _normalize_section_identity(section_title) key = str(section_key or "").strip().lower() for pattern, prompt in SECTION_PROMPT_RULES: p = pattern.lower() if title.startswith(p): return prompt if p.isdigit() and (title.startswith(f"{p} ") or key.startswith(f"{p}-") or key == p): return prompt return DEFAULT_SECTION_PROMPT def build_default_template_catalog() -> list[dict[str, str]]: """系统默认模板章节目录及对应提示词、输出合同(供上传模版匹配)。""" out: list[dict[str, str]] = [] for key, title in DEFAULT_TEMPLATE_SECTIONS: out.append( { "sectionKey": key, "sectionTitle": title, "sectionNumber": _extract_number_prefix(title) or _section_key_to_number(key), "sectionPrompt": default_section_prompt(title, key), "sectionOutputContract": default_section_output_contract(title, key), } ) return out def default_section_examples(section_title: str, section_key: str | None = None) -> str: project_example = _project_example_for(section_title, section_key) if project_example: return project_example title = _normalize_section_identity(section_title) key = str(section_key or "").strip().lower() num = _extract_number_prefix(section_title) or _section_key_to_number(section_key) chapter_no = "" if num: chapter_no = num.split(".")[0] elif key: chapter_no = key.split("-")[0] for prefix, examples in SECTION_EXAMPLE_RULES: p = str(prefix).strip().lower() if chapter_no == p: return examples if title.startswith(f"{p} "): return examples if key.startswith(f"{p}-") or key == p: return examples return "" def _normalize_section_identity(value: str | None) -> str: text = str(value or "").strip().lower() text = text.replace(".", ".").replace("。", ".") text = re.sub(r"\s+", " ", text) return text def _section_key_to_number(section_key: str | None) -> str: key = str(section_key or "").strip() if not key: return "" if re.fullmatch(r"\d+(?:-\d+)*", key): return key.replace("-", ".") return "" def _extract_number_prefix(title: str) -> str: m = re.match(r"^\s*(\d+(?:\.\d+)*)\s*", str(title or "")) return m.group(1) if m else "" def _normalize_heading_key(value: str) -> str: s = str(value or "").strip().lower() s = s.replace(".", ".").replace("。", ".") s = re.sub(r"\s+", "", s) return s def _tuple_from_number(number_str: str) -> tuple[int, ...]: if not number_str: return tuple() parts = [] for p in number_str.split("."): if p.isdigit(): parts.append(int(p)) else: return tuple() return tuple(parts) def _read_doc_text(path: str) -> str: """读取 .doc/.docx 文本。本项目无 DocParser 时返回空串(优雅降级)。""" try: from function.documents.doc_parser import DocParser # type: ignore except Exception: return "" try: return DocParser(path).read() except Exception: return "" @lru_cache(maxsize=1) def _guideline_section_prompt_map() -> dict[str, str]: guideline_path = _resolve_guideline_path() if not guideline_path: return {} raw_text = _read_doc_text(guideline_path) if not raw_text: return {} return _build_guideline_prompt_map(raw_text) def _resolve_guideline_path() -> str | None: root = Path(__file__).resolve().parents[1] candidates = [ root / f"{GUIDELINE_BASENAME}.doc", root / f"{GUIDELINE_BASENAME}.docx", ] for p in candidates: if p.is_file(): return str(p) return None def _resolve_project_example_path() -> str | None: root = Path(__file__).resolve().parents[1] candidates = [ root / f"{PROJECT_EXAMPLE_BASENAME}.doc", root / f"{PROJECT_EXAMPLE_BASENAME}.docx", ] for p in candidates: if p.is_file(): return str(p) return None @lru_cache(maxsize=1) def _project_example_entries() -> list[tuple[str, str]]: path = _resolve_project_example_path() if not path: return [] raw_text = _read_doc_text(path) if not raw_text: return [] return _build_project_example_entries(raw_text) def _build_project_example_entries(text: str) -> list[tuple[str, str]]: lines = str(text or "").splitlines() headings: list[tuple[int, int, str]] = [] for idx, raw in enumerate(lines): line = str(raw or "").strip() m = re.match(r"^\s*(#{1,6})\s*(.+?)\s*$", line) if not m: continue level = len(m.group(1)) heading_title = m.group(2).strip() if not heading_title: continue headings.append((idx, level, heading_title)) out: list[tuple[str, str]] = [] for i, (start_idx, level, title) in enumerate(headings): end_idx = len(lines) for j in range(i + 1, len(headings)): next_idx, next_level, _ = headings[j] if next_level <= level: end_idx = next_idx break body = "\n".join(lines[start_idx + 1 : end_idx]).strip() body = re.sub(r"\n{3,}", "\n\n", body) if not body: continue out.append((title, body)) return out def _project_example_for(section_title: str, section_key: str | None = None) -> str: entries = _project_example_entries() if not entries: return "" target_title = _clean_section_title(section_title) target_key = _section_key_to_number(section_key) target_core = _core_title(target_title or target_key) if not target_core: return "" best_title = "" best_body = "" best_score = -1 for heading, body in entries: heading_clean = _clean_section_title(heading) heading_core = _core_title(heading_clean) score = _title_match_score(target_core, heading_core) if score > best_score: best_score = score best_title = heading_clean best_body = body if best_score < 4 or not best_body: return "" text = f"### {best_title}\n\n{best_body}".strip() if len(text) > MAX_SECTION_EXAMPLE_CHARS: text = text[:MAX_SECTION_EXAMPLE_CHARS].rstrip() + "\n\n(示例过长,已截断)" return text def _clean_section_title(value: str | None) -> str: s = str(value or "").strip() s = re.sub(r"^\s*\d+(?:[.\-]\d+)*\s*", "", s) return s.strip() def _core_title(value: str | None) -> str: s = str(value or "").strip() s = s.replace("(", "(").replace(")", ")") s = re.sub(r"\([^)]*\)", "", s) s = re.sub(r"[、,。;::()()\-\s]", "", s) s = s.replace("项目", "") s = s.replace("情况", "") s = s.replace("工作", "") return s.strip().lower() def _title_match_score(target: str, candidate: str) -> int: if not target or not candidate: return 0 if target == candidate: return 100 score = 0 if target in candidate or candidate in target: score += 40 tks_t = re.findall(r"[\u4e00-\u9fa5]{2,8}|[a-z]{2,12}", target) tks_c = re.findall(r"[\u4e00-\u9fa5]{2,8}|[a-z]{2,12}", candidate) if tks_t and tks_c: overlap = len(set(tks_t) & set(tks_c)) score += overlap * 8 ch_overlap = len(set(target) & set(candidate)) score += min(ch_overlap, 20) return score def _build_guideline_prompt_map(text: str) -> dict[str, str]: lines = str(text or "").splitlines() headings: list[tuple[int, str, str, tuple[int, ...]]] = [] for idx, raw in enumerate(lines): line = str(raw or "").strip() m = re.match(r"^\s*#{1,6}\s*(.+?)\s*$", line) if not m: continue heading_title = m.group(1).strip() number = _extract_number_prefix(heading_title) number_tuple = _tuple_from_number(number) if not number_tuple: continue headings.append((idx, heading_title, number, number_tuple)) prompt_map: dict[str, str] = {} for i, (start_idx, heading_title, number, number_tuple) in enumerate(headings): end_idx = len(lines) for j in range(i + 1, len(headings)): next_start, _, _, next_tuple = headings[j] if len(next_tuple) < len(number_tuple) or next_tuple[: len(number_tuple)] != number_tuple: end_idx = next_start break body = "\n".join(lines[start_idx + 1 : end_idx]).strip() body = re.sub(r"\n{3,}", "\n\n", body) if not body: continue key_title = _normalize_heading_key(heading_title) key_number = _normalize_heading_key(number) prompt_map[key_title] = body prompt_map[key_number] = body return prompt_map def _guideline_prompt_for(section_title: str, section_key: str | None = None) -> str: mapping = _guideline_section_prompt_map() if not mapping: return "" title = str(section_title or "").strip() number = _extract_number_prefix(title) or _section_key_to_number(section_key) candidates = [ _normalize_heading_key(title), _normalize_heading_key(number), ] for key in candidates: if key and key in mapping: return mapping[key] return "" def list_templates(db: Session) -> list[ReportTemplate]: return ( db.query(ReportTemplate) .order_by(ReportTemplate.is_default.desc(), ReportTemplate.updated_at.desc()) .all() ) def ensure_default_template(db: Session) -> None: now = datetime.now() system_default = ( db.query(ReportTemplate) .filter(ReportTemplate.name == SYSTEM_DEFAULT_TEMPLATE_NAME) .first() ) if not system_default: system_default = ReportTemplate( id=uuid.uuid4().hex, name=SYSTEM_DEFAULT_TEMPLATE_NAME, description="系统预置模板(细则完整章节)", is_default=True, is_active=True, created_at=now, updated_at=now, ) db.add(system_default) db.flush() current_rows = ( db.query(ReportTemplateSection) .filter(ReportTemplateSection.template_id == system_default.id) .order_by(ReportTemplateSection.section_order.asc()) .all() ) current_pairs = [(r.section_key, r.section_title) for r in current_rows] expected_pairs = list(DEFAULT_TEMPLATE_SECTIONS) db.query(ReportTemplate).update({ReportTemplate.is_default: False}) system_default.is_default = True system_default.is_active = True system_default.updated_at = now if current_pairs == expected_pairs: has_changed = False for row in current_rows: current_examples = str(row.examples or "").strip() new_examples = default_section_examples(row.section_title, row.section_key).strip() if new_examples and current_examples != new_examples: row.examples = new_examples row.updated_at = now has_changed = True current_out = str(getattr(row, "section_output_contract", None) or "").strip() new_out = default_section_output_contract(row.section_title, row.section_key).strip() if not current_out and new_out: row.section_output_contract = new_out row.updated_at = now has_changed = True if has_changed: system_default.updated_at = now db.commit() return db.query(ReportTemplateSection).filter( ReportTemplateSection.template_id == system_default.id ).delete() for i, (key, title) in enumerate(DEFAULT_TEMPLATE_SECTIONS): db.add( ReportTemplateSection( id=uuid.uuid4().hex, template_id=system_default.id, section_key=key, section_title=title, section_prompt="", section_output_contract=default_section_output_contract(title, key), section_order=i, examples=default_section_examples(title, key), created_at=now, updated_at=now, ) ) db.commit()