零碎知识 一键检查 Confluence Matrix 目录层级是否存在

大海 · July 04, 2025 · 434 hits

需求背景

领导要求对各个项目组内已经创建的 Confluence Matrix 目录结构进行检查,确认其是否严格按照要求构建。因此我写了一个脚本,可以一键检查 Confluence Matrix 目录层级,避免大家手动逐项检查。

脚本代码(所在项目有自己的独立 space)

import logging
from atlassian import Confluence
from fuzzywuzzy import fuzz, process

# Configuration
CONFLUENCE_URL = 'XXXXXX'
SOURCE_SPACE_KEY = 'CTF'
DESTINATION_SPACE_KEY = 'Kedex'  # Change this to the correct space key for your project
USERNAME = 'XXXXXX'  # Replace with your email address
API_TOKEN = 'XXXXXX'  # Replace with your API token

# Pages missing from the directory hierarchy, collected as
# (title, parent_id) tuples while the check runs
missing_pages = []

# Configure logging
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
logger = logging.getLogger()

# Create the Confluence client instance (the API token is passed as the password)
confluence = Confluence(
    url=CONFLUENCE_URL,
    username=USERNAME,
    password=API_TOKEN
)


def escape_cql_query(query):
    """Return *query* with each ``+`` rewritten as ``%2B``.

    The substitutions are applied in table order. The ``&`` entry maps the
    character to itself, so ampersands pass through unchanged.

    NOTE(review): the identity mapping for ``&`` looks like an escape
    sequence that was lost along the way -- confirm what ``&`` was meant
    to become before relying on this for titles containing it.
    """
    substitutions = (
        ('&', '&'),
        ('+', '%2B'),
    )
    escaped = query
    for raw, replacement in substitutions:
        escaped = escaped.replace(raw, replacement)
    return escaped


def get_page_id_by_title(space_key, title, parent_id=None, similarity_threshold=0.8):
    """Find the page in a space whose title best matches *title*.

    A CQL query (``title~...``, optionally restricted with
    ``ancestor=parent_id``) fetches candidate pages. Matching then happens
    in two stages:

    1. Every candidate is scored with ``fuzz.token_sort_ratio`` (scaled to
       0..1). The highest-scoring candidate wins if its score reaches
       *similarity_threshold*.
    2. Otherwise the first candidate sharing at least one whitespace-separated,
       case-insensitive word with *title* is accepted. Its similarity is
       reported as a fixed ``1.0``.

    Args:
        space_key: Key of the Confluence space to search.
        title: Expected page title.
        parent_id: Optional page ID; when truthy, only pages having it as an
            ancestor are considered.
        similarity_threshold: Minimum fuzzy score (0..1) for stage 1.

    Returns:
        A tuple ``(page_id, page_title, similarity, matched)``. When nothing
        matches, or when the query or result handling raises, the tuple is
        ``(None, None, 0, False)``.
    """
    cql_query = f"space={space_key} and title~'{escape_cql_query(title)}'"
    if parent_id:
        cql_query += f" and ancestor={parent_id}"
    try:
        candidates = confluence.cql(cql_query).get('results') or []

        # Stage 1: best fuzzy score across all candidates.
        best_match = None
        highest_similarity = 0
        for candidate in candidates:
            content = candidate['content']
            page_title = content['title']
            similarity_ratio = fuzz.token_sort_ratio(title.strip(), page_title.strip()) / 100.0
            logger.debug(f"Comparing '{title}' with '{page_title}', similarity ratio: {similarity_ratio}")
            if similarity_ratio > highest_similarity:
                highest_similarity = similarity_ratio
                best_match = (content['id'], page_title)
        if best_match and highest_similarity >= similarity_threshold:
            logger.info(f"Match found: '{title}' matches '{best_match[1]}' with ID: {best_match[0]}")
            return best_match[0], best_match[1], highest_similarity, True

        # Stage 2: fall back to a keyword overlap when no score was high enough.
        keywords = set(title.lower().split())
        for candidate in candidates:
            content = candidate['content']
            page_title = content['title']
            # Lower-case only for the comparison; the title that is logged and
            # returned stays exactly as Confluence reported it.
            if keywords & set(page_title.lower().split()):
                logger.info(
                    f"Keyword match found: '{title}' matches '{page_title}' with ID: {content['id']}")
                return content['id'], page_title, 1.0, True
    except Exception as e:
        # Best effort: a failed lookup is logged and reported as "not found".
        logger.error(f"Failed to execute CQL query '{cql_query}': {e}")
    return None, None, 0, False


def check_and_log_page_existence(title, space_key, parent_id=None, similarity_threshold=0.8):
    """Look up *title* and log the outcome of the comparison.

    Delegates to ``get_page_id_by_title`` and logs the expected title, the
    title actually found and the similarity: at INFO level on a match, at
    WARNING level otherwise.

    Returns:
        ``(True, page_id)`` when a matching page was found, otherwise
        ``(False, None)``.
    """
    lookup = get_page_id_by_title(space_key, title, parent_id, similarity_threshold)
    page_id, actual_title, similarity_ratio, matched = lookup

    if not matched:
        logger.warning(
            f"Expected: '{title}', Actual: '{actual_title}', Similarity: {similarity_ratio:.2f}, Matched: False")
        return False, None

    logger.info(f"Expected: '{title}', Actual: '{actual_title}', Similarity: {similarity_ratio:.2f}, Matched: True")
    return True, page_id


def process_pages(pages, space_key, parent_id, missing_pages):
    """Recursively verify that every expected page exists under *parent_id*.

    Args:
        pages: List of single-key dicts mapping an expected page title
            (with ``+`` standing for a space) to the list of its expected
            children, in the same shape.
        space_key: Key of the Confluence space to search.
        parent_id: ID of the page the entries in *pages* are expected under.
        missing_pages: List that receives a ``(title, parent_id)`` tuple for
            every expected page that was not found. It is mutated in place.

    When a page is missing its subtree is skipped: there is no page ID to
    scope the children's lookup, and searching without one would look across
    the whole space and report the children under the wrong parent.
    """
    for page_dict in pages:
        for page, child_pages in page_dict.items():
            page_title = page.replace('+', ' ')
            exists, page_id = check_and_log_page_existence(page_title, space_key, parent_id)
            if not exists:
                missing_pages.append((page_title, parent_id))
                continue
            # Reuse the ID from the lookup above rather than querying again.
            process_pages(child_pages, space_key, page_id, missing_pages)


# Expected Confluence page hierarchy.
# Each top-level key is a category title; its value is a list of single-key
# dicts mapping a child page title to the list of that page's own children
# (same shape, nested as deep as needed). An empty list means no expected
# children. A '+' in a title stands for a space: the code that walks this
# structure calls .replace('+', ' ') on each title before looking it up.
source_structure = {
    "00.+Product+Overview": [
        {"Introduction": []},
        {"Product+Roadmap": [
            {"Overall": []},
            {"Quarterly+planning": []}
        ]},
        {"Project+key+member+responsibility": []}
    ],
    "01.+Requirement": [
        {"Business+Requirements": [
            {"Policy+Code": []},
            {"Marketing+Analyzation": []},
            {"Competitors+investigation": []},
            {"Business+case": []}
        ]},
        {"Functional+Requirements": [
            {"Module-XXX": [
                {"Features-XXX+PRD": [
                    {"Business+Background": []},
                    {"Role+Authoritarian": []},
                    {"Requirement+description": []},
                    {"Business+Workflow": []},
                    {"Prototype+design": []},
                    {"Page+elements+definition": []},
                    {"Log": []},
                    {"Requirement+Review+Meeting+Summary": []}
                ]}
            ]}
        ]},
        {"Non-Functional+Requirements": [
            {"Role+Setting": []},
            {"Product+Performance": []}
        ]},
        {"Business+value+Review": []}
    ],
    "02.+Engineering": [
        {"01.+Architecture": [
            {"Tech-arch": []},
            {"Business-arch": []},
            {"Data-arch": []},
            {"Feature-xxx": []}
        ]},
        {"02.+Development": [
            {"Frontend-App": []},
            {"Frontend-Web": []},
            {"Frontend-Mini": []},
            {"domain+name": [
                {"domain+arch": [
                    {"features-xxx": []}
                ]},
                {"app+name": [
                    {"app-name-api": []},
                    {"design+for+key+feature+1": []}
                ]}
            ]}
        ]},
        {"03.+Data+Intelligence": []},
        {"04.+Validation+Quality": [
            {"Test+Specifications": [
                {"Test+ENV": []},
                {"Test+Strategy": []},
                {"Test+Spec+Documents": []}
            ]},
            {"Test+Cases": []},
            {"Test+Reports": []},
            {"Automation": [
                {"Automation+Strategy": []},
                {"Automation+Test+Result": []},
                {"Automation+Coverage+Track": []}
            ]},
            {"Non-Function+Test": [
                {"Performance+Test": []},
                {"Stability+Test": []},
                {"Compatibility+test": []},
                {"Usability+Test": []}
            ]},
            {"PRD+Leaking+Bug+Retro": []}
        ]},
        {"05.+Data+Services+Products": [
            {"KCDP": [
                {"PoC": []},
                {"Common+Services": []},
                {"Data+Engineering": []}
            ]},
            {"Digital+enabled+services+247+services": [
                {"Device+view": []},
                {"Device+Shadow": []},
                {"Dynamic+Scheduling": []},
                {"ISN+CN": []}
            ]}
        ]}
    ],
    "03.+Application+Security": [
        {"Security+Summary": []},
        {"Secure+Design": [
            {"Security+protocol": []},
            {"Common+Reference+design": []}
        ]},
        {"Security+Requirements": []},
        {"Security+guideline": []},
        {"Security+Certificate": [
            {"MLPS+certificate": []},
            {"IEC-62443+certificate": []},
            {"ISO27001+series": []}
        ]},
        {"Security+Manual": []},
        {"Security+Testing": [
            {"Security+requirements+verification": []},
            {"Hot+findings+mitigation+summary": []},
            {"Pen+testing+Summary": []}
        ]}
    ],
    "04.+Releases": [
        {"Release+Calendar": [
            {"2025": []},
            {"2026": []}
        ]},
        {"Release+Version": [
            {"v-x.y.z": [
                {"v-x.y.z-git-env-map": []},
                {"v-x.y.z-human-resource": []},
                {"v-x.y.z-runbook": [
                    {"v-x.y.z-runbook-result": []}
                ]},
                {"v-x.y.z-dev-to-test": [
                    {"feature-xxxxxx": []}
                ]},
                {"v-x.y.z-test-report": []},
                {"v-x.y.z-security-report": []},
                {"v-x.y.z-deploy-approve": []}
            ]},
            {"v-x.y.z.w": []}
        ]}
    ],
    "05.+Deployment+Operations": [
        {"Deployment+Guide": []},
        {"CI+CD+Pipeline": []},
        {"Monitoring": []},
        {"Incident+Management": []},
        {"User+Manual+FAQ": []}
    ],
    "06.+Knowledge": [],
    "07.+Project+Management": [
        {"Process": []},
        {"Team+Contacts": []},
        {"Team+Availability": []},
        {"Team+Member+Privilege": [
            {"system-xxx": []}
        ]}
    ],
    "08.+Audit": [],
    "09.+Meeting+Minutes": [
        {"Engineering": [
            {"Arch": [
                {"yyyy-mm-dd-meeting+topic": []}
            ]},
            {"Dev": []},
            {"Algorithm": []},
            {"DevOps": []}
        ]},
        {"Design": []},
        {"Innovation": []},
        {"Cross+team": []}
    ]
}


def main():
    """Walk ``source_structure`` and report every expected page that is missing.

    Categories are looked up in ``DESTINATION_SPACE_KEY`` without a parent
    restriction, subcategories under their category, and deeper levels via
    ``process_pages``. A missing category or subcategory is recorded in the
    module-level ``missing_pages`` list and its subtree is not examined.
    The collected list is logged at the end.
    """
    for category, subcategories in source_structure.items():
        category_title = category.replace('+', ' ')
        found, category_id = check_and_log_page_existence(category_title, DESTINATION_SPACE_KEY)
        if not found:
            missing_pages.append((category_title, None))
            continue

        for entry in subcategories:
            for subcategory, pages in entry.items():
                subcategory_title = subcategory.replace('+', ' ')
                found, subcategory_id = check_and_log_page_existence(
                    subcategory_title, DESTINATION_SPACE_KEY, parent_id=category_id)
                if found:
                    process_pages(pages, DESTINATION_SPACE_KEY, subcategory_id, missing_pages)
                else:
                    missing_pages.append((subcategory_title, category_id))

    if not missing_pages:
        logger.info("All pages exist.")
        return

    logger.error("********************** The following pages are missing: *************************")
    for title, parent_id in missing_pages:
        location = 'root' if parent_id is None else f'parent ID {parent_id}'
        logger.error(f"Page '{title}' does not exist under {location}.")


if __name__ == "__main__":
    main()

脚本代码(所在项目依附在根目录下)

import logging
from atlassian import Confluence
import difflib

# Configuration
CONFLUENCE_URL = 'XXXXXX'
SOURCE_SPACE_KEY = 'CTF'
DESTINATION_SPACE_KEY = 'ChinaIT'  # Change this to the correct space key for your project
USERNAME = 'XXXXXX'  # Replace with your email address
API_TOKEN = 'XXXXXX'  # Replace with your API token

# ID of the page the project's directory tree is expected under.
# Placeholder: replace with the real page ID before running.
destination_parent_id = 'XXXXXX'

# Pages missing from the directory hierarchy, collected as
# (title, parent_id) tuples while the check runs
missing_pages = []

# Configure logging
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
logger = logging.getLogger(__name__)

# Create the Confluence client instance (the API token is passed as the password)
confluence = Confluence(
    url=CONFLUENCE_URL,
    username=USERNAME,
    password=API_TOKEN
)

def escape_cql_query(query):
    """Return *query* with each ``+`` rewritten as ``%2B``.

    NOTE(review): the first step replaces ``&`` with itself and therefore
    changes nothing. It looks like an escape sequence that was lost along
    the way -- confirm what ``&`` was meant to become.
    """
    ampersands_handled = query.replace('&', '&')
    plus_encoded = ampersands_handled.replace('+', '%2B')
    return plus_encoded

def get_page_id_by_title(space_key, title, parent_id=None, similarity_threshold=0.6):
    """Find the page in a space whose title best matches *title*.

    A CQL query (``title~...``, optionally restricted with
    ``ancestor=parent_id``) fetches candidate pages. Matching then happens
    in two stages:

    1. Every candidate is scored with ``difflib.SequenceMatcher(...).ratio()``.
       The highest-scoring candidate wins if its score reaches
       *similarity_threshold*.
    2. Otherwise the first candidate sharing at least one whitespace-separated,
       case-insensitive word with *title* is accepted. Its similarity is
       reported as a fixed ``1.0``.

    Args:
        space_key: Key of the Confluence space to search.
        title: Expected page title.
        parent_id: Optional page ID; when truthy, only pages having it as an
            ancestor are considered.
        similarity_threshold: Minimum similarity ratio for stage 1.

    Returns:
        A tuple ``(page_id, page_title, similarity, matched)``. When nothing
        matches, or when the query or result handling raises, the tuple is
        ``(None, None, 0, False)``.
    """
    cql_query = f"space={space_key} and title~'{escape_cql_query(title)}'"
    if parent_id:
        cql_query += f" and ancestor={parent_id}"
    try:
        candidates = confluence.cql(cql_query).get('results') or []

        # Stage 1: best similarity ratio across all candidates.
        best_match = None
        highest_similarity = 0
        for candidate in candidates:
            content = candidate['content']
            page_title = content['title']
            similarity_ratio = difflib.SequenceMatcher(None, page_title.strip(), title.strip()).ratio()
            logger.debug(f"Comparing '{title}' with '{page_title}', similarity ratio: {similarity_ratio}")
            if similarity_ratio > highest_similarity:
                highest_similarity = similarity_ratio
                best_match = (content['id'], page_title)
        if best_match and highest_similarity >= similarity_threshold:
            logger.info(f"Match found: '{title}' matches '{best_match[1]}' with ID: {best_match[0]}")
            return best_match[0], best_match[1], highest_similarity, True

        # Stage 2: fall back to a keyword overlap when no score was high enough.
        keywords = set(title.lower().split())
        for candidate in candidates:
            content = candidate['content']
            page_title = content['title']
            # Lower-case only for the comparison; the title that is logged and
            # returned stays exactly as Confluence reported it.
            if keywords & set(page_title.lower().split()):
                logger.info(f"Keyword match found: '{title}' matches '{page_title}' with ID: {content['id']}")
                return content['id'], page_title, 1.0, True
    except Exception as e:
        # Best effort: a failed lookup is logged and reported as "not found".
        logger.error(f"Failed to execute CQL query '{cql_query}': {e}")
    return None, None, 0, False

def check_and_log_page_existence(title, space_key, parent_id=None, similarity_threshold=0.6):
    """Look up *title*, log the outcome and record the page if it is missing.

    Delegates to ``get_page_id_by_title``. On a match the comparison is
    logged at INFO level. Otherwise ``(title, parent_id)`` is appended to the
    module-level ``missing_pages`` list and the comparison is logged at
    WARNING level.

    Returns:
        ``(True, page_id)`` when a matching page was found, otherwise
        ``(False, None)``.
    """
    lookup = get_page_id_by_title(space_key, title, parent_id, similarity_threshold)
    page_id, actual_title, similarity_ratio, matched = lookup

    if not matched:
        missing_pages.append((title, parent_id))
        logger.warning(f"Expected: '{title}', Actual: '{actual_title}', Similarity: {similarity_ratio:.2f}, Matched: False")
        return False, None

    logger.info(f"Expected: '{title}', Actual: '{actual_title}', Similarity: {similarity_ratio:.2f}, Matched: True")
    return True, page_id

def process_pages(pages, space_key, parent_id=None, similarity_threshold=0.6):
    """Recursively verify that every expected page exists under *parent_id*.

    *pages* is a list of single-key dicts mapping an expected title (with
    ``+`` standing for a space) to the list of its expected children. Each
    title is checked with ``check_and_log_page_existence``. The children are
    only examined when the page was matched and a truthy page ID came back.
    """
    for entry in pages:
        for raw_title, children in entry.items():
            found, page_id = check_and_log_page_existence(
                raw_title.replace('+', ' '), space_key, parent_id, similarity_threshold)
            if found and page_id:
                process_pages(children, space_key, page_id, similarity_threshold)


# Expected Confluence page hierarchy.
# Each top-level key is a category title; its value is a list of single-key
# dicts mapping a child page title to the list of that page's own children
# (same shape, nested as deep as needed). An empty list means no expected
# children. A '+' in a title stands for a space: the code that walks this
# structure calls .replace('+', ' ') on each title before looking it up.
source_structure = {
    "00.+Product+Overview": [
        {"Introduction": []},
        {"Product+Roadmap": [
            {"Overall": []},
            {"Quarterly+planning": []}
        ]},
        {"Project+key+member+responsibility": []}
    ],
    "01.+Requirement": [
        {"Business+Requirements": [
            {"Policy+Code": []},
            {"Marketing+Analyzation": []},
            {"Competitors+investigation": []},
            {"Business+case": []}
        ]},
        {"Functional+Requirements": [
            {"Module-XXX": [
                {"Features-XXX+PRD": [
                    {"Business+Background": []},
                    {"Role+Authoritarian": []},
                    {"Requirement+description": []},
                    {"Business+Workflow": []},
                    {"Prototype+design": []},
                    {"Page+elements+definition": []},
                    {"Log": []},
                    {"Requirement+Review+Meeting+Summary": []}
                ]}
            ]}
        ]},
        {"Non-Functional+Requirements": [
            {"Role+Setting": []},
            {"Product+Performance": []}
        ]},
        {"Business+value+Review": []}
    ],
    "02.+Engineering": [
        {"01.+Architecture": [
            {"Tech-arch": []},
            {"Business-arch": []},
            {"Data-arch": []},
            {"Feature-xxx": []}
        ]},
        {"02.+Development": [
            {"Frontend-App": []},
            {"Frontend-Web": []},
            {"Frontend-Mini": []},
            {"domain+name": [
                {"domain+arch": [
                    {"features-xxx": []}
                ]},
                {"app+name": [
                    {"app-name-api": []},
                    {"design+for+key+feature+1": []}
                ]}
            ]}
        ]},
        {"03.+Data+Intelligence": []},
        {"04.+Validation+Quality": [
            {"Test+Specifications": [
                {"Test+ENV": []},
                {"Test+Strategy": []},
                {"Test+Spec+Documents": []}
            ]},
            {"Test+Cases": []},
            {"Test+Reports": []},
            {"Automation": [
                {"Automation+Strategy": []},
                {"Automation+Test+Result": []},
                {"Automation+Coverage+Track": []}
            ]},
            {"Non-Function+Test": [
                {"Performance+Test": []},
                {"Stability+Test": []},
                {"Compatibility+test": []},
                {"Usability+Test": []}
            ]},
            {"PRD+Leaking+Bug+Retro": []}
        ]},
        {"05.+Data+Services+Products": [
            {"KCDP": [
                {"PoC": []},
                {"Common+Services": []},
                {"Data+Engineering": []}
            ]},
            {"Digital+enabled+services+247+services": [
                {"Device+View": []},
                {"Device+Shadow": []},
                {"Dynamic+Scheduling": []},
                {"ISN+CN": []}
            ]}
        ]}
    ],
    "03.+Application+Security": [
        {"Security+Summary": []},
        {"Secure+Design": [
            {"Security+Protocol": []},
            {"Common+Reference+design": []}
        ]},
        {"Security+Requirements": []},
        {"Security+Guideline": []},
        {"Security+Certificate": [
            {"MLPS+certificate": []},
            {"IEC-62443+certificate": []},
            {"ISO27001+series": []}
        ]},
        {"Security+Manual": []},
        {"Security+Testing": [
            {"Security+requirements+verification": []},
            {"Hot+findings+mitigation+summary": []},
            {"Pen+testing+Summary": []}
        ]}
    ],
    "04.+Releases": [
        {"Release+Calendar": [
            {"2025": []},
            {"2026": []}
        ]},
        {"Release+Version": [
            {"v-x.y.z": [
                {"v-x.y.z-git-env-map": []},
                {"v-x.y.z-human-resource": []},
                {"v-x.y.z-runbook": [
                    {"v-x.y.z-runbook-result": []}
                ]},
                {"v-x.y.z-dev-to-test": [
                    {"feature-xxxxxx": []}
                ]},
                {"v-x.y.z-test-report": []},
                {"v-x.y.z-security-report": []},
                {"v-x.y.z-deploy-approve": []}
            ]},
            {"v-x.y.z.w": []}
        ]}
    ],
    "05.+Deployment+Operations": [
        {"Deployment+Guide": []},
        {"CI+CD+Pipeline": []},
        {"Monitoring": []},
        {"Incident+Management": []},
        {"User+Manual+FAQ": []}
    ],
    "06.+Knowledge": [],
    "07.+Project+Management": [
        {"Process": []},
        {"Team+Contacts": []},
        {"Team+Availability": []},
        {"Team+Member+Privilege": [
            {"system-xxx": []}
        ]}
    ],
    "08.+Audit": [],
    "09.+Meeting+Minutes": [
        {"Engineering": [
            {"Arch": [
                {"yyyy-mm-dd-meeting+topic": []}
            ]},
            {"Dev": []},
            {"Algorithm": []},
            {"DevOps": []}
        ]},
        {"Design": []},
        {"Innovation": []},
        {"Cross+team": []}
    ]
}

def main():
    """Walk ``source_structure`` and report every expected page that is missing.

    Categories are looked up under ``destination_parent_id``, subcategories
    under their category, and deeper levels via ``process_pages``. Missing
    pages are recorded by ``check_and_log_page_existence`` in the module-level
    ``missing_pages`` list; the subtree of a missing page is not examined.
    The collected list is logged at the end.
    """
    for category, subcategories in source_structure.items():
        category_title = category.replace('+', ' ')
        found, category_id = check_and_log_page_existence(
            category_title, DESTINATION_SPACE_KEY, destination_parent_id)
        if found:
            for entry in subcategories:
                for subcategory, pages in entry.items():
                    subcategory_title = subcategory.replace('+', ' ')
                    found_sub, subcategory_id = check_and_log_page_existence(
                        subcategory_title, DESTINATION_SPACE_KEY, category_id)
                    if found_sub:
                        process_pages(pages, DESTINATION_SPACE_KEY, subcategory_id)

    if not missing_pages:
        logger.info("All pages exist.")
        return

    logger.error("********************** The following pages are missing: *************************")
    for title, parent_id in missing_pages:
        location = 'root' if parent_id is None else f'parent ID {parent_id}'
        logger.error(f"Page '{title}' does not exist under {location}.")

if __name__ == "__main__":
    main()


如果觉得我的文章对您有用,请随意打赏。您的支持将鼓励我继续创作!
No Reply at the moment.
需要 Sign In 后方可回复, 如果你还没有账号请点击这里 Sign Up