mmdeploy/.github/scripts/doc_link_checker.py

86 lines
2.5 KiB
Python

# Copyright (c) MegFlow. All rights reserved.
# /bin/python3
import argparse
import os
import re
def make_parser():
parser = argparse.ArgumentParser('Doc link checker')
parser.add_argument(
'--http', default=False, type=bool, help='check http or not ')
parser.add_argument(
'--target',
default='./docs',
type=str,
help='the directory or file to check')
return parser
pattern = re.compile(r'\[.*?\]\(.*?\)')
def analyze_doc(home, path):
print('analyze {}'.format(path))
problem_list = []
code_block = 0
with open(path) as f:
lines = f.readlines()
for line in lines:
line = line.strip()
if line.startswith('```'):
code_block = 1 - code_block
if code_block > 0:
continue
if '[' in line and ']' in line and '(' in line and ')' in line:
all = pattern.findall(line)
for item in all:
# skip ![]()
if item.find('[') == item.find(']') - 1:
continue
# process the case [text()]()
offset = item.find('](')
if offset == -1:
continue
item = item[offset:]
start = item.find('(')
end = item.find(')')
ref = item[start + 1:end]
if ref.startswith('http') or ref.startswith('#'):
continue
if '.md#' in ref:
ref = ref[ref.find('#'):]
fullpath = os.path.join(home, ref)
if not os.path.exists(fullpath):
problem_list.append(ref)
else:
continue
if len(problem_list) > 0:
print(f'{path}:')
for item in problem_list:
print(f'\t {item}')
print('\n')
raise Exception('found link error')
def traverse(target):
if os.path.isfile(target):
analyze_doc(os.path.dirname(target), target)
return
for home, dirs, files in os.walk(target):
for filename in files:
if filename.endswith('.md'):
path = os.path.join(home, filename)
if os.path.islink(path) is False:
analyze_doc(home, path)
if __name__ == '__main__':
args = make_parser().parse_args()
traverse(args.target)