Last active
December 27, 2024 07:58
将[[wiki]]附件转换为标准markdown形式,并将附件复制到md相同目录,python export-obsidian-to-standard-markdown.py vault_path target_path
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import sys | |
import re | |
import os | |
import shutil | |
import argparse | |
import functools | |
# Standard Markdown image ``![alt](target)``; group 1 captures the target.
pattern_markdown = r'\!\[[^\]]*\]\(([^\)]+)\)'
# Wiki-style embed ``![[target]]``; group 1 captures everything between the
# double brackets (including an optional ``|description`` suffix).
pattern_wikilink = r'\!\[\[([^\]]+)\]\]'
# Plain wiki link ``[[target]]``. Not referenced by the functions in this file.
pattern_pdf = r'\[\[([^\]]+)\]\]'
# File extensions (lower-case, with leading dot) that are treated as attachments.
EXT = ('.jpg', '.jpeg', '.png', '.gif', '.pdf', '.svg', '.drawio')


def replace_markdown(vault_path: str, target_path: str, db: dict[str, str], matchobj):
    """Rewrite one embedded attachment as a standard Markdown image link.

    Intended as the replacement callback for ``re.sub`` with
    ``pattern_wikilink`` or ``pattern_markdown``. When the embed points at a
    local file whose extension is listed in ``EXT``, the file is copied into
    ``target_path`` and the embed is replaced by ``![description](file_name)``.

    Args:
        vault_path: Root directory that directory-qualified targets are
            resolved against.
        target_path: Directory the attachment is copied into.
        db: Mapping of attachment file name to its full path, used for
            targets given without a directory (and as a fallback when a
            directory-qualified target does not exist under ``vault_path``).
        matchobj: Match whose group 0 is the whole embed and whose group 1 is
            the target (optionally followed by ``|description``).

    Returns:
        The replacement text, or the original embed unchanged for http(s)
        URLs, unsupported extensions, and targets that cannot be located.
    """
    orig = matchobj.group(0)
    image = matchobj.group(1)

    # Remote images are left alone; there is nothing to copy.
    if image.startswith(('http://', 'https://')):
        return orig

    # Separate the optional "|description" BEFORE looking at the path or the
    # extension; otherwise "pic.png|200" has the extension ".png|200" and is
    # never recognised, and a "/" in the description corrupts the directory.
    image_description = ''
    if '|' in image:
        parts = image.split('|')
        image = parts[0]
        image_description = parts[1]
        print('image splited by | ')

    # A standard Markdown image keeps its alt text between "![" and "](".
    # Only pattern_markdown matches end with ")"; wiki embeds end with "]]".
    if not image_description and orig.endswith(')'):
        alt_end = orig.find('](')
        if alt_end != -1:
            image_description = orig[2:alt_end]

    image_dir = os.path.dirname(image)
    image_filename = os.path.basename(image)
    # Compare case-insensitively so "PHOTO.PNG" is handled like "photo.png".
    ext = os.path.splitext(image_filename)[1].lower()
    if ext not in EXT:
        return orig

    if image_dir:
        orig_image = os.path.join(image_dir, image_filename)
        orig_path = os.path.join(vault_path, orig_image)
        # The target may be relative to the note rather than to the vault
        # root; fall back to the vault-wide index in that case.
        if not os.path.exists(orig_path):
            orig_path = db.get(image_filename) or orig_path
    else:
        orig_image = image_filename
        orig_path = db.get(orig_image)

    if not orig_path:
        print(f"Image {orig_image} not found in database!!!")
        return orig

    dest_path = os.path.join(target_path, image_filename)
    if os.path.exists(orig_path):
        try:
            shutil.copy2(orig_path, dest_path)
        except OSError:
            # Best effort: report the failure and still rewrite the link.
            print('failed to copy %s!!!' % orig_path)
    else:
        print('file %s does not exist!!!' % orig_path)

    output = f"![{image_description}]({image_filename})"
    print(f"convert: {orig} -> {output}")
    return output
def process(vault_path: str, infile: str, db):
    """Convert every attachment embed in one Markdown file, in place.

    Each line of ``infile`` is run through ``replace_markdown`` for both embed
    syntaxes and the result is written back to the same path. Attachments are
    copied into the directory that contains ``infile``.

    Args:
        vault_path: Root directory of the source vault.
        infile: Path of the Markdown file to rewrite.
        db: Attachment index passed through to ``replace_markdown``.
    """
    target_path = os.path.dirname(infile)
    convert = functools.partial(replace_markdown, vault_path, target_path, db)

    converted = []
    # newline="" keeps the file's own line endings instead of translating them.
    with open(infile, "r", encoding="utf-8", newline="") as fp:
        for line in fp:
            # Standard Markdown images first, wiki embeds second: the text
            # returned for a wiki embed must not be scanned again by
            # pattern_markdown, which would trigger a second lookup by bare
            # file name and a second copy of a possibly different file.
            line = re.sub(pattern_markdown, convert, line)
            line = re.sub(pattern_wikilink, convert, line)
            converted.append(line)

    with open(infile, "w", encoding="utf-8", newline="") as fp:
        fp.write("".join(converted))
def pre_check(vault_path: str, target_path: str):
    """Prepare the export directory before migration.

    Creates ``target_path`` when it does not exist. When it already exists the
    user is asked whether it should be deleted and recreated (default: yes).

    Args:
        vault_path: Root directory of the source vault.
        target_path: Directory the export will be written to.

    Raises:
        SystemExit: If ``target_path`` is the vault itself or a directory that
            contains the vault, because deleting it would destroy the source.
    """
    vault_real = os.path.normcase(os.path.realpath(vault_path))
    target_real = os.path.normcase(os.path.realpath(target_path))
    # os.path.join(x, '') appends a trailing separator, so "/a/b" is not
    # mistaken for an ancestor of "/a/bc".
    if vault_real == target_real or vault_real.startswith(os.path.join(target_real, '')):
        raise SystemExit(
            f"Refusing to use {target_path} as target: it is or contains the vault {vault_path}."
        )

    if not os.path.exists(target_path):
        os.makedirs(target_path)
        print(f"Created directory: {target_path}")
        return

    response = input(f"Directory {target_path} already exists. Do you want to delete it? (y/n) [y]: ")
    if response.strip().lower() in ('', 'y', 'yes'):
        shutil.rmtree(target_path)
        os.makedirs(target_path)
        print(f"Deleted and recreated directory: {target_path}")
    else:
        print("Directory not deleted. Proceeding with existing directory.")
def generate_db(vault_path: str) -> dict[str, str]:
    """Index every attachment below ``vault_path`` by file name.

    Walks the tree, skipping files and directories whose name starts with a
    dot, and collects every file whose extension (compared case-insensitively)
    is listed in ``EXT``.

    Args:
        vault_path: Root directory to scan.

    Returns:
        A ``{file_name: full_path}`` mapping. When the same file name occurs
        in several directories the one visited last wins and a warning is
        printed.
    """
    file_db = {}
    for root, dirs, files in os.walk(vault_path):
        # Prune hidden directories in place so os.walk does not descend into
        # them; sorting makes the visiting order (and therefore the winner
        # among duplicate names) independent of the file system.
        dirs[:] = sorted(d for d in dirs if not d.startswith('.'))
        for file in files:
            if file.startswith('.'):
                continue
            if os.path.splitext(file)[1].lower() not in EXT:
                continue
            full_path = os.path.join(root, file)
            if file in file_db:
                print(f"Duplicate attachment name {file}: {file_db[file]} is replaced by {full_path}")
            file_db[file] = full_path
    return file_db
def migrate(valut_path: str, target_path: str, db):
    """Copy every ``.md`` file of the vault to ``target_path`` and convert it.

    The directory layout below ``valut_path`` is reproduced below
    ``target_path``. Each copied file is then rewritten by ``process``. Files
    and directories whose name starts with a dot are skipped.

    Args:
        valut_path: Root directory of the source vault.
        target_path: Root directory of the export.
        db: Attachment index passed through to ``process``.
    """
    target_abs = os.path.abspath(target_path)
    for root, dirs, files in os.walk(valut_path):
        # Prune in place: hidden directories, and the export directory itself
        # when it lives inside the vault (otherwise freshly exported files
        # would be picked up and exported again).
        dirs[:] = [
            d for d in dirs
            if not d.startswith('.')
            and os.path.abspath(os.path.join(root, d)) != target_abs
        ]
        for file in files:
            if file.startswith('.') or not file.endswith('.md'):
                continue
            src_file_path = os.path.join(root, file)
            relative_path = os.path.relpath(src_file_path, valut_path)
            dest_file_path = os.path.join(target_path, relative_path)
            os.makedirs(os.path.dirname(dest_file_path), exist_ok=True)
            shutil.copy2(src_file_path, dest_file_path)
            process(valut_path, dest_file_path, db)
if __name__ == "__main__":
    # Command-line entry point: export-obsidian-to-standard-markdown.py vault_path target_path
    parser = argparse.ArgumentParser(description="Migrate Obsidian vault to standard Markdown.")
    parser.add_argument('vault_path', type=str, help='Path to the Obsidian vault')
    parser.add_argument('target_path', type=str, help='Path to the target directory')
    args = parser.parse_args()

    # Validate the source first: pre_check may delete an existing target, which
    # must not happen when there is nothing to export.
    if not os.path.isdir(args.vault_path):
        parser.error(f"vault_path is not a directory: {args.vault_path}")

    pre_check(args.vault_path, args.target_path)
    db = generate_db(args.vault_path)
    migrate(args.vault_path, args.target_path, db)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
GREAT!!!!!!!!!THANK YOU SO MUCH!!!!!!!!