rsync 实现 move 功能

2020/04/04 技术

rsync 实现 move 功能

在实际使用 docker 时, 需要将本地目录挂载到容器上。

但如果挂载的目录的 inode 变化(如宿主目录先被删除,后重新创建),容器读取到的目录将变为空目录。

所以, 希望实现一种特殊的 move 功能:

  • 将源目录内容完整复制到目标目录上
  • 目标目录已经存在的子目录,不能更改其目录 inode
  • 原来的目标目录上存在、但源目录不存在的文件,需要在目标目录上删除
  • 原来目标目录上存在且内容与源目录一致的文件, 尽量不要修改其文件 inode

最终使用 rsync 实现该功能:

# Source tree and the destination directory that must be updated.
# "$HOME" is used instead of '~' because a tilde inside quotes is not expanded.
src="$HOME/test/abc"
dst="$HOME/test/efg"

# move src to dst
# 1. Stage a copy of src under dst's basename: rsync, given a source without
#    a trailing slash, syncs into a directory of the same name.
staging="/tmp/$(basename "$dst")"
cp -r "$src" "$staging"
# 2. Sync the staged copy into dst's parent directory; --delete removes
#    entries that exist only in dst.
rsync -avu --progress --delete "$staging" "$(dirname "$dst")/"
# 3. Remove the staged copy.
rm -r "$staging"

验证代码

import hashlib
import os
import shutil
import subprocess
import tempfile
import unittest


def get_md5_for_file(file_or_filename) -> str:
    """Return the hexadecimal MD5 digest of a file's content.

    Args:
        file_or_filename: Either a filesystem path (``str``, ``bytes`` or
            ``os.PathLike``), which is opened in binary mode, or an already
            open binary file object exposing ``read(size)``. A file object
            is read from its current position until ``read`` returns
            ``b""`` and is not closed by this function.

    Returns:
        The MD5 digest as a hex string.
    """
    if isinstance(file_or_filename, (str, bytes, os.PathLike)):
        # Paths are opened here and hashed through the file-object branch so
        # the chunked hashing loop exists only once.
        with open(file_or_filename, "rb") as f:
            return get_md5_for_file(f)

    hash_md5 = hashlib.md5()
    # Read in 4 KiB chunks so large files are never loaded into memory at once.
    for chunk in iter(lambda: file_or_filename.read(4096), b""):
        hash_md5.update(chunk)
    return hash_md5.hexdigest()


class TestRsyncCopy(unittest.TestCase):
    """Verify the cp + rsync "move" against the inode requirements.

    The test builds a source and a destination tree, runs :meth:`copy`, and
    then checks file contents, directory inodes and unchanged-file inodes.
    """

    def setUp(self) -> None:
        pass

    def do_log(self, info: str):
        """Print a progress message to stdout."""
        print(info)

    @staticmethod
    def _strip_root(path_info: dict, root: str) -> dict:
        """Re-key a ``get_dir_info`` snapshot by path relative to ``root``.

        The absolute form of ``root`` is cut off the front of every key, so
        the root itself maps to ``""`` and children to ``"/name"``.
        """
        prefix_len = len(os.path.abspath(root))
        return {name[prefix_len:]: info for name, info in path_info.items()}

    def get_dir_info(self, path: str) -> dict:
        """Recursively snapshot a directory tree.

        Args:
            path: Directory to walk.

        Returns:
            A dict keyed by absolute path. Every value holds ``"is_dir"`` and
            ``"stat"`` (the ``os.stat`` result); regular files additionally
            hold ``"file_md5"``.
        """
        path_info: dict = {os.path.abspath(path): {"is_dir": True, "stat": os.stat(path)}}
        for _name in os.listdir(path):
            _full_name = os.path.abspath(os.path.join(path, _name))
            if os.path.isdir(_full_name):
                # Sub-directories register themselves in the recursive call.
                path_info.update(self.get_dir_info(path=_full_name))
                continue

            path_info[_full_name] = {"is_dir": False, "stat": os.stat(_full_name)}
            if os.path.isfile(_full_name):
                path_info[_full_name]["file_md5"] = get_md5_for_file(_full_name)

        return path_info

    def copy(self, src_dir: str, dst_dir: str):
        """Sync the content of ``src_dir`` into ``dst_dir`` with cp + rsync.

        ``src_dir`` is first copied to a temporary directory under the
        basename of ``dst_dir``; that staged copy is then rsync'ed (with
        ``--delete``) into the parent of ``dst_dir``.

        Args:
            src_dir: Directory whose content should end up in ``dst_dir``.
            dst_dir: Directory to update.

        Raises:
            subprocess.CalledProcessError: If ``cp`` or ``rsync`` exits with
                a non-zero status.
        """
        # abspath also drops a trailing slash, so basename/dirname are
        # reliable even for inputs such as "./efg/".
        dst_abs = os.path.abspath(dst_dir)
        dst_name = os.path.basename(dst_abs)
        dst_parent = os.path.dirname(dst_abs)

        tmp_dir = tempfile.mkdtemp()
        try:
            staged = os.path.join(tmp_dir, dst_name)

            # Commands are passed as argument lists (no shell) so paths with
            # spaces or shell metacharacters are handled correctly.
            cp_cmd = ["cp", "-r", os.path.abspath(src_dir), staged]
            self.do_log("run: {}".format(" ".join(cp_cmd)))
            subprocess.check_output(cp_cmd)

            rsync_cmd = ["rsync", "-avu", "--progress", "--delete", staged, dst_parent]
            self.do_log("run: {}".format(" ".join(rsync_cmd)))
            result = subprocess.check_output(rsync_cmd)
            # rsync echoes file names; do not let an undecodable name fail the run.
            self.do_log("{}".format(result.decode("utf-8", errors="replace")))
        finally:
            shutil.rmtree(tmp_dir)

    def _create_dir(self, target_dir: str, file_info: dict):
        """Create files and directories below ``target_dir``.

        Args:
            target_dir: Root under which the entries are created.
            file_info: Maps a relative path to the text to write into that
                file, or to ``None`` to create a directory at that path.
        """
        for file_path, file_content in file_info.items():
            # make dir
            _full_file_name = os.path.join(target_dir, file_path)
            os.makedirs(os.path.dirname(_full_file_name), exist_ok=True)

            if file_content is None:
                self.do_log("{} is dir".format(_full_file_name))
                # exist_ok: an earlier entry may already have created it.
                os.makedirs(_full_file_name, exist_ok=True)
            else:
                with open(_full_file_name, "w", encoding="utf-8") as f:
                    f.write(file_content)

    def testCopy(self):
        """Run the move and check contents, directory inodes and file inodes."""

        # prepare dir data in a private scratch directory so the test neither
        # depends on nor pollutes the current working directory
        work_dir = tempfile.mkdtemp()
        self.addCleanup(shutil.rmtree, work_dir, ignore_errors=True)
        src_dir = os.path.join(work_dir, "abc")
        dst_dir = os.path.join(work_dir, "efg")
        for _dir in (src_dir, dst_dir):
            os.mkdir(_dir)

        # prepare data
        self._create_dir(target_dir=src_dir, file_info={
            "a.txt": "abcdefg",
            "b/b.txt": "xxxxxx",
            "c.txt": "ccccccc",
        })
        self._create_dir(target_dir=dst_dir, file_info={
            "a.txt": "zzzz",
            "b/c.txt": "xxxxxx",
            "c/x.abc": "fdsfaffasf",
            "c.txt": "ccccccc",
        })
        src_file_info = self._strip_root(self.get_dir_info(path=src_dir), src_dir)
        raw_dst_file_info = self._strip_root(self.get_dir_info(path=dst_dir), dst_dir)

        # copy
        self.copy(src_dir=src_dir, dst_dir=dst_dir)
        target_dst_file_info = self._strip_root(self.get_dir_info(path=dst_dir), dst_dir)

        # confirm
        # file contents: dst must hold exactly the files of src
        self.do_log("\n\ncheck file move from src to dst...")
        _src_md5 = {
            _name: _info["file_md5"]
            for _name, _info in src_file_info.items()
            if "file_md5" in _info
        }
        _dst_md5 = {
            _name: _info["file_md5"]
            for _name, _info in target_dst_file_info.items()
            if "file_md5" in _info
        }
        self.assertEqual(_src_md5, _dst_md5)
        self.do_log("files equal between src and dst!")

        # directory inode: directories present before and after keep their inode
        self.do_log("\n\ncheck dir inode with same name...")
        _raw_dirs = {
            _name: _info["stat"].st_ino
            for _name, _info in raw_dst_file_info.items()
            if _info["is_dir"]
        }
        _target_dirs = {
            _name: _info["stat"].st_ino
            for _name, _info in target_dst_file_info.items()
            if _info["is_dir"]
        }
        for _dir_name, _dir_ino in _raw_dirs.items():
            if _dir_name not in _target_dirs:
                continue
            self.do_log("dir {}, raw ino {}, target ino {}".format(_dir_name, _dir_ino, _target_dirs[_dir_name]))
            self.assertEqual(_dir_ino, _target_dirs[_dir_name])

        # inode of identical files: files whose content did not change keep
        # their inode
        self.do_log("\n\ncheck file inode with same content...")
        _raw_files = {
            _name: _info
            for _name, _info in raw_dst_file_info.items()
            if not _info["is_dir"] and "file_md5" in _info
        }
        _target_files = {
            _name: _info
            for _name, _info in target_dst_file_info.items()
            if not _info["is_dir"] and "file_md5" in _info
        }
        for _file_name, _file_info in _raw_files.items():
            if _file_name not in _target_files:
                continue

            _target_file_info = _target_files[_file_name]
            if _file_info["file_md5"] != _target_file_info["file_md5"]:
                continue

            self.do_log("file {}, raw ino {}, target ino {}".format(_file_name, _file_info["stat"].st_ino, _target_file_info["stat"].st_ino))
            self.assertEqual(_file_info["stat"].st_ino, _target_file_info["stat"].st_ino)

执行结果:

run: cp -r /xxx/test/abc /var/folders/qz/1gh56g497f366630wxtk4z8m0000gn/T/tmpe55ue00w/efg
run: rsync -avu --progress --delete /var/folders/qz/1gh56g497f366630wxtk4z8m0000gn/T/tmpe55ue00w/efg /xxx/test
building file list ... 
5 files to consider
deleting efg/c/x.abc
deleting efg/c/
deleting efg/b/c.txt
efg/a.txt
           7 100%    0.00kB/s    0:00:00 (xfer#1, to-check=3/5)
efg/b/b.txt
           6 100%    5.86kB/s    0:00:00 (xfer#2, to-check=0/5)

sent 238 bytes  received 64 bytes  604.00 bytes/sec
total size is 20  speedup is 0.07



check file move from src to dst...
files equal between src and dst!


check dir inode with same name...
dir , raw ino 35323238, target ino 35323238
dir /b, raw ino 35323244, target ino 35323244


check file inode with same content...
file /c.txt, raw ino 35323248, target ino 35323248

Search

    Table of Contents