diff --git a/easybuild/tools/filetools.py b/easybuild/tools/filetools.py index 4a9d77726a..0f2eb1a00f 100644 --- a/easybuild/tools/filetools.py +++ b/easybuild/tools/filetools.py @@ -2540,12 +2540,12 @@ def copy(paths, target_path, force_in_dry_run=False, **kwargs): raise EasyBuildError("Specified path to copy is not an existing file or directory: %s", path) -def get_source_tarball_from_git(filename, targetdir, git_config): +def get_source_tarball_from_git(filename, target_dir, git_config): """ Downloads a git repository, at a specific tag or commit, recursively or not, and make an archive with it :param filename: name of the archive to save the code to (must be .tar.gz) - :param targetdir: target directory where to save the archive to + :param target_dir: target directory where to save the archive to :param git_config: dictionary containing url, repo_name, recursive, and one of tag or commit """ # sanity check on git_config value being passed @@ -2584,8 +2584,7 @@ def get_source_tarball_from_git(filename, targetdir, git_config): raise EasyBuildError("git_config currently only supports filename ending in .tar.gz") # prepare target directory and clone repository - mkdir(targetdir, parents=True) - targetpath = os.path.join(targetdir, filename) + mkdir(target_dir, parents=True) # compose 'git clone' command, and run it if extra_config_params: @@ -2668,17 +2667,34 @@ def get_source_tarball_from_git(filename, targetdir, git_config): for cmd in cmds: run_shell_cmd(cmd, work_dir=work_dir, hidden=True, verbose_dry_run=True) - # create an archive and delete the git repo directory + # Create archive + archive_path = os.path.join(target_dir, filename) + if keep_git_dir: - tar_cmd = ['tar', 'cfvz', targetpath, repo_name] + # create archive of git repo including .git directory + tar_cmd = ['tar', 'cfvz', archive_path, repo_name] else: - tar_cmd = ['tar', 'cfvz', targetpath, '--exclude', '.git', repo_name] + # create reproducible archive + # see https://reproducible-builds.org/docs/archives/ + tar_cmd = [ + # print names of all files and folders excluding .git directory + 'find', repo_name, '-name ".git"', '-prune', '-o', '-print0', + # reset access and modification timestamps + '-exec', 'touch', '-t 197001010100', '{}', r'\;', '|', + # sort file list + 'LC_ALL=C', 'sort', '--zero-terminated', '|', + # create tarball in GNU format with ownership reset + 'tar', '--create', '--no-recursion', '--owner=0', '--group=0', '--numeric-owner', '--format=gnu', + '--null', '--files-from', '-', '|', + # compress tarball with gzip without original file name and timestamp + 'gzip', '--no-name', '>', archive_path + ] run_shell_cmd(' '.join(tar_cmd), work_dir=tmpdir, hidden=True, verbose_dry_run=True) # cleanup (repo_name dir does not exist in dry run mode) remove(tmpdir) - return targetpath + return archive_path def move_file(path, target_path, force_in_dry_run=False): diff --git a/test/framework/filetools.py b/test/framework/filetools.py index bdd3f703de..91afdf79f1 100644 --- a/test/framework/filetools.py +++ b/test/framework/filetools.py @@ -2798,32 +2798,41 @@ def run_check(): 'url': 'git@github.com:easybuilders', 'tag': 'tag_for_tests', } - git_repo = {'git_repo': 'git@github.com:easybuilders/testrepository.git'} # Just to make the below shorter + string_args = { + 'git_repo': 'git@github.com:easybuilders/testrepository.git', + 'test_prefix': self.test_prefix, + } + reprod_tar_cmd_pattern = ( + r' running shell command "find {} -name \".git\" -prune -o -print0 -exec touch -t 197001010100 {{}} \; |' + r' LC_ALL=C sort --zero-terminated | tar --create --no-recursion --owner=0 --group=0 --numeric-owner' + r' --format=gnu --null --files-from - | gzip --no-name > %(test_prefix)s/target/test.tar.gz' + ) + expected = '\n'.join([ r' running shell command "git clone --depth 1 --branch tag_for_tests %(git_repo)s"', - r" \(in /.*\)", - r' running shell command "tar cfvz .*/target/test.tar.gz --exclude .git testrepository"', - r" \(in /.*\)", - ]) % git_repo + r" \(in .*/tmp.*\)", + reprod_tar_cmd_pattern.format("testrepository"), + r" \(in .*/tmp.*\)", + ]) % string_args run_check() git_config['clone_into'] = 'test123' expected = '\n'.join([ r' running shell command "git clone --depth 1 --branch tag_for_tests %(git_repo)s test123"', - r" \(in /.*\)", - r' running shell command "tar cfvz .*/target/test.tar.gz --exclude .git test123"', - r" \(in /.*\)", - ]) % git_repo + r" \(in .*/tmp.*\)", + reprod_tar_cmd_pattern.format("test123"), + r" \(in .*/tmp.*\)", + ]) % string_args run_check() del git_config['clone_into'] git_config['recursive'] = True expected = '\n'.join([ r' running shell command "git clone --depth 1 --branch tag_for_tests --recursive %(git_repo)s"', - r" \(in /.*\)", - r' running shell command "tar cfvz .*/target/test.tar.gz --exclude .git testrepository"', - r" \(in /.*\)", - ]) % git_repo + r" \(in .*/tmp.*\)", + reprod_tar_cmd_pattern.format("testrepository"), + r" \(in .*/tmp.*\)", + ]) % string_args run_check() git_config['recurse_submodules'] = ['!vcflib', '!sdsl-lite'] @@ -2831,9 +2840,9 @@ def run_check(): ' running shell command "git clone --depth 1 --branch tag_for_tests --recursive' + ' --recurse-submodules=\'!vcflib\' --recurse-submodules=\'!sdsl-lite\' %(git_repo)s"', r" \(in .*/tmp.*\)", - r' running shell command "tar cfvz .*/target/test.tar.gz --exclude .git testrepository"', + reprod_tar_cmd_pattern.format("testrepository"), r" \(in .*/tmp.*\)", - ]) % git_repo + ]) % string_args run_check() git_config['extra_config_params'] = [ @@ -2845,9 +2854,9 @@ def run_check(): + ' clone --depth 1 --branch tag_for_tests --recursive' + ' --recurse-submodules=\'!vcflib\' --recurse-submodules=\'!sdsl-lite\' %(git_repo)s"', r" \(in .*/tmp.*\)", - r' running shell command "tar cfvz .*/target/test.tar.gz --exclude .git testrepository"', + reprod_tar_cmd_pattern.format("testrepository"), r" \(in .*/tmp.*\)", - ]) % git_repo + ]) % string_args run_check() del git_config['recurse_submodules'] del git_config['extra_config_params'] @@ -2855,10 +2864,10 @@ def run_check(): git_config['keep_git_dir'] = True expected = '\n'.join([ r' running shell command "git clone --branch tag_for_tests --recursive %(git_repo)s"', - r" \(in /.*\)", + r" \(in .*/tmp.*\)", r' running shell command "tar cfvz .*/target/test.tar.gz testrepository"', - r" \(in /.*\)", - ]) % git_repo + r" \(in .*/tmp.*\)", + ]) % string_args run_check() del git_config['keep_git_dir'] @@ -2866,24 +2875,23 @@ def run_check(): git_config['commit'] = '8456f86' expected = '\n'.join([ r' running shell command "git clone --no-checkout %(git_repo)s"', - r" \(in /.*\)", + r" \(in .*/tmp.*\)", r' running shell command "git checkout 8456f86 && git submodule update --init --recursive"', - r" \(in /.*/testrepository\)", - r' running shell command "tar cfvz .*/target/test.tar.gz --exclude .git testrepository"', - r" \(in /.*\)", - ]) % git_repo + r" \(in testrepository\)", + reprod_tar_cmd_pattern.format("testrepository"), + r" \(in .*/tmp.*\)", + ]) % string_args run_check() git_config['recurse_submodules'] = ['!vcflib', '!sdsl-lite'] expected = '\n'.join([ r' running shell command "git clone --no-checkout %(git_repo)s"', r" \(in .*/tmp.*\)", - ' running shell command "git checkout 8456f86 && git submodule update --init --recursive' - + ' --recurse-submodules=\'!vcflib\' --recurse-submodules=\'!sdsl-lite\'"', - r" \(in /.*/testrepository\)", - r' running shell command "tar cfvz .*/target/test.tar.gz --exclude .git testrepository"', + r' running shell command "git checkout 8456f86"', + r" \(in testrepository\)", + reprod_tar_cmd_pattern.format("testrepository"), r" \(in .*/tmp.*\)", - ]) % git_repo + ]) % string_args run_check() del git_config['recursive'] @@ -2893,9 +2901,9 @@ def run_check(): r" \(in /.*\)", r' running shell command "git checkout 8456f86"', r" \(in /.*/testrepository\)", - r' running shell command "tar cfvz .*/target/test.tar.gz --exclude .git testrepository"', + reprod_tar_cmd_pattern.format("testrepository"), r" \(in /.*\)", - ]) % git_repo + ]) % string_args run_check() # Test with real data.