commit 9c57dd2b0c687e71ae18e6bb82c2705247340963 Author: chris Date: Sun Feb 7 20:49:47 2021 +0100 docker-garbage-collect.md diff --git a/docker-garbage-collect.md b/docker-garbage-collect.md new file mode 100644 index 0000000..74e83e5 --- /dev/null +++ b/docker-garbage-collect.md @@ -0,0 +1,103 @@ +# Is the Docker registry's garbage collection garbage? + +Step 1) Run garbage collection on my registry + + docker-compose exec registry bin/registry garbage-collect /etc/docker/registry/config.yml + .... + 4005 blobs marked, 0 blobs and 0 manifests eligible for deletion + +So everything seems used in some image:tag. + +Step 2) Check size + + du -sch $(docker volume inspect registry_registry -f "{{.Mountpoint}}") + 50G /var/lib/docker/volumes/registry_registry/_data + +This is big, for the number of images in there. +It's 73 images, all originating in the same base images. +Everything is either alpine or debian. + +Step 3) Build a list of all images and all tags, download everything + + python list-all-repos.py --registry ... --ask-pass > all-repos + for img in $(cat all-repos); do docker pull $img; done + +(See below for script source) + +Step 4) Throw away all registry data + + docker-compose down -v && docker-compose up -d + +Step 5) Upload everything again + + for img in $(cat all-repos); do docker push $img; done + +Now that everything is up again, I checked that all images are there again. +Just ran the script again and diffed the output. No differences. + +Step 6) Check size again + + du -sch $(docker volume inspect registry_registry -f "{{.Mountpoint}}") + 6.4G /var/lib/docker/volumes/registry_registry/_data + + ...garbage-collect + 473 blobs marked, 0 blobs and 0 manifests eligible for deletion + +So, what am I not getting? Shouldn't GC already have thrown away everything +not referenced by some image:tag? 
# ## The script to list all images
#
# This depends on [DXF](https://github.com/davedoesdev/dxf)
"""Print all images and tags from given docker registry."""
import argparse
import getpass
import os


def _get_arguments(argv=None):
    """Parse the command-line options of the script.

    Args:
        argv: Argument list to parse. ``None`` (the default) makes argparse
            read ``sys.argv[1:]``, which is what the script entry point uses.

    Returns:
        An ``argparse.Namespace`` with the attributes ``registry``,
        ``username``, ``password`` and ``ask_pass``.
    """
    parser = argparse.ArgumentParser(
        description="Print all images and tags from registry."
    )
    # Without a registry there is nothing to query, so fail early with a
    # usage message instead of handing ``None`` to DXF.
    parser.add_argument('--registry', dest='registry', required=True,
                        help="Registry to list images and tags from")
    parser.add_argument('--username', dest='username',
                        help="Defaults to $REGISTRY_USERNAME")
    parser.add_argument('--password', dest='password',
                        help="Defaults to $REGISTRY_PASSWORD")
    parser.add_argument('--ask-pass', dest='ask_pass', action='store_true',
                        help='Interactively ask for password')
    return parser.parse_args(argv)


def _resolve_credentials(args, environ=None):
    """Return the ``(username, password)`` pair to authenticate with.

    Command-line values win over the ``REGISTRY_USERNAME`` and
    ``REGISTRY_PASSWORD`` environment variables. With ``--ask-pass`` the
    password is always prompted for interactively.

    Args:
        args: Namespace as returned by ``_get_arguments``.
        environ: Mapping used for the environment fallbacks; defaults to
            ``os.environ``.

    Returns:
        A ``(username, password)`` tuple; either element is ``None`` when it
        was given neither on the command line nor in the environment.
    """
    env = os.environ if environ is None else environ
    username = args.username or env.get('REGISTRY_USERNAME')
    if args.ask_pass:
        password = getpass.getpass(f"{args.registry} password: ")
    else:
        password = args.password or env.get('REGISTRY_PASSWORD')
    return username, password


def main(registry, username, password):
    """Print all images and tags, one ``registry/repo:tag`` per line.

    Args:
        registry: Registry passed to ``DXFBase`` / ``DXF`` and used as the
            prefix of every printed image reference.
        username: User name handed to ``authenticate``.
        password: Password handed to ``authenticate``.
    """
    # Imported lazily so that ``--help`` and argument errors work even when
    # the third-party dxf package is not installed.
    from dxf import DXF, DXFBase

    regbase = DXFBase(registry)
    regbase.authenticate(username, password)

    for repo in regbase.list_repos():
        # Each repository gets its own client, authenticated separately.
        regrepo = DXF(registry, repo)
        regrepo.authenticate(username, password)
        for alias in regrepo.list_aliases():
            print(f"{registry}/{repo}:{alias}")


if __name__ == "__main__":
    _args = _get_arguments()
    _username, _password = _resolve_credentials(_args)
    main(
        _args.registry,
        _username,
        _password,
    )