From 9514429e7e067ab42fe90ee6c222c17936f27ab4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Serdar=20=C3=96zer?= Date: Wed, 9 Jul 2025 13:41:45 +0200 Subject: [PATCH 1/4] refactor: sha256 and tag parser refactored. Digest's hash value moved to function parse_docker_uri --- .../diego/docker/docker_uri_converter.rb | 13 ++++----- lib/utils/uri_utils.rb | 28 +++++++++++++------ spec/unit/lib/utils/uri_utils_spec.rb | 2 +- 3 files changed, 25 insertions(+), 18 deletions(-) diff --git a/lib/cloud_controller/diego/docker/docker_uri_converter.rb b/lib/cloud_controller/diego/docker/docker_uri_converter.rb index 881ed008a57..4938dd03be7 100644 --- a/lib/cloud_controller/diego/docker/docker_uri_converter.rb +++ b/lib/cloud_controller/diego/docker/docker_uri_converter.rb @@ -4,15 +4,12 @@ module VCAP::CloudController class DockerURIConverter def convert(docker_uri) raise UriUtils::InvalidDockerURI.new "Docker URI [#{docker_uri}] should not contain scheme" if docker_uri.include? '://' + host, path, tag_digest = UriUtils.parse_docker_uri(docker_uri) - host, path, tag = UriUtils.parse_docker_uri(docker_uri) - - if !tag.nil? && tag.start_with?('@sha256:') - path = "#{path}@sha256" - tag.slice!('@sha256:') - end - - Addressable::URI.new(scheme: 'docker', host: host, path: path, fragment: tag).to_s + # add tag or digest part as fragment to the uri, since ruby uri parser confuses with ':' + # when it presented in path. We convert user's uri to, for example; + # docker://docker.io/publish/ubuntu:latest -> docker://docker.io/publish/ubuntu#latest + Addressable::URI.new(scheme: 'docker', host: host, path: path, fragment: tag_digest).to_s end end end diff --git a/lib/utils/uri_utils.rb b/lib/utils/uri_utils.rb index a8ffc724379..14e0cb0cd6a 100644 --- a/lib/utils/uri_utils.rb +++ b/lib/utils/uri_utils.rb @@ -4,6 +4,10 @@ module UriUtils SSH_REGEX = %r{ \A (?:ssh://)? git@ .+? : .+? \.git \z }x GIT_REGEX = %r{ \A git:// .+? : .+? 
\.git \z }x DOCKER_INDEX_SERVER = 'docker.io'.freeze + DOCKER_PATH_REGEX = %r{\A[a-z0-9_\-\.\/]{2,255}\Z} + DOCKER_TAG_REGEX = %r{[a-zA-Z0-9_\-\.]{1,128}} + DOCKER_DIGEST_REGEX = %r{sha256:[a-z0-9]{64}} + DOCKER_TAG_DIGEST_REGEX = Regexp.new("\\A(#{DOCKER_TAG_REGEX.source} | (#{DOCKER_TAG_REGEX.source}@#{DOCKER_DIGEST_REGEX.source}) | #{DOCKER_DIGEST_REGEX.source})\\Z", Regexp::EXTENDED) class InvalidDockerURI < StandardError; end @@ -62,13 +66,20 @@ def self.parse_docker_uri(docker_uri) end path = 'library/' + path if (official_docker_registry(name_parts[0]) || missing_registry(name_parts)) && path.exclude?('/') + path, tag_digest = parse_docker_tag_or_digest_from_path(path) - path, tag = parse_docker_repository_tag(path) + raise InvalidDockerURI.new "Invalid image name [#{path}]" unless DOCKER_PATH_REGEX =~ path + raise InvalidDockerURI.new "Invalid image tag [#{tag_digest}]" if tag_digest && !(DOCKER_TAG_DIGEST_REGEX =~ tag_digest) - raise InvalidDockerURI.new "Invalid image name [#{path}]" unless %r{\A[a-z0-9_\-\.\/]{2,255}\Z} =~ path - raise InvalidDockerURI.new "Invalid image tag [#{tag}]" if tag && !(/\A(([a-zA-Z0-9_\-\.]{1,128})|(([a-zA-Z0-9_\-\.]{0,128})(@sha256:[a-z0-9]{64})))\Z/ =~ tag) + # if only sha256 presented, we add hash value as fragment to the uri, + # since the ruby uri parser confuses because of second ':' in uri's path part. 
+ if tag_digest && tag_digest.start_with?("sha256:") + hash_algo, hash_value = tag_digest.split(":") + path = path + "@sha256" + tag_digest = hash_value + end - [host, path, tag] + [host, path, tag_digest] end private_class_method def self.official_docker_registry(host) @@ -78,13 +89,12 @@ def self.parse_docker_uri(docker_uri) private_class_method def self.missing_registry(name_parts) host = name_parts[0] name_parts.length == 1 || - (host.exclude?('.') && host.exclude?(':') && host != 'localhost') + (host.exclude?('.') && host.exclude?(':') && host != 'localhost') end - private_class_method def self.parse_docker_repository_tag(path) - path, tag = path.split(/(?=@)|:/, 2) - - return [path, tag] unless tag && tag.include?('/') + private_class_method def self.parse_docker_tag_or_digest_from_path(path) + path, tag_digest = path.split(/@|:/, 2) + return [path, tag_digest] unless tag_digest && tag_digest.include?('/') [path, 'latest'] end diff --git a/spec/unit/lib/utils/uri_utils_spec.rb b/spec/unit/lib/utils/uri_utils_spec.rb index 83f9a6c7528..a47fa3d05da 100644 --- a/spec/unit/lib/utils/uri_utils_spec.rb +++ b/spec/unit/lib/utils/uri_utils_spec.rb @@ -159,7 +159,7 @@ expect(UriUtils.parse_docker_uri('publish/buildpack:tag')).to eq ['', 'publish/buildpack', 'tag'] actual_result = UriUtils.parse_docker_uri('publish/buildpack@sha256:e118d023acaee5cf13471ead39f68416ad6172ff0899f3257ce1481cd2b28a6a') - expected_result = ['', 'publish/buildpack', '@sha256:e118d023acaee5cf13471ead39f68416ad6172ff0899f3257ce1481cd2b28a6a'] + expected_result = ['', 'publish/buildpack@sha256', 'e118d023acaee5cf13471ead39f68416ad6172ff0899f3257ce1481cd2b28a6a'] expect(actual_result).to eq expected_result actual_result = UriUtils.parse_docker_uri('publish/buildpack:tag@sha256:e118d023acaee5cf13471ead39f68416ad6172ff0899f3257ce1481cd2b28a6a') From 46e808d4e5a2a2865b218e16a7eb19f9bd89cb43 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Serdar=20=C3=96zer?= Date: Fri, 11 Jul 2025 10:23:18 +0200 Subject: 
[PATCH 2/4] fix: double-quoted converted to single-quoted --- lib/utils/uri_utils.rb | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/lib/utils/uri_utils.rb b/lib/utils/uri_utils.rb index 14e0cb0cd6a..6df016583d3 100644 --- a/lib/utils/uri_utils.rb +++ b/lib/utils/uri_utils.rb @@ -73,9 +73,9 @@ def self.parse_docker_uri(docker_uri) # if only sha256 presented, we add hash value as fragment to the uri, # since the ruby uri parser confuses because of second ':' in uri's path part. - if tag_digest && tag_digest.start_with?("sha256:") - hash_algo, hash_value = tag_digest.split(":") - path = path + "@sha256" + if tag_digest && tag_digest.start_with?('sha256:') + hash_algo, hash_value = tag_digest.split(':') + path = path + '@sha256' tag_digest = hash_value end From d9118298b4a3a1cc813b206ff1f4b272e87b80c5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Serdar=20=C3=96zer?= Date: Fri, 11 Jul 2025 11:48:16 +0200 Subject: [PATCH 3/4] refactor: splitting tag and digest from path refactored --- lib/utils/uri_utils.rb | 30 +++++++++++++++++++++++++----- 1 file changed, 25 insertions(+), 5 deletions(-) diff --git a/lib/utils/uri_utils.rb b/lib/utils/uri_utils.rb index 6df016583d3..10a8ab45d02 100644 --- a/lib/utils/uri_utils.rb +++ b/lib/utils/uri_utils.rb @@ -66,7 +66,7 @@ def self.parse_docker_uri(docker_uri) end path = 'library/' + path if (official_docker_registry(name_parts[0]) || missing_registry(name_parts)) && path.exclude?('/') - path, tag_digest = parse_docker_tag_or_digest_from_path(path) + path, tag_digest = parse_docker_tag_digest_from_path(path) raise InvalidDockerURI.new "Invalid image name [#{path}]" unless DOCKER_PATH_REGEX =~ path raise InvalidDockerURI.new "Invalid image tag [#{tag_digest}]" if tag_digest && !(DOCKER_TAG_DIGEST_REGEX =~ tag_digest) @@ -92,10 +92,30 @@ def self.parse_docker_uri(docker_uri) (host.exclude?('.') && host.exclude?(':') && host != 'localhost') end - private_class_method def
self.parse_docker_tag_or_digest_from_path(path) - path, tag_digest = path.split(/@|:/, 2) - return [path, tag_digest] unless tag_digest && tag_digest.include?('/') + private_class_method def self.parse_docker_tag_digest_from_path(path) + # Split path into base path and digest if digest is present (after '@') + base_path, digest = path.split('@', 2) + + if digest + # If digest is present and base_path contains a tag (':'), split it + if base_path.include?(':') + base_path, tag = base_path.split(':', 2) + # Return path and combined tag@digest + return [base_path, "#{tag}@#{digest}"] + end + + # Return path and digest if no tag present + return [base_path, digest] + end + + # No digest present, check for tag + base_path, tag = base_path.split(':', 2) + + # If tag is present but looks like a path segment (contains '/'), treat as no tag + return [base_path, 'latest'] if tag&.include?('/') + + # Return path and tag (or nil if no tag) + [base_path, tag] - [path, 'latest'] end end From 7e2fad5be11097d5aae6dd01a56993538d168077 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Serdar=20=C3=96zer?= Date: Fri, 11 Jul 2025 17:27:03 +0200 Subject: [PATCH 4/4] fix: rubocop suggestions are corrected --- .../diego/docker/docker_uri_converter.rb | 1 + lib/utils/uri_utils.rb | 16 ++++++++-------- 2 files changed, 9 insertions(+), 8 deletions(-) diff --git a/lib/cloud_controller/diego/docker/docker_uri_converter.rb b/lib/cloud_controller/diego/docker/docker_uri_converter.rb index 4938dd03be7..9dfe71960ad 100644 --- a/lib/cloud_controller/diego/docker/docker_uri_converter.rb +++ b/lib/cloud_controller/diego/docker/docker_uri_converter.rb @@ -4,6 +4,7 @@ module VCAP::CloudController class DockerURIConverter def convert(docker_uri) raise UriUtils::InvalidDockerURI.new "Docker URI [#{docker_uri}] should not contain scheme" if docker_uri.include? 
'://' + host, path, tag_digest = UriUtils.parse_docker_uri(docker_uri) # add tag or digest part as fragment to the uri, since ruby uri parser confuses with ':' diff --git a/lib/utils/uri_utils.rb b/lib/utils/uri_utils.rb index 10a8ab45d02..8fcb31088c8 100644 --- a/lib/utils/uri_utils.rb +++ b/lib/utils/uri_utils.rb @@ -5,9 +5,10 @@ module UriUtils GIT_REGEX = %r{ \A git:// .+? : .+? \.git \z }x DOCKER_INDEX_SERVER = 'docker.io'.freeze DOCKER_PATH_REGEX = %r{\A[a-z0-9_\-\.\/]{2,255}\Z} - DOCKER_TAG_REGEX = %r{[a-zA-Z0-9_\-\.]{1,128}} - DOCKER_DIGEST_REGEX = %r{sha256:[a-z0-9]{64}} - DOCKER_TAG_DIGEST_REGEX = Regexp.new("\\A(#{DOCKER_TAG_REGEX.source} | (#{DOCKER_TAG_REGEX.source}@#{DOCKER_DIGEST_REGEX.source}) | #{DOCKER_DIGEST_REGEX.source})\\Z", Regexp::EXTENDED) + DOCKER_TAG_REGEX = /[a-zA-Z0-9_\-\.]{1,128}/ + DOCKER_DIGEST_REGEX = /sha256:[a-z0-9]{64}/ + DOCKER_TAG_DIGEST_REGEX = Regexp.new("\\A(#{DOCKER_TAG_REGEX.source} | +(#{DOCKER_TAG_REGEX.source}@#{DOCKER_DIGEST_REGEX.source}) | #{DOCKER_DIGEST_REGEX.source})\\Z", Regexp::EXTENDED) class InvalidDockerURI < StandardError; end @@ -69,13 +70,13 @@ def self.parse_docker_uri(docker_uri) path, tag_digest = parse_docker_tag_digest_from_path(path) raise InvalidDockerURI.new "Invalid image name [#{path}]" unless DOCKER_PATH_REGEX =~ path - raise InvalidDockerURI.new "Invalid image tag [#{tag_digest}]" if tag_digest && !(DOCKER_TAG_DIGEST_REGEX =~ tag_digest) + raise InvalidDockerURI.new "Invalid image tag [#{tag_digest}]" if tag_digest && DOCKER_TAG_DIGEST_REGEX !~ tag_digest # if only sha256 presented, we add hash value as fragment to the uri, # since the ruby uri parser confuses because of second ':' in uri's path part. 
if tag_digest && tag_digest.start_with?('sha256:') - hash_algo, hash_value = tag_digest.split(':') - path = path + '@sha256' + _, hash_value = tag_digest.split(':') + path += '@sha256' tag_digest = hash_value end @@ -89,7 +90,7 @@ def self.parse_docker_uri(docker_uri) private_class_method def self.missing_registry(name_parts) host = name_parts[0] name_parts.length == 1 || - (host.exclude?('.') && host.exclude?(':') && host != 'localhost') + (host.exclude?('.') && host.exclude?(':') && host != 'localhost') end private_class_method def self.parse_docker_tag_digest_from_path(path) @@ -116,6 +117,5 @@ def self.parse_docker_uri(docker_uri) # Return path and tag (or nil if no tag) [base_path, tag] - end end