Skip to content

Commit

Permalink
get_bucket() handles case when max > 1000
Browse files Browse the repository at this point in the history
  • Loading branch information
ThierryO committed Jan 18, 2017
1 parent 73e6740 commit bd42806
Show file tree
Hide file tree
Showing 3 changed files with 34 additions and 3 deletions.
1 change: 1 addition & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -92,6 +92,7 @@ importFrom(httr,warn_for_status)
importFrom(tools,md5sum)
importFrom(utils,URLencode)
importFrom(utils,str)
importFrom(utils,tail)
importFrom(xml2,as_list)
importFrom(xml2,read_xml)
importFrom(xml2,write_xml)
Expand Down
34 changes: 32 additions & 2 deletions R/get_bucket.R
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
#' @template bucket
#' @param prefix Character string that limits the response to keys that begin with the specified prefix
#' @param delimiter Character string used to group keys. Read the AWS doc for more detail.
#' @param max Integer indicating the maximum number of keys to return (max 1000).
#' @param max Integer indicating the maximum number of keys to return. The function will repeatedly query the bucket when \code{max > 1000}. Use \code{max = Inf} to retrieve all objects.
#' @param marker Character string that specifies the key to start with when listing objects in a bucket. Amazon S3 returns object keys in alphabetical order, starting with the key after the marker.
#' @param parse_response logical, should we attempt to parse the response?
#' @template dots
Expand All @@ -20,6 +20,7 @@
#' @references \href{https://docs.aws.amazon.com/AmazonS3/latest/API/RESTBucketGET.html}{API Documentation}
#' @seealso \code{\link{bucketlist}}, \code{\link{get_object}}
#' @export
#' @importFrom utils tail
get_bucket <- function(bucket,
prefix = NULL,
delimiter = NULL,
Expand All @@ -28,9 +29,38 @@ get_bucket <- function(bucket,
parse_response = TRUE,
...) {

query <- list(prefix = prefix, delimiter = delimiter, "max-keys" = max, marker = marker)
if (is.null(max)) {
query <- list(prefix = prefix, delimiter = delimiter, "max-keys" = NULL, marker = marker)
} else {
query <- list(prefix = prefix, delimiter = delimiter, "max-keys" = pmin(1000, max), marker = marker)
}
r <- s3HTTP(verb = "GET", bucket = bucket, query = query, parse_response = parse_response, ...)

while (
r$IsTruncated == "true" &&
!is.null(max) &&
as.integer(r$MaxKeys) < max
) {
query <- list(
prefix = prefix,
delimiter = delimiter,
"max-keys" = pmin(max - as.integer(r$MaxKeys), 1000),
marker = tail(r, 1)$Contents$Key
)
extra <- s3HTTP(verb = "GET", bucket = bucket, query = query, parse_response = parse_response, ...)
new_r <- c(r, tail(extra, -5))
new_r$MaxKeys <- as.character(as.integer(r$MaxKeys) + as.integer(extra$MaxKeys))
new_r$IsTruncated <- extra$IsTruncated
attr(new_r, "x-amz-id-2") <- attr(r, "x-amz-id-2")
attr(new_r, "x-amz-request-id") <- attr(r, "x-amz-request-id")
attr(new_r, "date") <- attr(r, "date")
attr(new_r, "x-amz-bucket-region") <- attr(r, "x-amz-bucket-region")
attr(new_r, "content-type") <- attr(r, "content-type")
attr(new_r, "transfer-encoding") <- attr(r, "transfer-encoding")
attr(new_r, "server") <- attr(r, "server")
r <- new_r
}

if (!isTRUE(parse_response)) {
return(r)
}
Expand Down
2 changes: 1 addition & 1 deletion man/get_bucket.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

0 comments on commit bd42806

Please sign in to comment.