From bd428067f8e449a1c53a72a40db9f763d09ce6ff Mon Sep 17 00:00:00 2001 From: ThierryO Date: Wed, 18 Jan 2017 16:46:01 +0100 Subject: [PATCH] get_bucket() handles case when max > 1000 --- NAMESPACE | 1 + R/get_bucket.R | 34 ++++++++++++++++++++++++++++++++-- man/get_bucket.Rd | 2 +- 3 files changed, 34 insertions(+), 3 deletions(-) diff --git a/NAMESPACE b/NAMESPACE index 9ea5c6a..fb967b2 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -92,6 +92,7 @@ importFrom(httr,warn_for_status) importFrom(tools,md5sum) importFrom(utils,URLencode) importFrom(utils,str) +importFrom(utils,tail) importFrom(xml2,as_list) importFrom(xml2,read_xml) importFrom(xml2,write_xml) diff --git a/R/get_bucket.R b/R/get_bucket.R index c87d3fb..08918ed 100644 --- a/R/get_bucket.R +++ b/R/get_bucket.R @@ -4,7 +4,7 @@ #' @template bucket #' @param prefix Character string that limits the response to keys that begin with the specified prefix #' @param delimiter Character string used to group keys. Read the AWS doc for more detail. -#' @param max Integer indicating the maximum number of keys to return (max 1000). +#' @param max Integer indicating the maximum number of keys to return. The function will recursively access the bucket in case \code{max > 1000}. Use \code{max = Inf} to retrieve all objects. #' @param marker Character string that pecifies the key to start with when listing objects in a bucket. Amazon S3 returns object keys in alphabetical order, starting with key after the marker in order. #' @param parse_response logical, should we attempt to parse the response? #' @template dots @@ -20,6 +20,7 @@ #' @references \href{https://docs.aws.amazon.com/AmazonS3/latest/API/RESTBucketGET.html}{API Documentation} #' @seealso \code{\link{bucketlist}}, \code{\link{get_object}} #' @export +#' @importFrom utils tail get_bucket <- function(bucket, prefix = NULL, delimiter = NULL, @@ -28,9 +29,38 @@ get_bucket <- function(bucket, parse_response = TRUE, ...) { - query <- list(prefix = prefix, delimiter = delimiter, "max-keys" = max, marker = marker) + if (is.null(max)) { + query <- list(prefix = prefix, delimiter = delimiter, "max-keys" = NULL, marker = marker) + } else { + query <- list(prefix = prefix, delimiter = delimiter, "max-keys" = pmin(1000, max), marker = marker) + } r <- s3HTTP(verb = "GET", bucket = bucket, query = query, parse_response = parse_response, ...) + while ( + r$IsTruncated == "true" && + !is.null(max) && + as.integer(r$MaxKeys) < max + ) { + query <- list( + prefix = prefix, + delimiter = delimiter, + "max-keys" = pmin(max - as.integer(r$MaxKeys), 1000), + marker = tail(r, 1)$Contents$Key + ) + extra <- s3HTTP(verb = "GET", bucket = bucket, query = query, parse_response = parse_response, ...) + new_r <- c(r, tail(extra, -5)) + new_r$MaxKeys <- as.character(as.integer(r$MaxKeys) + as.integer(extra$MaxKeys)) + new_r$IsTruncated <- extra$IsTruncated + attr(new_r, "x-amz-id-2") <- attr(r, "x-amz-id-2") + attr(new_r, "x-amz-request-id") <- attr(r, "x-amz-request-id") + attr(new_r, "date") <- attr(r, "date") + attr(new_r, "x-amz-bucket-region") <- attr(r, "x-amz-bucket-region") + attr(new_r, "content-type") <- attr(r, "content-type") + attr(new_r, "transfer-encoding") <- attr(r, "transfer-encoding") + attr(new_r, "server") <- attr(r, "server") + r <- new_r + } + if (!isTRUE(parse_response)) { return(r) } diff --git a/man/get_bucket.Rd b/man/get_bucket.Rd index 025725f..666aa01 100644 --- a/man/get_bucket.Rd +++ b/man/get_bucket.Rd @@ -18,7 +18,7 @@ get_bucket_df(bucket, prefix = NULL, delimiter = NULL, max = NULL, \item{delimiter}{Character string used to group keys. Read the AWS doc for more detail.} -\item{max}{Integer indicating the maximum number of keys to return (max 1000).} +\item{max}{Integer indicating the maximum number of keys to return. The function will recursively access the bucket in case \code{max > 1000}. Use \code{max = Inf} to retrieve all objects.} \item{marker}{Character string that pecifies the key to start with when listing objects in a bucket. Amazon S3 returns object keys in alphabetical order, starting with key after the marker in order.}