Skip to content

Commit

Permalink
fix: parquet over http (#2565)
Browse files Browse the repository at this point in the history
reqwest's `content_length` method apparently doesn't check for the casing
of "Content-Length". This seems like a bug in reqwest to me.

seanmonstar/reqwest#843


```rust

/// Minimal reproduction: sends a HEAD request for a public parquet file and
/// asserts on the value reqwest reports from `content_length()`.
#[tokio::main]
async fn main() {
    let url = "https://clickhouse-public-datasets.s3.eu-central-1.amazonaws.com/hits_compatible/athena/hits.parquet";
    let http = reqwest::Client::new();
    let response = http.head(url).send().await.unwrap();

    // Value the snippet expects `content_length()` to report for this object.
    let expected: Option<u64> = Some(14779976446);
    assert_eq!(response.content_length(), expected)
}

```

closes #2561
  • Loading branch information
universalmind303 authored Feb 2, 2024
1 parent bb88f76 commit bbe15fb
Showing 1 changed file with 13 additions and 4 deletions.
17 changes: 13 additions & 4 deletions crates/datasources/src/object_store/http.rs
Original file line number Diff line number Diff line change
Expand Up @@ -91,10 +91,20 @@ impl ObjStoreAccess for HttpStoreAccess {
status, self.url
)));
}
// reqwest doesn't check the content length header, instead looks at the contents
// See: https://github.com/seanmonstar/reqwest/issues/843
let len: u64 = res
.headers()
.get("Content-Length")
.and_then(|v| v.to_str().ok())
.and_then(|v| v.parse().ok())
.unwrap_or_else(|| res.content_length().unwrap_or(0));
if len == 0 {
return Err(ObjectStoreSourceError::Static(
"Missing content-length header",
));
}

let len = res.content_length().ok_or(ObjectStoreSourceError::Static(
"Missing content-length header",
))?;

Ok(ObjectMeta {
location: location.clone(),
Expand All @@ -119,7 +129,6 @@ impl ObjStoreAccess for HttpStoreAccess {
let next = locations
.next()
.ok_or(ObjectStoreSourceError::Static("No locations provided"))?;

let objects = self
.list_globbed(&store, &next.path())
.await
Expand Down

0 comments on commit bbe15fb

Please sign in to comment.