Skip to content

Commit

Permalink
Fix normalization of consecutive slashes in uri path (#5114)
Browse files Browse the repository at this point in the history
Signed-off-by: Paolo Di Tommaso <paolo.ditommaso@gmail.com>
  • Loading branch information
pditommaso committed Jul 9, 2024
1 parent 4c207c2 commit 3f366b7
Show file tree
Hide file tree
Showing 6 changed files with 158 additions and 5 deletions.
39 changes: 39 additions & 0 deletions modules/nf-commons/src/main/nextflow/file/FileHelper.groovy
Original file line number Diff line number Diff line change
Expand Up @@ -266,6 +266,45 @@ class FileHelper {
return result.toAbsolutePath().normalize()
}

/**
* Remove consecutive slashes in a URI path. Ignore bu design any slash in the hostname and after the `?`
*
* @param uri The input URI as string
* @return The normalised URI string
*/
protected static String normalisePathSlashes0(String uri) {
if( !uri )
return uri
final SLASH = '/' as char
final QMARK = '?' as char
final scheme = getUrlProtocol(uri)
if( scheme==null || scheme=='file') {
// ignore for local files
return uri
}

// find first non-slash
final clean = scheme ? uri.substring(scheme.size()+1) : uri
final start = clean.findIndexOf(it->it!='/')
final result = new StringBuilder()
if( scheme )
result.append(scheme+':')
if( start )
result.append(clean.substring(0,start))
for( int i=start; i<clean.size(); i++ ) {
final ch = clean.charAt(i)
if( (ch!=SLASH) || i+1==clean.size() || (clean.charAt(i+1)!=SLASH)) {
if( ch==QMARK ) {
result.append(clean.substring(i))
break
}
else
result.append(clean.charAt(i))
}
}
return result.toString()
}

/**
* Given an hierarchical file URI path returns a {@link Path} object
* eventually creating the associated file system if required.
Expand Down
36 changes: 36 additions & 0 deletions modules/nf-commons/src/test/nextflow/file/FileHelperTest.groovy
Original file line number Diff line number Diff line change
Expand Up @@ -119,6 +119,17 @@ class FileHelperTest extends Specification {
path.toUri() == new URI('http://host.com/some/path/file.txt?x=1')
}

@Unroll
def 'should remove consecutive slashes in the path' () {
expect:
FileHelper.asPath(STR).toUri() == EXPECTED
where:
STR | EXPECTED
'http://foo//this///that' | new URI('http://foo/this/that')
'http://foo/this//that?x' | new URI('http://foo/this/that?x')
'http://foo/this//that?x//' | new URI('http://foo/this/that?x//') // <-- ignore slashes in the params
}

def 'should create a valid uri' () {

def URI uri
Expand Down Expand Up @@ -1088,6 +1099,7 @@ class FileHelperTest extends Specification {
Paths.get('/file.txt') | Paths.get('/file.txt')
and:
'http://foo/file.txt' | Paths.get(new URI('http://foo/file.txt'))
'http://foo/some///file.txt'| Paths.get(new URI('http://foo/some/file.txt'))
Paths.get(new URI('http://foo/file.txt')) | Paths.get(new URI('http://foo/file.txt'))

}
Expand Down Expand Up @@ -1118,4 +1130,28 @@ class FileHelperTest extends Specification {
Paths.get('/file.txt') | Paths.get('/file.txt')

}

@Unroll
def 'should remove invalid double slashed' () {
expect:
FileHelper.normalisePathSlashes0(PATH) == EXPECTED
where:
PATH | EXPECTED
null | null
'foo' | 'foo'
'http://foo/bar' | 'http://foo/bar'
'/some/file' | '/some/file'
'//some/file' | '//some/file' // <-- by design ignore multi-slash at root
'file:/some/file' | 'file:/some/file'
'file://some/file' | 'file://some/file'
'file:///some/file' | 'file:///some/file'
'http://example.com//path//to//resource' | 'http://example.com/path/to/resource'
'https://example.com////another///path/' | 'https://example.com/another/path/'
'https://example.com////another///path//' | 'https://example.com/another/path/'
'https://example.com////another///path/?and////' | 'https://example.com/another/path/?and////'
'https://example.com////another///path//?and////' | 'https://example.com/another/path/?and////'
's3://example.com////another///path//?and////' | 's3://example.com/another/path/?and////'
's3:///example.com////another///path' | 's3:///example.com/another/path'
'ftp://example.com//file//path' | 'ftp://example.com/file/path'
}
}
27 changes: 27 additions & 0 deletions plugins/nf-amazon/src/test/nextflow/file/FileHelperS3Test.groovy
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,8 @@ package nextflow.file

import java.nio.file.Path

import nextflow.Global
import nextflow.Session
import nextflow.SysEnv
import spock.lang.Specification
import spock.lang.Unroll
Expand Down Expand Up @@ -56,4 +58,29 @@ class FileHelperS3Test extends Specification {

}

def 'should convert to a canonical path' () {
given:
Global.session = Mock(Session) { getConfig() >> [:] }

expect:
FileHelper.toCanonicalPath(VALUE).toUri() == EXPECTED

where:
VALUE | EXPECTED
's3://foo/some/file.txt' | new URI('s3:///foo/some/file.txt')
's3://foo/some///file.txt' | new URI('s3:///foo/some/file.txt')
}

@Unroll
def 'should remove consecutive slashes in the path' () {
given:
Global.session = Mock(Session) { getConfig() >> [:] }

expect:
FileHelper.asPath(STR).toUri() == EXPECTED
where:
STR | EXPECTED
's3://foo//this/that' | new URI('s3:///foo/this/that')
's3://foo//this///that' | new URI('s3:///foo/this/that')
}
}
25 changes: 25 additions & 0 deletions plugins/nf-azure/src/test/nextflow/file/FileHelperAzTest.groovy
Original file line number Diff line number Diff line change
Expand Up @@ -74,4 +74,29 @@ class FileHelperAzTest extends Specification {

}

def 'should convert to a canonical path' () {
given:
Global.session = Mock(Session) { getConfig() >> [google:[project:'foo', region:'x']] }

expect:
FileHelper.toCanonicalPath(VALUE).toUri() == EXPECTED

where:
VALUE | EXPECTED
'az://foo/some/file.txt' | new URI('az://foo/some/file.txt')
'az://foo/some///file.txt' | new URI('az://foo/some/file.txt')
}

@Unroll
def 'should remove consecutive slashes in the path' () {
given:
Global.session = Mock(Session) { getConfig() >> [:] }

expect:
FileHelper.asPath(STR).toUri() == EXPECTED
where:
STR | EXPECTED
'az://foo//this/that' | new URI('az://foo/this/that')
'az://foo//this///that' | new URI('az://foo/this/that')
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -97,9 +97,10 @@ class GsPathFactory extends FileSystemPathFactory {
init()
final str = uri.substring(5)
final p = str.indexOf('/')
return p == -1
final ret = p == -1
? CloudStorageFileSystem.forBucket(str, storageConfig, storageOptions).getPath('')
: CloudStorageFileSystem.forBucket(str.substring(0,p), storageConfig, storageOptions).getPath(str.substring(p))
return ret.normalize()
}

@Override
Expand Down
33 changes: 29 additions & 4 deletions plugins/nf-google/src/test/nextflow/file/FileHelperGsTest.groovy
Original file line number Diff line number Diff line change
Expand Up @@ -22,14 +22,12 @@ import java.nio.file.Path
import java.nio.file.Paths

import com.google.cloud.storage.contrib.nio.CloudStorageFileSystem
import nextflow.Global
import nextflow.Session
import nextflow.SysEnv
import spock.lang.Ignore
import spock.lang.Specification

import nextflow.Global
import nextflow.Session
import spock.lang.Unroll

/**
*
* @author Paolo Di Tommaso <paolo.ditommaso@gmail.com>
Expand Down Expand Up @@ -128,4 +126,31 @@ class FileHelperGsTest extends Specification {
'/file.txt' | '/file.txt'
Path.of('/file.txt') | '/file.txt'
}

def 'should convert to a canonical path' () {
given:
Global.session = Mock(Session) { getConfig() >> [google:[project:'foo', region:'x']] }

expect:
FileHelper.toCanonicalPath(VALUE).toUri() == EXPECTED

where:
VALUE | EXPECTED
'gs://foo/some/file.txt' | new URI('gs://foo/some/file.txt')
'gs://foo/some///file.txt' | new URI('gs://foo/some/file.txt')
}

@Unroll
def 'should remove consecutive slashes in the path' () {
given:
Global.session = Mock(Session) { getConfig() >> [google:[project:'foo', region:'x']] }

expect:
FileHelper.asPath(STR).toUri() == EXPECTED
where:
STR | EXPECTED
'gs://foo//this/that' | new URI('gs://foo/this/that')
'gs://foo//this///that' | new URI('gs://foo/this/that')
}

}

0 comments on commit 3f366b7

Please sign in to comment.