Skip to content

Commit

Permalink
caddyfile: Loosen heredoc parsing
Browse files Browse the repository at this point in the history
- Prevents trying to parse heredoc while already inside quotes
- A space while parsing the heredoc marker immediately ends parsing and returns the token as-is
- Now only checks for <<< after reaching a newline, so that it doesn't eagerly error if there are three < characters in a row on a token in the middle of a line
- Rearranged tests to cover more cases, more clearly
  • Loading branch information
francislavoie committed Aug 18, 2023
1 parent f11c3c9 commit 733175b
Show file tree
Hide file tree
Showing 2 changed files with 61 additions and 15 deletions.
16 changes: 13 additions & 3 deletions caddyconfig/caddyfile/lexer.go
Original file line number Diff line number Diff line change
Expand Up @@ -137,18 +137,28 @@ func (l *lexer) next() (bool, error) {
}

// detect whether we have the start of a heredoc
if !inHeredoc && !heredocEscaped && len(val) > 1 && string(val[:2]) == "<<" {
if ch == '<' {
return false, fmt.Errorf("too many '<' for heredoc on line #%d; only use two, for example <<END", l.line)
if !(quoted || btQuoted) && !(inHeredoc || heredocEscaped) &&
len(val) > 1 && string(val[:2]) == "<<" {
// a space means it's just a regular token and not a heredoc
if ch == ' ' {
return makeToken(0), nil
}

// skip CR, we only care about LF
if ch == '\r' {
continue
}

// after hitting a newline, we know that the heredoc marker
// is the characters after the two << and the newline.
// we reset the val because the heredoc is syntax we don't
// want to keep.
if ch == '\n' {
// check if there's too many <
if string(val[:3]) == "<<<" {
return false, fmt.Errorf("too many '<' for heredoc on line #%d; only use two, for example <<END", l.line)
}

heredocMarker = string(val[2:])
if !heredocMarkerRegexp.Match([]byte(heredocMarker)) {
return false, fmt.Errorf("heredoc marker on line #%d must contain only alpha-numeric characters, dashes and underscores; got '%s'", l.line, heredocMarker)
Expand Down
60 changes: 48 additions & 12 deletions caddyconfig/caddyfile/lexer_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -322,15 +322,59 @@ EOF same-line-arg
},
},
{
input: []byte(`heredoc <EOF
input: []byte(`escaped-heredoc \<< >>`),
expected: []Token{
{Line: 1, Text: `escaped-heredoc`},
{Line: 1, Text: `<<`},
{Line: 1, Text: `>>`},
},
},
{
input: []byte(`not-a-heredoc <EOF
content
EOF same-line-arg
`),
expected: []Token{
{Line: 1, Text: `heredoc`},
{Line: 1, Text: `not-a-heredoc`},
{Line: 1, Text: `<EOF`},
{Line: 2, Text: `content`},
{Line: 3, Text: `EOF`},
},
},
{
input: []byte(`not-a-heredoc <<<EOF content`),
expected: []Token{
{Line: 1, Text: `not-a-heredoc`},
{Line: 1, Text: `<<<EOF`},
{Line: 1, Text: `content`},
},
},
{
input: []byte(`not-a-heredoc "<<" ">>"`),
expected: []Token{
{Line: 1, Text: `not-a-heredoc`},
{Line: 1, Text: `<<`},
{Line: 1, Text: `>>`},
},
},
{
input: []byte(`not-a-heredoc << >>`),
expected: []Token{
{Line: 1, Text: `not-a-heredoc`},
{Line: 1, Text: `<<`},
{Line: 1, Text: `>>`},
},
},
{
input: []byte(`not-a-heredoc <<HERE SAME LINE
content
HERE same-line-arg
`),
expected: []Token{
{Line: 1, Text: `not-a-heredoc`},
{Line: 1, Text: `<<HERE`},
{Line: 1, Text: `SAME`},
{Line: 1, Text: `LINE`},
{Line: 2, Text: `content`},
{Line: 3, Text: `HERE`},
{Line: 3, Text: `same-line-arg`},
},
},
Expand Down Expand Up @@ -365,14 +409,6 @@ EOF same-line-arg
},
},
},
{
input: []byte(`heredoc <<HERE SAME LINE
content
HERE same-line-arg
`),
expectErr: true,
errorMessage: "heredoc marker on line #1 must contain only alpha-numeric characters, dashes and underscores; got 'HERE SAME LINE'",
},
{
input: []byte(`heredoc <<<EOF
content
Expand Down

0 comments on commit 733175b

Please sign in to comment.