Skip to content

Commit 73ffd7e

Browse files
authored
Merge pull request #57 from keboola/CM-873-php-csv-may-detect-linebreak-incorrectly-in-edge-case-v3
CM-873 May detect linebreak incorrectly in edge case
2 parents 4d7785e + b2c49ed commit 73ffd7e

12 files changed

+71
-42
lines changed

.gitattributes

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
*.crlf.csv eol=crlf

composer.json

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -29,11 +29,11 @@
2929
},
3030
"require-dev": {
3131
"ext-json": "*",
32-
"keboola/coding-standard": "^13.0",
32+
"keboola/coding-standard": "^15.0",
3333
"php-parallel-lint/php-parallel-lint": "^1.3",
34-
"phpstan/phpstan": "^1.4",
34+
"phpstan/phpstan": "^1.10",
3535
"phpunit/phpunit": ">=7.5 <=9.6",
36-
"phpstan/phpdoc-parser": "1.5.*"
36+
"phpstan/phpdoc-parser": "^1.25"
3737
},
3838
"scripts": {
3939
"phpstan": "phpstan analyse ./src ./tests --level=max --no-progress -c phpstan.neon",

phpcs.xml

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,4 +21,7 @@
2121
<rule ref="SlevomatCodingStandard.TypeHints.DeclareStrictTypes">
2222
<exclude name="SlevomatCodingStandard.TypeHints.DeclareStrictTypes.DeclareStrictTypesMissing"/>
2323
</rule>
24-
</ruleset>
24+
<rule ref="SlevomatCodingStandard.Functions.RequireTrailingCommaInDeclaration">
25+
<exclude name="SlevomatCodingStandard.Functions.RequireTrailingCommaInDeclaration.MissingTrailingComma"/>
26+
</rule>
27+
</ruleset>

phpstan-baseline-8+.neon

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -275,6 +275,11 @@ parameters:
275275
count: 1
276276
path: tests/CsvReadTest.php
277277

278+
-
279+
message: "#^Method Keboola\\\\Csv\\\\Tests\\\\CsvReadTest\\:\\:testNewlineDetectionEdgecaseWithCrLf\\(\\) has no return type specified\\.$#"
280+
count: 1
281+
path: tests/CsvReadTest.php
282+
278283
-
279284
message: "#^Method Keboola\\\\Csv\\\\Tests\\\\CsvReadTest\\:\\:testParse\\(\\) has no return type specified\\.$#"
280285
count: 1
@@ -493,4 +498,4 @@ parameters:
493498
-
494499
message: "#^Parameter \\#1 \\$header of static method Keboola\\\\Csv\\\\UTF8BOMHelper\\:\\:detectAndRemoveBOM\\(\\) expects array, mixed given\\.$#"
495500
count: 1
496-
path: tests/UTF8BOMHelperTest.php
501+
path: tests/UTF8BOMHelperTest.php

phpstan-baseline.neon

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -280,6 +280,11 @@ parameters:
280280
count: 1
281281
path: tests/CsvReadTest.php
282282

283+
-
284+
message: "#^Method Keboola\\\\Csv\\\\Tests\\\\CsvReadTest\\:\\:testNewlineDetectionEdgecaseWithCrLf\\(\\) has no return type specified\\.$#"
285+
count: 1
286+
path: tests/CsvReadTest.php
287+
283288
-
284289
message: "#^Method Keboola\\\\Csv\\\\Tests\\\\CsvReadTest\\:\\:testParse\\(\\) has no return type specified\\.$#"
285290
count: 1
@@ -498,4 +503,4 @@ parameters:
498503
-
499504
message: "#^Parameter \\#1 \\$header of static method Keboola\\\\Csv\\\\UTF8BOMHelper\\:\\:detectAndRemoveBOM\\(\\) expects array, mixed given\\.$#"
500505
count: 1
501-
path: tests/UTF8BOMHelperTest.php
506+
path: tests/UTF8BOMHelperTest.php

src/CsvOptions.php

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -50,7 +50,7 @@ protected function validateEnclosure($enclosure)
5050
if (strlen($enclosure) > 1) {
5151
throw new InvalidArgumentException(
5252
'Enclosure must be a single character. ' . json_encode($enclosure) . ' received',
53-
Exception::INVALID_PARAM
53+
Exception::INVALID_PARAM,
5454
);
5555
}
5656
}
@@ -64,14 +64,14 @@ protected function validateDelimiter($delimiter)
6464
if (strlen($delimiter) > 1) {
6565
throw new InvalidArgumentException(
6666
'Delimiter must be a single character. ' . json_encode($delimiter) . ' received',
67-
Exception::INVALID_PARAM
67+
Exception::INVALID_PARAM,
6868
);
6969
}
7070

7171
if (strlen($delimiter) === 0) {
7272
throw new InvalidArgumentException(
7373
'Delimiter cannot be empty.',
74-
Exception::INVALID_PARAM
74+
Exception::INVALID_PARAM,
7575
);
7676
}
7777
}

src/CsvReader.php

Lines changed: 11 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@ class CsvReader extends AbstractCsvFile implements Iterator
1313
* @deprecated use Keboola\Csv\CsvOptions::DEFAULT_ESCAPED_BY
1414
*/
1515
const DEFAULT_ESCAPED_BY = CsvOptions::DEFAULT_ESCAPED_BY;
16+
const SAMPLE_SIZE = 10000;
1617

1718
/**
1819
* @var int
@@ -92,7 +93,7 @@ protected function validateSkipLines($skipLines)
9293
if (!is_int($skipLines) || $skipLines < 0) {
9394
throw new InvalidArgumentException(
9495
"Number of lines to skip must be a positive integer. \"$skipLines\" received.",
95-
Exception::INVALID_PARAM
96+
Exception::INVALID_PARAM,
9697
);
9798
}
9899
}
@@ -106,14 +107,14 @@ protected function openCsvFile($fileName)
106107
if (!is_file($fileName)) {
107108
throw new Exception(
108109
'Cannot open file ' . $fileName,
109-
Exception::FILE_NOT_EXISTS
110+
Exception::FILE_NOT_EXISTS,
110111
);
111112
}
112113
$this->filePointer = @fopen($fileName, 'r');
113114
if (!$this->filePointer) {
114115
throw new Exception(
115116
"Cannot open file {$fileName} " . error_get_last()['message'],
116-
Exception::FILE_NOT_EXISTS
117+
Exception::FILE_NOT_EXISTS,
117118
);
118119
}
119120
}
@@ -124,7 +125,12 @@ protected function openCsvFile($fileName)
124125
protected function detectLineBreak()
125126
{
126127
@rewind($this->getFilePointer());
127-
$sample = @fread($this->getFilePointer(), 10000);
128+
$sample = @fread($this->getFilePointer(), self::SAMPLE_SIZE);
129+
if (substr((string) $sample, -1) === "\r") {
130+
// the sample may end in the middle of a CR+LF pair (CR read, LF not yet), so re-read with one extra byte
131+
@rewind($this->getFilePointer());
132+
$sample = @fread($this->getFilePointer(), self::SAMPLE_SIZE+1);
133+
}
128134

129135
return LineBreaksHelper::detectLineBreaks($sample, $this->getEnclosure(), $this->getEscapedBy());
130136
}
@@ -155,7 +161,7 @@ protected function validateLineBreak()
155161

156162
throw new InvalidArgumentException(
157163
"Invalid line break. Please use unix \\n or win \\r\\n line breaks.",
158-
Exception::INVALID_PARAM
164+
Exception::INVALID_PARAM,
159165
);
160166
}
161167

src/CsvWriter.php

Lines changed: 7 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -55,7 +55,7 @@ private function validateLineBreak($lineBreak)
5555
throw new Exception(
5656
'Invalid line break: ' . json_encode($lineBreak) .
5757
' allowed line breaks: ' . json_encode($allowedLineBreaks),
58-
Exception::INVALID_PARAM
58+
Exception::INVALID_PARAM,
5959
);
6060
}
6161
}
@@ -72,13 +72,14 @@ protected function openCsvFile($fileName)
7272
throw new Exception(
7373
"Cannot open file {$fileName} " . $e->getMessage(),
7474
Exception::FILE_NOT_EXISTS,
75-
$e
75+
$e,
7676
);
7777
}
78+
7879
if (!$this->filePointer) {
7980
throw new Exception(
8081
"Cannot open file {$fileName} " . error_get_last()['message'],
81-
Exception::FILE_NOT_EXISTS
82+
Exception::FILE_NOT_EXISTS,
8283
);
8384
}
8485
}
@@ -99,7 +100,7 @@ public function writeRow(array $row)
99100
' Return: false' .
100101
' To write: ' . strlen($str) . ' Written: 0',
101102
Exception::WRITE_ERROR,
102-
$e
103+
$e,
103104
);
104105
}
105106

@@ -113,7 +114,7 @@ public function writeRow(array $row)
113114
($ret === false && error_get_last() ? 'Error: ' . error_get_last()['message'] : '') .
114115
' Return: ' . json_encode($ret) .
115116
' To write: ' . strlen($str) . ' Written: ' . (int) $ret,
116-
Exception::WRITE_ERROR
117+
Exception::WRITE_ERROR,
117118
);
118119
}
119120
}
@@ -137,7 +138,7 @@ public function rowToStr(array $row)
137138
)) {
138139
throw new Exception(
139140
'Cannot write data into column: ' . var_export($column, true),
140-
Exception::WRITE_ERROR
141+
Exception::WRITE_ERROR,
141142
);
142143
}
143144

tests/CsvReadTest.php

Lines changed: 15 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,13 @@ public function testColumnsCount()
3131
self::assertEquals(9, $csv->getColumnsCount());
3232
}
3333

34+
public function testNewlineDetectionEdgecaseWithCrLf()
35+
{
36+
$this->expectNotToPerformAssertions();
37+
// this used to throw "Invalid line break. Please use unix \n or win \r\n line breaks." before the fix
38+
new CsvReader(__DIR__ . '/data/test-input-edgecase.crlf.csv');
39+
}
40+
3441
/**
3542
* @dataProvider validCsvFiles
3643
* @param string $fileName
@@ -266,7 +273,7 @@ public function testSkipsHeaders()
266273
CsvOptions::DEFAULT_DELIMITER,
267274
CsvOptions::DEFAULT_ENCLOSURE,
268275
CsvOptions::DEFAULT_ESCAPED_BY,
269-
1
276+
1,
270277
);
271278
self::assertEquals(['id', 'isImported'], $csvFile->getHeader());
272279
self::assertEquals([
@@ -285,7 +292,7 @@ public function testSkipNoLines()
285292
CsvOptions::DEFAULT_DELIMITER,
286293
CsvOptions::DEFAULT_ENCLOSURE,
287294
CsvOptions::DEFAULT_ESCAPED_BY,
288-
0
295+
0,
289296
);
290297
self::assertEquals(['id', 'isImported'], $csvFile->getHeader());
291298
self::assertEquals([
@@ -305,7 +312,7 @@ public function testSkipsMultipleLines()
305312
CsvOptions::DEFAULT_DELIMITER,
306313
CsvOptions::DEFAULT_ENCLOSURE,
307314
CsvOptions::DEFAULT_ESCAPED_BY,
308-
3
315+
3,
309316
);
310317
self::assertEquals(['id', 'isImported'], $csvFile->getHeader());
311318
self::assertEquals([
@@ -322,7 +329,7 @@ public function testSkipsOverflow()
322329
CsvOptions::DEFAULT_DELIMITER,
323330
CsvOptions::DEFAULT_ENCLOSURE,
324331
CsvOptions::DEFAULT_ESCAPED_BY,
325-
100
332+
100,
326333
);
327334
self::assertEquals(['id', 'isImported'], $csvFile->getHeader());
328335
self::assertEquals([], iterator_to_array($csvFile));
@@ -396,7 +403,7 @@ public function testInvalidSkipLines($skipLines, $message)
396403
CsvOptions::DEFAULT_DELIMITER,
397404
CsvOptions::DEFAULT_ENCLOSURE,
398405
CsvOptions::DEFAULT_ENCLOSURE,
399-
$skipLines
406+
$skipLines,
400407
);
401408
}
402409

@@ -475,9 +482,9 @@ public function testWriteReadInTheMiddle()
475482
'"1","first"',
476483
'"2","second"',
477484
'',
478-
]
485+
],
479486
),
480-
$data
487+
$data,
481488
);
482489
}
483490

@@ -525,7 +532,7 @@ public function testInvalidFile()
525532
public function testPerformance($fileContent, $expectedRows, $maxDuration)
526533
{
527534
self::markTestSkipped(
528-
'Run this test only manually. Because the duration is very different in local CI environment.'
535+
'Run this test only manually. Because the duration is very different in local CI environment.',
529536
);
530537

531538
try {

tests/CsvWriteTest.php

Lines changed: 13 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -71,9 +71,9 @@ public function testWrite()
7171
"\"columns with\nnew line\",\"columns with\ttab\"",
7272
'"column with \\n \\t \\\\","second col"',
7373
'',
74-
]
74+
],
7575
),
76-
$data
76+
$data,
7777
);
7878
}
7979

@@ -130,9 +130,9 @@ public function testWriteValidObject()
130130
'"col1","col2"' ,
131131
'"1","me string"',
132132
'',
133-
]
133+
],
134134
),
135-
$data
135+
$data,
136136
);
137137
}
138138

@@ -191,9 +191,9 @@ public function testWritePointer()
191191
[
192192
'"col1","col2"' ,
193193
'foo,bar',
194-
]
194+
],
195195
),
196-
$data
196+
$data,
197197
);
198198
}
199199

@@ -213,15 +213,15 @@ public function testInvalidPointer()
213213
$or = new LogicalOr();
214214
$or->setConstraints([
215215
new StringContains(
216-
'Cannot write to CSV file Return: 0 To write: 14 Written: 0'
216+
'Cannot write to CSV file Return: 0 To write: 14 Written: 0',
217217
),
218218
new StringContains(
219219
'Cannot write to CSV file Error: fwrite(): ' .
220-
'write of 14 bytes failed with errno=9 Bad file descriptor Return: false To write: 14 Written: 0'
220+
'write of 14 bytes failed with errno=9 Bad file descriptor Return: false To write: 14 Written: 0',
221221
),
222222
new StringContains(
223223
'Cannot write to CSV file Error: fwrite(): ' .
224-
'Write of 14 bytes failed with errno=9 Bad file descriptor Return: false To write: 14 Written: 0'
224+
'Write of 14 bytes failed with errno=9 Bad file descriptor Return: false To write: 14 Written: 0',
225225
),
226226
]);
227227
self::assertThat($e->getMessage(), $or);
@@ -238,7 +238,7 @@ public function testInvalidPointer2()
238238
$rows = [['col1', 'col2']];
239239
self::expectException(Exception::class);
240240
self::expectExceptionMessage(
241-
'a valid stream resource Return: false To write: 14 Written: '
241+
'a valid stream resource Return: false To write: 14 Written: ',
242242
);
243243
$csvFile->writeRow($rows[0]);
244244
}
@@ -258,7 +258,7 @@ public function testWriteLineBreak()
258258
$fileName,
259259
CsvOptions::DEFAULT_DELIMITER,
260260
CsvOptions::DEFAULT_ENCLOSURE,
261-
"\r\n"
261+
"\r\n",
262262
);
263263
$rows = [
264264
[
@@ -280,9 +280,9 @@ public function testWriteLineBreak()
280280
'"col1","col2"',
281281
'"val1","val2"',
282282
'',
283-
]
283+
],
284284
),
285-
$data
285+
$data,
286286
);
287287
}
288288
}

0 commit comments

Comments
 (0)