Skip to content

Commit

Permalink
Merge pull request #1452 from fishtown-analytics/feature/strip-seed-bom
Browse files Browse the repository at this point in the history
Handle seeds with utf-8 BOM (#1177)
  • Loading branch information
beckjake committed May 10, 2019
2 parents 7f7002f + 26427d2 commit d5774b3
Show file tree
Hide file tree
Showing 5 changed files with 567 additions and 5 deletions.
10 changes: 9 additions & 1 deletion core/dbt/clients/agate_helper.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,11 @@
from codecs import BOM_UTF8

import dbt.compat

import agate

# The UTF-8 byte order mark decoded to text, so it can be compared against
# the first character read from a file that was opened as utf-8 text.
BOM = BOM_UTF8.decode('utf-8') # '\ufeff'

DEFAULT_TYPE_TESTER = agate.TypeTester(types=[
agate.data_types.Number(null_values=('null', '')),
agate.data_types.TimeDelta(null_values=('null', '')),
Expand Down Expand Up @@ -41,4 +46,7 @@ def as_matrix(table):


def from_csv(abspath):
    """Load the CSV file at ``abspath`` into an agate Table.

    The file is opened as utf-8 text via ``dbt.compat.open_file``. If its
    first character is a byte order mark, that character is skipped so it
    does not end up as part of the first column name.

    :param abspath: Path of the CSV file to read.
    :return: The table built by ``agate.Table.from_csv`` using
        ``DEFAULT_TYPE_TESTER`` for column type inference.
    """
    with dbt.compat.open_file(abspath) as fp:
        # Consume one character. Only rewind when it is NOT the BOM, so a
        # leading BOM stays consumed and the parser starts right after it.
        if fp.read(1) != BOM:
            fp.seek(0)
        return agate.Table.from_csv(fp, column_types=DEFAULT_TYPE_TESTER)
17 changes: 13 additions & 4 deletions core/dbt/compat.py
Original file line number Diff line number Diff line change
Expand Up @@ -82,11 +82,20 @@ def to_native_string(s):

def write_file(path, s):
    """Write ``s`` to ``path`` as utf-8 encoded text.

    ``s`` is passed through ``to_string`` before being written. The file is
    opened in ``'w'`` mode, so any existing content is replaced.

    :param path: Path of the file to write.
    :param s: The content to write.
    :return: Whatever the underlying file object's ``write`` call returns.
    """
    # Both openers accept an ``encoding`` keyword, so choose one up front
    # and share a single write path with an explicit utf-8 encoding.
    if WHICH_PYTHON == 2:
        opener = codecs.open
    else:
        opener = builtins.open
    with opener(path, 'w', encoding='utf-8') as f:
        return f.write(to_string(s))


def open_file(path):
    """Return a file object reading ``path`` as utf-8 text.

    The caller is responsible for closing the returned file, for example
    by using it as a context manager. The file must be utf-8 encoded.
    """
    # Python 2 uses codecs.open here; otherwise the builtin open is used.
    opener = codecs.open if WHICH_PYTHON == 2 else builtins.open
    return opener(path, encoding='utf-8')


if WHICH_PYTHON == 2:
Expand Down
Loading

0 comments on commit d5774b3

Please sign in to comment.