Skip to content

Commit 7dc24fe

Browse files
author
Steve Lamb
committed
Merge pull request #74 from azavea/feature/handle_unicode_better
Feature/handle unicode better
2 parents 48df5fd + ac7d2db commit 7dc24fe

File tree

6 files changed

+90
-49
lines changed

6 files changed

+90
-49
lines changed

djqscsv/djqscsv.py

Lines changed: 16 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -69,8 +69,8 @@ def write_csv(queryset, file_obj, **kwargs):
6969
if key not in DJQSCSV_KWARGS:
7070
csv_kwargs[key] = val
7171

72-
# add BOM to suppor CSVs in MS Excel
73-
file_obj.write(u'\ufeff'.encode('utf8'))
72+
# add BOM to support CSVs in MS Excel (for Windows only)
73+
file_obj.write(_safe_utf8_stringify(u'\ufeff'))
7474

7575
# the CSV must always be built from a values queryset
7676
# in order to introspect the necessary fields.
@@ -110,7 +110,7 @@ def write_csv(queryset, file_obj, **kwargs):
110110
name_map = dict((field, field) for field in field_names)
111111
if use_verbose_names:
112112
name_map.update(
113-
dict((field.name, field.verbose_name.encode('utf-8'))
113+
dict((field.name, field.verbose_name)
114114
for field in queryset.model._meta.fields
115115
if field.name in field_names))
116116

@@ -119,6 +119,9 @@ def write_csv(queryset, file_obj, **kwargs):
119119
merged_header_map.update(field_header_map)
120120
if extra_columns:
121121
merged_header_map.update(dict((k, k) for k in extra_columns))
122+
123+
merged_header_map = dict((k, _safe_utf8_stringify(v))
124+
for (k, v) in merged_header_map.items())
122125
writer.writerow(merged_header_map)
123126

124127
for record in values_qs:
@@ -155,6 +158,15 @@ def _validate_and_clean_filename(filename):
155158
return filename
156159

157160

161+
def _safe_utf8_stringify(value):
162+
if isinstance(value, str):
163+
return value
164+
elif isinstance(value, unicode):
165+
return value.encode('utf-8')
166+
else:
167+
return unicode(value).encode('utf-8')
168+
169+
158170
def _sanitize_unicode_record(field_serializer_map, record):
159171

160172
def _serialize_value(value):
@@ -165,21 +177,12 @@ def _serialize_value(value):
165177
else:
166178
return unicode(value)
167179

168-
def _sanitize_text(value):
169-
# make sure every text value is of type 'str', coercing unicode
170-
if isinstance(value, unicode):
171-
return value.encode("utf-8")
172-
elif isinstance(value, str):
173-
return value
174-
else:
175-
return str(value).encode("utf-8")
176-
177180
obj = {}
178181
for key, val in six.iteritems(record):
179182
if val is not None:
180183
serializer = field_serializer_map.get(key, _serialize_value)
181184
newval = serializer(val)
182-
obj[_sanitize_text(key)] = _sanitize_text(newval)
185+
obj[_safe_utf8_stringify(key)] = _safe_utf8_stringify(newval)
183186

184187
return obj
185188

setup.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55

66
setup(
77
name='django-queryset-csv',
8-
version='0.3.0',
8+
version='0.3.1',
99
description='A simple python module for writing querysets to csv',
1010
long_description=open('README.rst').read(),
1111
author=author,

test_app/djqscsv_tests/models.py

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,22 @@
11
from django.db import models
22

3+
from django.utils.translation import ugettext as _
4+
5+
from datetime import datetime
6+
7+
SOME_TIME = datetime(2001, 01, 01, 01, 01)
8+
9+
310
class Activity(models.Model):
411
name = models.CharField(max_length=50, verbose_name="Name of Activity")
512

13+
614
class Person(models.Model):
7-
name = models.CharField(max_length=50, verbose_name="Person's name")
15+
name = models.CharField(max_length=50, verbose_name=_("Person's name"))
816
address = models.CharField(max_length=255)
917
info = models.TextField(verbose_name="Info on Person")
1018
hobby = models.ForeignKey(Activity)
19+
born = models.DateTimeField(default=SOME_TIME)
1120

1221
def __unicode__(self):
1322
return self.name

test_app/djqscsv_tests/tests/test_csv_creation.py

Lines changed: 39 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -11,8 +11,6 @@
1111

1212
from djqscsv_tests.context import SELECT, EXCLUDE, AS, CONSTANT
1313

14-
from djqscsv_tests.models import Person
15-
1614
from djqscsv_tests.util import create_people_and_get_queryset
1715

1816
from django.utils import six
@@ -23,6 +21,7 @@
2321
else:
2422
from StringIO import StringIO
2523

24+
2625
class CSVTestCase(TestCase):
2726

2827
def setUp(self):
@@ -38,7 +37,8 @@ def csv_match(self, csv_file, expected_data, **csv_kwargs):
3837
for csv_row, expected_row in test_pairs:
3938
if is_first:
4039
# add the BOM to the data
41-
expected_row = ['\xef\xbb\xbf' + expected_row[0]] + expected_row[1:]
40+
expected_row = (['\xef\xbb\xbf' + expected_row[0]] +
41+
expected_row[1:])
4242
is_first = False
4343
iteration_happened = True
4444
assertion_results.append(csv_row == expected_row)
@@ -55,7 +55,6 @@ def assertNotMatchesCsv(self, *args, **kwargs):
5555
assertion_results = self.csv_match(*args, **kwargs)
5656
self.assertFalse(all(assertion_results))
5757

58-
5958
def assertQuerySetBecomesCsv(self, qs, expected_data, **kwargs):
6059
obj = StringIO()
6160
djqscsv.write_csv(qs, obj, **kwargs)
@@ -68,27 +67,30 @@ def assertEmptyQuerySetMatches(self, expected_data, **kwargs):
6867
if DJANGO_VERSION[:2] == (1, 5):
6968
with self.assertRaises(djqscsv.CSVException):
7069
djqscsv.write_csv(qs, obj)
71-
elif DJANGO_VERSION[:2] == (1, 6):
70+
else:
7271
djqscsv.write_csv(qs, obj,
7372
**kwargs)
7473
self.assertEqual(obj.getvalue(), expected_data)
7574

76-
7775
# the csv data that is returned by the most inclusive query under test.
7876
# use this data structure to build smaller data sets
7977
BASE_CSV = [
8078
['id', 'name', 'address',
81-
'info', 'hobby_id', 'hobby__name', 'Most Powerful'],
82-
['1', 'vetch', 'iffish', 'wizard', '1', 'Doing Magic', '0'],
83-
['2', 'nemmerle', 'roke', 'deceased arch mage', '2', 'Resting', '1'],
84-
['3', 'ged', 'gont', 'former arch mage', '2', 'Resting', '1']]
79+
'info', 'hobby_id', 'born', 'hobby__name', 'Most Powerful'],
80+
['1', 'vetch', 'iffish',
81+
'wizard', '1', '2001-01-01T01:01:00', 'Doing Magic', '0'],
82+
['2', 'nemmerle', 'roke',
83+
'deceased arch mage', '2', '2001-01-01T01:01:00', 'Resting', '1'],
84+
['3', 'ged', 'gont',
85+
'former arch mage', '2', '2001-01-01T01:01:00', 'Resting', '1']]
8586

8687
FULL_PERSON_CSV_WITH_RELATED = SELECT(BASE_CSV,
8788
AS('id', 'ID'),
8889
AS('name', 'Person\'s name'),
8990
'address',
9091
AS('info', 'Info on Person'),
9192
'hobby_id',
93+
'born',
9294
'hobby__name')
9395

9496
FULL_PERSON_CSV = EXCLUDE(FULL_PERSON_CSV_WITH_RELATED,
@@ -115,11 +117,11 @@ def test_write_csv_full_no_verbose(self):
115117
def test_write_csv_limited_no_verbose(self):
116118
qs = self.qs.values('name', 'address', 'info')
117119
self.assertQuerySetBecomesCsv(qs, self.LIMITED_PERSON_CSV_NO_VERBOSE,
118-
use_verbose_names=False)
120+
use_verbose_names=False)
119121

120122
def test_empty_queryset_no_verbose(self):
121123
self.assertEmptyQuerySetMatches(
122-
'\xef\xbb\xbfid,name,address,info,hobby_id\r\n',
124+
'\xef\xbb\xbfid,name,address,info,hobby_id,born\r\n',
123125
use_verbose_names=False)
124126

125127

@@ -135,13 +137,18 @@ def test_write_csv_limited(self):
135137
def test_empty_queryset(self):
136138
self.assertEmptyQuerySetMatches(
137139
'\xef\xbb\xbfID,Person\'s name,address,'
138-
'Info on Person,hobby_id\r\n')
140+
'Info on Person,hobby_id,born\r\n')
141+
139142

140143
class FieldHeaderMapTests(CSVTestCase):
141144
def test_write_csv_full_custom_headers(self):
142-
overridden_info_csv = ([['ID', "Person's name", 'address',
143-
'INFORMATION', 'hobby_id']] +
144-
self.FULL_PERSON_CSV[1:])
145+
overridden_info_csv = SELECT(self.FULL_PERSON_CSV,
146+
'ID',
147+
"Person's name",
148+
'address',
149+
AS('Info on Person', 'INFORMATION'),
150+
'hobby_id',
151+
'born')
145152

146153
self.assertQuerySetBecomesCsv(
147154
self.qs, overridden_info_csv,
@@ -155,8 +162,7 @@ def test_write_csv_limited_custom_headers(self):
155162

156163
self.assertQuerySetBecomesCsv(
157164
qs, overridden_info_csv,
158-
field_header_map={ 'info': 'INFORMATION' })
159-
165+
field_header_map={'info': 'INFORMATION'})
160166

161167
def test_write_csv_with_related_custom_headers(self):
162168
overridden_csv = SELECT(self.FULL_PERSON_CSV_WITH_RELATED,
@@ -166,23 +172,25 @@ def test_write_csv_with_related_custom_headers(self):
166172

167173
self.assertQuerySetBecomesCsv(
168174
qs, overridden_csv,
169-
field_header_map={ 'hobby__name': 'Name of Activity' })
175+
field_header_map={'hobby__name': 'Name of Activity'})
170176

171177
def test_empty_queryset_custom_headers(self):
172178
self.assertEmptyQuerySetMatches(
173-
'\xef\xbb\xbfID,Person\'s name,address,INFORMATION,hobby_id\r\n',
174-
field_header_map={ 'info': 'INFORMATION' })
179+
'\xef\xbb\xbfID,Person\'s name,'
180+
'address,INFORMATION,hobby_id,born\r\n',
181+
field_header_map={'info': 'INFORMATION'})
175182

176183

177184
class WalkRelationshipTests(CSVTestCase):
178185

179186
def test_with_related(self):
180187

181188
qs = self.qs.values('id', 'name', 'address', 'info',
182-
'hobby_id', 'hobby__name')
189+
'hobby_id', 'born', 'hobby__name')
183190

184191
self.assertQuerySetBecomesCsv(qs, self.FULL_PERSON_CSV_WITH_RELATED)
185192

193+
186194
class ColumnOrderingTests(CSVTestCase):
187195
def setUp(self):
188196
self.qs = create_people_and_get_queryset()
@@ -208,16 +216,17 @@ def test_no_values_matches_models_file(self):
208216
'name',
209217
'address',
210218
'info',
211-
'hobby_id')
212-
219+
'hobby_id',
220+
'born')
213221
self.assertQuerySetBecomesCsv(self.qs, csv,
214222
use_verbose_names=False)
215223

216224

217225
class AggregateTests(CSVTestCase):
218226

219227
def setUp(self):
220-
self.qs = create_people_and_get_queryset().annotate(num_hobbies=Count('hobby'))
228+
self.qs = (create_people_and_get_queryset()
229+
.annotate(num_hobbies=Count('hobby')))
221230

222231
def test_aggregate(self):
223232
csv_with_aggregate = SELECT(self.FULL_PERSON_CSV,
@@ -226,6 +235,7 @@ def test_aggregate(self):
226235
'address',
227236
"Info on Person",
228237
'hobby_id',
238+
'born',
229239
CONSTANT('1', 'num_hobbies'))
230240
self.assertQuerySetBecomesCsv(self.qs, csv_with_aggregate)
231241

@@ -234,7 +244,7 @@ class ExtraOrderingTests(CSVTestCase):
234244

235245
def setUp(self):
236246
self.qs = create_people_and_get_queryset().extra(
237-
select={'Most Powerful':"info LIKE '%arch mage%'"})
247+
select={'Most Powerful': "info LIKE '%arch mage%'"})
238248

239249
def test_extra_select(self):
240250
csv_with_extra = SELECT(self.BASE_CSV,
@@ -243,19 +253,20 @@ def test_extra_select(self):
243253
'address',
244254
AS('info', 'Info on Person'),
245255
'hobby_id',
256+
'born',
246257
'Most Powerful')
247258

248259
self.assertQuerySetBecomesCsv(self.qs, csv_with_extra)
249260

250-
251261
def test_extra_select_ordering(self):
252262
custom_order_csv = SELECT(self.BASE_CSV,
253263
AS('id', 'ID'),
254264
'Most Powerful',
255265
AS('name', "Person's name"),
256266
'address',
257267
AS('info', 'Info on Person'),
258-
'hobby_id')
268+
'hobby_id',
269+
'born')
259270

260271
self.assertQuerySetBecomesCsv(self.qs, custom_order_csv,
261272
field_order=['id', 'Most Powerful'])
@@ -283,7 +294,6 @@ def test_render_to_csv_response_no_filename(self):
283294
self.assertRegexpMatches(response['Content-Disposition'],
284295
r'attachment; filename=person_export.csv;')
285296

286-
287297
def test_render_to_csv_response(self):
288298
response = djqscsv.render_to_csv_response(self.qs,
289299
filename="test_csv",
@@ -292,7 +302,6 @@ def test_render_to_csv_response(self):
292302
self.assertMatchesCsv(response.content.split('\n'),
293303
self.FULL_PERSON_CSV_NO_VERBOSE)
294304

295-
296305
def test_render_to_csv_response_other_delimiter(self):
297306
response = djqscsv.render_to_csv_response(self.qs,
298307
filename="test_csv",
@@ -304,7 +313,6 @@ def test_render_to_csv_response_other_delimiter(self):
304313
self.FULL_PERSON_CSV_NO_VERBOSE,
305314
delimiter="|")
306315

307-
308316
def test_render_to_csv_fails_on_delimiter_mismatch(self):
309317
response = djqscsv.render_to_csv_response(self.qs,
310318
filename="test_csv",

test_app/djqscsv_tests/tests/test_utilities.py

Lines changed: 24 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
# -*- coding: utf-8 -*-
12
import datetime
23

34
from django.test import TestCase
@@ -11,6 +12,7 @@
1112
# csv creation process, but don't participate in it
1213
# directly.
1314

15+
1416
class ValidateCleanFilenameTests(TestCase):
1517

1618
def assertValidatedEquals(self, filename, expected_value):
@@ -63,14 +65,14 @@ def test_sanitize_date_with_non_string_formatter(self):
6365
this practice.
6466
"""
6567
record = {'name': 'Tenar'}
66-
serializer = {'name': lambda d: len(d) }
68+
serializer = {'name': lambda d: len(d)}
6769
sanitized = djqscsv._sanitize_unicode_record(serializer, record)
6870
self.assertEqual(sanitized, {'name': '5'})
6971

7072
def test_sanitize_date_with_formatter(self):
7173
record = {'name': 'Tenar',
7274
'created': datetime.datetime(1973, 5, 13)}
73-
serializer = {'created': lambda d: d.strftime('%Y-%m-%d') }
75+
serializer = {'created': lambda d: d.strftime('%Y-%m-%d')}
7476
sanitized = djqscsv._sanitize_unicode_record(serializer, record)
7577
self.assertEqual(sanitized,
7678
{'name': 'Tenar',
@@ -115,3 +117,23 @@ def test_generate_filename(self):
115117
r'person_export_[0-9]{8}.csv')
116118

117119

120+
class SafeUtf8EncodeTest(TestCase):
    """Checks that ``djqscsv._safe_utf8_stringify`` is idempotent."""

    def test_safe_utf8_encode(self):
        """Repeated stringification must give the same value and type."""

        class Foo(object):
            def __unicode__(self):
                return u'¯\\_(ツ)_/¯'

            def __str__(self):
                return self.__unicode__().encode('utf-8')

        # Integer literals are written without leading zeros or an ``L``
        # suffix so the module parses on Python 2 and 3. ``2 ** 64`` is
        # always a ``long`` on Python 2. Both the class ``Foo`` and an
        # instance are included so ``__unicode__`` is actually exercised.
        for val in (u'¯\\_(ツ)_/¯', 'plain', r'raw',
                    b'123', 11312312312313, 2 ** 64, False,
                    datetime.datetime(2001, 1, 1),
                    4, None, [], set(), Foo, Foo()):

            first_pass = djqscsv._safe_utf8_stringify(val)
            second_pass = djqscsv._safe_utf8_stringify(first_pass)
            third_pass = djqscsv._safe_utf8_stringify(second_pass)
            self.assertEqual(first_pass, second_pass)
            self.assertEqual(second_pass, third_pass)
            self.assertEqual(type(first_pass), type(third_pass))

test_app/djqscsv_tests/util.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,4 +12,3 @@ def create_people_and_get_queryset():
1212
info='former arch mage', hobby=resting)
1313

1414
return Person.objects.all()
15-

0 commit comments

Comments
 (0)