Skip to content

Commit d8756c4

Browse files
authored
241 review encoding before sending (#246)
* fix: Fix UTF-8 encoding when the string contains surrogates * doc: Update version
1 parent 2cbdad0 commit d8756c4

File tree

7 files changed

+49
-5
lines changed

7 files changed

+49
-5
lines changed

CHANGELOG.md

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,16 @@ All notable changes to this project will be documented in this file.
44

55
The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/) and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0.html).
66

7+
## [5.1.9] - 2024-02-06
8+
9+
### Added
10+
11+
- Tests for encoding of messages in Sender.
12+
13+
### Changed
14+
15+
- UTF-8 encoding with the "replace" error handler is now used by default in Sender.
16+
717
## [5.1.8] - 2024-01-10
818

919
### Added

LICENSE

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
The MIT License (MIT)
22

3-
Copyright (C) 2023 Devo, Inc.
3+
Copyright (C) 2024 Devo, Inc.
44

55
Permission is hereby granted, free of charge, to any person obtaining a copy
66
of this software and associated documentation files (the "Software"), to deal

README.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -203,7 +203,7 @@ You can contact with us at _support@devo.com_.
203203

204204
MIT License
205205

206-
(C) 2023 Devo, Inc.
206+
(C) 2024 Devo, Inc.
207207

208208
Permission is hereby granted, free of charge, to any person obtaining a copy of
209209
this software and associated documentation files (the 'Software'), to deal in

devo/__version__.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
__description__ = "Devo Python Library."
22
__url__ = "http://www.devo.com"
3-
__version__ = "5.1.8"
3+
__version__ = "5.1.9"
44
__author__ = "Devo"
55
__author_email__ = "support@devo.com"
66
__license__ = "MIT"
7-
__copyright__ = "Copyright 2023 Devo"
7+
__copyright__ = "Copyright 2024 Devo"

devo/sender/data.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -625,7 +625,7 @@ def __encode_record(record):
625625
:return: record encoded for PY3
626626
"""
627627
if not isinstance(record, bytes):
628-
return record.encode("utf-8")
628+
return record.encode("utf-8", "replace")
629629
return record
630630

631631
def __send_oc(self, record):

run_tests.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@
2222
from tests.sender.test_number_lookup import TestLookup as SENDER_NUMBER_LOOKUP
2323
from tests.sender.test_read_csv import TestCSVRFC as SENDER_CSV
2424
from tests.sender.test_send_data import TestSender as SENDER_SEND_DATA
25+
from tests.sender.test_send_encoding import TestEncoding as SENDER_SEND_ENCODING
2526
from tests.sender.test_send_lookup import TestLookup as SENDER_SEND_LOOKUP
2627

2728
module_paths = {
@@ -38,6 +39,7 @@
3839
"SENDER_CSV": SENDER_CSV,
3940
"SENDER_NUMBER_LOOKUP": SENDER_NUMBER_LOOKUP,
4041
"SENDER_SEND_DATA": SENDER_SEND_DATA,
42+
"SENDER_SEND_ENCODING": SENDER_SEND_ENCODING,
4143
"SENDER_SEND_LOOKUP": SENDER_SEND_LOOKUP,
4244
}
4345

tests/sender/test_send_encoding.py

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,32 @@
1+
import unittest
2+
from devo.sender.data import Sender
3+
4+
# This test case uses the private method __encode_record to test the encoding
5+
# of the records and needs some name mangling to access it.
6+
7+
8+
class TestEncoding(unittest.TestCase):
9+
10+
def test_encode_record_ascii(self):
11+
record = 'Hello' # ASCII Normal sequence
12+
encoded_record = Sender._Sender__encode_record(record)
13+
self.assertEqual(encoded_record, b'Hello')
14+
15+
def test_encode_record_utf8(self):
16+
record = 'Hello 🌍, こんにちは' # UTF-8 sequence
17+
encoded_record = Sender._Sender__encode_record(record)
18+
self.assertEqual(encoded_record, b'Hello \xf0\x9f\x8c\x8d, \xe3\x81\x93\xe3\x82\x93\xe3\x81\xab\xe3\x81\xa1\xe3\x81\xaf')
19+
20+
def test_encode_record_utf8_with_byte_sequence(self):
21+
record = '10 €' # Non-ASCII character with a multi-byte UTF-8 encoding
22+
encoded_record = Sender._Sender__encode_record(record)
23+
self.assertEqual(encoded_record, b'10 \xe2\x82\xac')
24+
25+
def test_encode_record_utf16_surrogate(self):
26+
record = '\uD83D Hello' # Lone UTF-16 high surrogate (not encodable in strict UTF-8)
27+
encoded_record = Sender._Sender__encode_record(record)
28+
self.assertEqual(encoded_record, b'? Hello')
29+
30+
31+
if __name__ == "__main__":
32+
unittest.main()

0 commit comments

Comments
 (0)