Skip to content

Commit 922d526

Browse files
author
Kristinn
authored
Added custom dataframe accessors. (#44)
* Added an accessor file to define custom dataframe accessors. * docstring change. * Added a test * Marked another test as flaky * Added another test to the decode accessor
1 parent 4d9aed3 commit 922d526

File tree

4 files changed

+111
-0
lines changed

4 files changed

+111
-0
lines changed

jupyter/docker/docker_build/00-import.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,9 @@
1616
from picatrix import notebook_init
1717
import ds4n6_lib as ds
1818

19+
# Add in the accessors to pandas.
20+
from laceworkjupyter import accessors
21+
1922
# Enable the Picatrix helpers.
2023
notebook_init.init()
2124

jupyter/laceworkjupyter/accessors.py

Lines changed: 72 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,72 @@
1+
"""
2+
File that includes custom accessors to pandas.
3+
4+
This is a way to define custom accessors to either
5+
data frames or series objects.
6+
"""
7+
8+
import base64
9+
import urllib.parse
10+
11+
import pandas as pd
12+
13+
14+
@pd.api.extensions.register_series_accessor('decode')
class DecodeAccessor:
    """
    Accessor class for decoding data.

    Registered on pandas Series objects under the name ``decode``, so the
    methods are reachable as ``series.decode.<method>()``.

    Missing values (None/NaN/NA) are passed through untouched by every
    method, so a single empty cell does not abort decoding of the column.
    """
    def __init__(self, data):
        """
        Stores the series the accessor operates on.

        :param pandas.Series data: The series the accessor was invoked on.
        """
        self.data = data

    @staticmethod
    def _is_missing(value):
        """
        Returns True if the value is a missing-value marker.

        :param value: A single element of the series.
        :return: True for None/NaN/NA style scalars, False otherwise.
        """
        # is_scalar guards against list-like cells, for which pd.isna
        # would return an array instead of a single boolean.
        return pd.api.types.is_scalar(value) and bool(pd.isna(value))

    def _decode_string_base64(self, string_value):
        """
        Returns decoded base64 string.

        :param str string_value: The base64 encoded string.
        :return: The decoded text if the payload is valid UTF-8, the raw
            decoded bytes if it is not, or the value itself if it is a
            missing value.
        """
        if self._is_missing(string_value):
            return string_value

        decoded_string = base64.b64decode(string_value)
        try:
            return decoded_string.decode('utf8')
        except UnicodeDecodeError:
            # Binary payload, hand back the raw bytes instead of failing.
            return decoded_string

    def base64(self):
        """
        Takes a series with base64 encoded data and decodes it.

        :return: A series with the decoded values (text where the payload
            is valid UTF-8, bytes otherwise).
        """
        return self.data.apply(self._decode_string_base64)

    def base64_altchars(self, altchars):
        """
        Takes a series with base64 encoded data and decodes it using altchars.

        Decoding is strict (``validate=True``), characters outside of the
        alphabet raise an error instead of being silently discarded.

        :param bytes altchars: A byte-like object of length 2 which specifies
            the alternative alphabet used instead of the '+' and '/'
            characters.
        :return: A series with the decoded values as bytes.
        """
        def _decode(value):
            if self._is_missing(value):
                return value
            return base64.b64decode(value, altchars=altchars, validate=True)

        return self.data.apply(_decode)

    def url_unquote(self):
        """
        Takes a series with URL encoded characters and unquotes them.

        Plus signs are translated into spaces as well as %xx escapes.

        :return: A series with the unquoted strings.
        """
        def _unquote(value):
            if self._is_missing(value):
                return value
            return urllib.parse.unquote_plus(value)

        return self.data.apply(_unquote)
58+
59+
60+
@pd.api.extensions.register_series_accessor('encode')
class EncodeAccessor:
    """
    Accessor class to encode data.

    Registered on pandas Series objects under the name ``encode``, so the
    methods are reachable as ``series.encode.<method>()``.
    """
    def __init__(self, data):
        """
        Stores the series the accessor operates on.

        :param pandas.Series data: The series the accessor was invoked on.
        """
        self.data = data

    @staticmethod
    def _encode_value_base64(value):
        """
        Returns the base64 encoding of a single value.

        :param value: A single element of the series.
        :return: The base64 encoded bytes, or the value itself if it is a
            missing value (None/NaN/NA).
        """
        # Missing values stay missing instead of being encoded as the
        # literal text "None"/"nan".
        if pd.api.types.is_scalar(value) and bool(pd.isna(value)):
            return value

        if isinstance(value, (bytes, bytearray, memoryview)):
            # Already binary, encode as is (trailing NUL bytes included).
            raw_bytes = bytes(value)
        else:
            # Text is encoded as UTF-8 so non-ASCII characters are
            # supported; other scalars use their string representation.
            raw_bytes = str(value).encode('utf8')

        return base64.b64encode(raw_bytes)

    def base64(self):
        """
        Returns base64 encoded data from a string series.

        Values are converted one by one rather than through
        ``astype(bytes)``, which fails on non-ASCII text and drops
        trailing NUL bytes from binary values.

        :return: A series with the base64 encoded values as bytes.
        """
        return self.data.apply(self._encode_value_base64)
Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,34 @@
1+
"""
2+
Test file for the local accessors.
3+
"""
4+
import pandas as pd
5+
6+
from laceworkjupyter import accessors # noqa: F401
7+
8+
9+
def test_decode_accessor():
    """
    Checks base64 decoding and URL unquoting done via the decode accessor.
    """
    records = [
        {
            'value': 12,
            'some_string': 'VGhpcyBpcyBhIHN0cmluZw==',
            'uri': 'http://mbl.is/%3Fstuff=r+1%20af',
        },
        {
            'value': 114,
            'some_string': 'VGhpcyBpcyBhIGEgc2VjcmV0',
            'uri': 'http://mbl.is/%3Fsfi=r+1%20af',
        },
    ]
    frame = pd.DataFrame(records)

    # Row order is irrelevant here, so the results are compared as sets.
    decoded_values = set(frame['some_string'].decode.base64().values)
    assert decoded_values == {'This is a a secret', 'This is a string'}

    unquoted_values = set(frame['uri'].decode.url_unquote().values)
    assert unquoted_values == {
        'http://mbl.is/?stuff=r 1 af',
        'http://mbl.is/?sfi=r 1 af',
    }

tests/api/v2/test_agent_access_tokens.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,7 @@ def test_agent_access_tokens_api_get(api):
3030
assert "data" in response.keys()
3131

3232

33+
@pytest.mark.flaky_test
3334
def test_agent_access_tokens_api_get_by_id(api):
3435
response = api.agent_access_tokens.get()
3536

@@ -47,6 +48,7 @@ def test_agent_access_tokens_api_get_by_id(api):
4748
assert "data" in response.keys()
4849

4950

51+
@pytest.mark.flaky_test
5052
def test_agent_access_tokens_api_search(api):
5153
assert AGENT_ACCESS_TOKEN_ID is not None
5254
if AGENT_ACCESS_TOKEN_ID:

0 commit comments

Comments
 (0)