From d800a5d11f1c1b25a2640dbd9619e34763975f75 Mon Sep 17 00:00:00 2001 From: Bret McGuire Date: Fri, 19 May 2023 20:36:58 -0500 Subject: [PATCH 1/5] Expand integration test to simulate the problem --- tests/integration/standard/test_policies.py | 71 +++++++++++++++++---- 1 file changed, 57 insertions(+), 14 deletions(-) diff --git a/tests/integration/standard/test_policies.py b/tests/integration/standard/test_policies.py index 8f46306236..ce4a2445ae 100644 --- a/tests/integration/standard/test_policies.py +++ b/tests/integration/standard/test_policies.py @@ -14,12 +14,12 @@ from decimal import Decimal import os -import random import unittest from cassandra.cluster import ExecutionProfile, EXEC_PROFILE_DEFAULT from cassandra.policies import HostFilterPolicy, RoundRobinPolicy, SimpleConvictionPolicy, \ - WhiteListRoundRobinPolicy, ColDesc, AES256ColumnEncryptionPolicy, AES256_KEY_SIZE_BYTES + WhiteListRoundRobinPolicy, ColDesc, AES256ColumnEncryptionPolicy, AES256_KEY_SIZE_BYTES, \ + AES256_BLOCK_SIZE_BYTES from cassandra.pool import Host from cassandra.connection import DefaultEndPoint @@ -101,25 +101,29 @@ def _recreate_keyspace(self, session): session.execute("CREATE KEYSPACE foo WITH replication = {'class': 'SimpleStrategy', 'replication_factor': '1'}") session.execute("CREATE TABLE foo.bar(encrypted blob, unencrypted int, primary key(unencrypted))") + def _create_policy(self, key, iv = None): + cl_policy = AES256ColumnEncryptionPolicy(iv = iv) if iv else AES256ColumnEncryptionPolicy() + col_desc = ColDesc('foo','bar','encrypted') + cl_policy.add_column(col_desc, key, "int") + return (col_desc, cl_policy) + def test_end_to_end_prepared(self): # We only currently perform testing on a single type/expected value pair since CLE functionality is essentially # independent of the underlying type. We intercept data after it's been encoded when it's going out and before it's # encoded when coming back; the actual types of the data involved don't impact us. - expected = 12345 - expected_type = "int" + expected = 0 key = os.urandom(AES256_KEY_SIZE_BYTES) - cl_policy = AES256ColumnEncryptionPolicy() - col_desc = ColDesc('foo','bar','encrypted') - cl_policy.add_column(col_desc, key, expected_type) + (_, cl_policy) = self._create_policy(key) cluster = TestCluster(column_encryption_policy=cl_policy) session = cluster.connect() self._recreate_keyspace(session) prepared = session.prepare("insert into foo.bar (encrypted, unencrypted) values (?,?)") - session.execute(prepared, (expected,expected)) + for i in range(100): + session.execute(prepared, (i,i)) # A straight select from the database will now return the decrypted bits. We select both encrypted and unencrypted # values here to confirm that we don't interfere with regular processing of unencrypted vals. @@ -135,20 +139,20 @@ def test_end_to_end_prepared(self): def test_end_to_end_simple(self): - expected = 67890 - expected_type = "int" + expected = 1 key = os.urandom(AES256_KEY_SIZE_BYTES) - cl_policy = AES256ColumnEncryptionPolicy() - col_desc = ColDesc('foo','bar','encrypted') - cl_policy.add_column(col_desc, key, expected_type) + (col_desc, cl_policy) = self._create_policy(key) cluster = TestCluster(column_encryption_policy=cl_policy) session = cluster.connect() self._recreate_keyspace(session) # Use encode_and_encrypt helper function to populate date - session.execute("insert into foo.bar (encrypted, unencrypted) values (%s,%s)",(cl_policy.encode_and_encrypt(col_desc, expected), expected)) + for i in range(1,100): + self.assertIsNotNone(i) + encrypted = cl_policy.encode_and_encrypt(col_desc, i) + session.execute("insert into foo.bar (encrypted, unencrypted) values (%s,%s)", (encrypted, i)) # A straight select from the database will now return the decrypted bits. We select both encrypted and unencrypted # values here to confirm that we don't interfere with regular processing of unencrypted vals. @@ -161,3 +165,42 @@ def test_end_to_end_simple(self): (encrypted,unencrypted) = session.execute(prepared, [expected]).one() self.assertEquals(expected, encrypted) self.assertEquals(expected, unencrypted) + + def test_end_to_end_different_cle_contexts(self): + + expected = 2 + + key = os.urandom(AES256_KEY_SIZE_BYTES) + + # Simulate the creation of two AES256 policies at two different times. Python caches + # default param args at function definition time so a single value will be used any time + # the default val is used. Upshot is that within the same test we'll always have the same + # IV if we rely on the default args, so manually introduce some variation here to simulate + # what actually happens if you have two distinct sessions created at two different times. + iv1 = os.urandom(AES256_BLOCK_SIZE_BYTES) + (col_desc1, cl_policy1) = self._create_policy(key, iv=iv1) + cluster1 = TestCluster(column_encryption_policy=cl_policy1) + session1 = cluster1.connect() + self._recreate_keyspace(session1) + + # Use encode_and_encrypt helper function to populate date + for i in range(1,100): + self.assertIsNotNone(i) + encrypted = cl_policy1.encode_and_encrypt(col_desc1, i) + session1.execute("insert into foo.bar (encrypted, unencrypted) values (%s,%s)", (encrypted, i)) + session1.shutdown() + cluster1.shutdown() + + # Explicitly clear the class-level cache here; we're trying to simulate a second connection from a completely new process and + # that would entail not re-using any cached ciphers + AES256ColumnEncryptionPolicy._build_cipher.cache_clear() + cache_info = cl_policy1.cache_info() + self.assertEqual(cache_info.currsize, 0) + + iv2 = os.urandom(AES256_BLOCK_SIZE_BYTES) + (_, cl_policy2) = self._create_policy(key, iv=iv2) + cluster2 = TestCluster(column_encryption_policy=cl_policy2) + session2 = cluster2.connect() + (encrypted,unencrypted) = session2.execute("select encrypted, unencrypted from foo.bar where unencrypted = %s allow filtering", (expected,)).one() + self.assertEquals(expected, encrypted) + self.assertEquals(expected, unencrypted) From 10fb359f373cecd11dcb1c2535c46cf36d556a73 Mon Sep 17 00:00:00 2001 From: Bret McGuire Date: Fri, 19 May 2023 20:41:27 -0500 Subject: [PATCH 2/5] Re-work default val for policy IV in order to avoid any issues with caching of default args --- cassandra/policies.py | 7 +++++-- tests/integration/standard/test_policies.py | 2 +- 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/cassandra/policies.py b/cassandra/policies.py index 36063abafe..bbff33cb89 100644 --- a/cassandra/policies.py +++ b/cassandra/policies.py @@ -1262,10 +1262,13 @@ class AES256ColumnEncryptionPolicy(ColumnEncryptionPolicy): # TODO: Need to find some way to expose mode options # (CBC etc.) without leaking classes from the underlying # impl here - def __init__(self, mode = modes.CBC, iv = os.urandom(AES256_BLOCK_SIZE_BYTES)): + def __init__(self, mode = modes.CBC, iv = None): self.mode = mode - self.iv = iv + + # Avoid defining IV with a default arg in order to stay away from + # any issues around the caching of default args + self.iv = iv or os.urandom(AES256_BLOCK_SIZE_BYTES) # ColData for a given ColDesc is always preserved. We only create a Cipher # when there's an actual need to for a given ColDesc diff --git a/tests/integration/standard/test_policies.py b/tests/integration/standard/test_policies.py index ce4a2445ae..8d88e66d3b 100644 --- a/tests/integration/standard/test_policies.py +++ b/tests/integration/standard/test_policies.py @@ -102,7 +102,7 @@ def _recreate_keyspace(self, session): session.execute("CREATE TABLE foo.bar(encrypted blob, unencrypted int, primary key(unencrypted))") def _create_policy(self, key, iv = None): - cl_policy = AES256ColumnEncryptionPolicy(iv = iv) if iv else AES256ColumnEncryptionPolicy() + cl_policy = AES256ColumnEncryptionPolicy() col_desc = ColDesc('foo','bar','encrypted') cl_policy.add_column(col_desc, key, "int") return (col_desc, cl_policy) From d37826423e7fb1f3d8b3b0437ac7624e4660e7b5 Mon Sep 17 00:00:00 2001 From: Bret McGuire Date: Fri, 19 May 2023 21:12:52 -0500 Subject: [PATCH 3/5] Fix block cipher mode, add some tests for IV length, various other fixes --- cassandra/policies.py | 20 ++++++++++++-------- tests/unit/test_policies.py | 21 +++++++++++++++++++++ 2 files changed, 33 insertions(+), 8 deletions(-) diff --git a/cassandra/policies.py b/cassandra/policies.py index bbff33cb89..2adac6d9e6 100644 --- a/cassandra/policies.py +++ b/cassandra/policies.py @@ -1257,18 +1257,22 @@ def encode_and_encrypt(self, coldesc, obj): class AES256ColumnEncryptionPolicy(ColumnEncryptionPolicy): - # CBC uses an IV that's the same size as the block size - # - # TODO: Need to find some way to expose mode options - # (CBC etc.) without leaking classes from the underlying - # impl here - def __init__(self, mode = modes.CBC, iv = None): + def __init__(self, iv = None): - self.mode = mode + # Fix block cipher mode for now. IV size is a function of block cipher used + # so fixing this avoids (possibly unnecessary) validation logic here. + self.mode = modes.CBC + # CBC uses an IV that's the same size as the block size + # # Avoid defining IV with a default arg in order to stay away from # any issues around the caching of default args - self.iv = iv or os.urandom(AES256_BLOCK_SIZE_BYTES) + self.iv = iv + if self.iv: + if not len(self.iv) == AES256_BLOCK_SIZE_BYTES: + raise ValueError("AES256 column encryption policy uses CBC mode and therefore expects a 128-bit initialization vector") + else: + self.iv = os.urandom(AES256_BLOCK_SIZE_BYTES) # ColData for a given ColDesc is always preserved. We only create a Cipher # when there's an actual need to for a given ColDesc diff --git a/tests/unit/test_policies.py b/tests/unit/test_policies.py index 451d5c50c9..15711ff8eb 100644 --- a/tests/unit/test_policies.py +++ b/tests/unit/test_policies.py @@ -1540,6 +1540,23 @@ def test_add_column_invalid_key_size_raises(self): with self.assertRaises(ValueError): policy.add_column(coldesc, os.urandom(key_size), "blob") + def test_add_column_invalid_iv_size_raises(self): + def test_iv_size(iv_size): + policy = AES256ColumnEncryptionPolicy(iv = os.urandom(iv_size)) + policy.add_column(coldesc, os.urandom(AES256_KEY_SIZE_BYTES), "blob") + policy.encrypt(coldesc, os.urandom(128)) + + coldesc = ColDesc('ks1','table1','col1') + for iv_size in range(1,AES256_BLOCK_SIZE_BYTES - 1): + with self.assertRaises(ValueError): + test_iv_size(iv_size) + for iv_size in range(AES256_BLOCK_SIZE_BYTES + 1,(2 * AES256_BLOCK_SIZE_BYTES) - 1): + with self.assertRaises(ValueError): + test_iv_size(iv_size) + + # Finally, confirm that the expected IV size has no issue + test_iv_size(AES256_BLOCK_SIZE_BYTES) + def test_add_column_null_coldesc_raises(self): with self.assertRaises(ValueError): policy = AES256ColumnEncryptionPolicy() @@ -1610,6 +1627,10 @@ def test_decrypt_unknown_column(self): policy.decrypt(ColDesc('ks2','table2','col2'), encrypted_bytes) def test_cache_info(self): + + # Exclude any interference from tests above + AES256ColumnEncryptionPolicy._build_cipher.cache_clear() + coldesc1 = ColDesc('ks1','table1','col1') coldesc2 = ColDesc('ks2','table2','col2') coldesc3 = ColDesc('ks3','table3','col3') From 47dcda5b261da6b4ee0e83dc9b497ce0b1a25609 Mon Sep 17 00:00:00 2001 From: Bret McGuire Date: Fri, 19 May 2023 21:20:10 -0500 Subject: [PATCH 4/5] Explicitly make caching key a function of key + iv only --- cassandra/policies.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/cassandra/policies.py b/cassandra/policies.py index 2adac6d9e6..e8023c8a7a 100644 --- a/cassandra/policies.py +++ b/cassandra/policies.py @@ -1257,11 +1257,11 @@ def encode_and_encrypt(self, coldesc, obj): class AES256ColumnEncryptionPolicy(ColumnEncryptionPolicy): - def __init__(self, iv = None): + # Fix block cipher mode for now. IV size is a function of block cipher used + # so fixing this avoids (possibly unnecessary) validation logic here. + mode = modes.CBC - # Fix block cipher mode for now. IV size is a function of block cipher used - # so fixing this avoids (possibly unnecessary) validation logic here. - self.mode = modes.CBC + def __init__(self, iv = None): # CBC uses an IV that's the same size as the block size # @@ -1345,11 +1345,11 @@ def _get_cipher(self, coldesc): try: coldata = self.coldata[coldesc] - return AES256ColumnEncryptionPolicy._build_cipher(coldata.key, self.mode, self.iv) + return AES256ColumnEncryptionPolicy._build_cipher(coldata.key, self.iv) except KeyError: raise ValueError("Could not find column {}".format(coldesc)) # Explicitly use a class method here to avoid caching self @lru_cache(maxsize=128) - def _build_cipher(key, mode, iv): - return Cipher(algorithms.AES256(key), mode(iv)) + def _build_cipher(key, iv): + return Cipher(algorithms.AES256(key), AES256ColumnEncryptionPolicy.mode(iv)) From f4add8b67a12a61fe29f966405171413d2cdc627 Mon Sep 17 00:00:00 2001 From: Bret McGuire Date: Fri, 19 May 2023 21:28:23 -0500 Subject: [PATCH 5/5] Store IV along with encrypted bytes and extract them when decrypting --- cassandra/policies.py | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/cassandra/policies.py b/cassandra/policies.py index e8023c8a7a..3e302141a3 100644 --- a/cassandra/policies.py +++ b/cassandra/policies.py @@ -1261,7 +1261,7 @@ class AES256ColumnEncryptionPolicy(ColumnEncryptionPolicy): # so fixing this avoids (possibly unnecessary) validation logic here. mode = modes.CBC - def __init__(self, iv = None): + def __init__(self, iv=None): # CBC uses an IV that's the same size as the block size # @@ -1293,11 +1293,13 @@ def encrypt(self, coldesc, obj_bytes): cipher = self._get_cipher(coldesc) encryptor = cipher.encryptor() - return encryptor.update(padded_bytes) + encryptor.finalize() + return self.iv + encryptor.update(padded_bytes) + encryptor.finalize() - def decrypt(self, coldesc, encrypted_bytes): + def decrypt(self, coldesc, bytes): - cipher = self._get_cipher(coldesc) + iv = bytes[:AES256_BLOCK_SIZE_BYTES] + encrypted_bytes = bytes[AES256_BLOCK_SIZE_BYTES:] + cipher = self._get_cipher(coldesc, iv=iv) decryptor = cipher.decryptor() padded_bytes = decryptor.update(encrypted_bytes) + decryptor.finalize() @@ -1337,15 +1339,14 @@ def cache_info(self): def column_type(self, coldesc): return self.coldata[coldesc].type - def _get_cipher(self, coldesc): + def _get_cipher(self, coldesc, iv=None): """ Access relevant state from this instance necessary to create a Cipher and then get one, hopefully returning a cached instance if we've already done so (and it hasn't been evicted) """ - try: coldata = self.coldata[coldesc] - return AES256ColumnEncryptionPolicy._build_cipher(coldata.key, self.iv) + return AES256ColumnEncryptionPolicy._build_cipher(coldata.key, iv or self.iv) except KeyError: raise ValueError("Could not find column {}".format(coldesc))