Skip to content

Commit

Permalink
PYTHON-1351 Convert cryptography to an optional dependency (datastax#…
Browse files Browse the repository at this point in the history
  • Loading branch information
absurdfarce authored and dkropachev committed Aug 9, 2024
1 parent c9c09f5 commit e9aa10d
Show file tree
Hide file tree
Showing 12 changed files with 421 additions and 318 deletions.
126 changes: 126 additions & 0 deletions cassandra/column_encryption/_policies.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,126 @@
# Copyright DataStax, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from collections import namedtuple
from functools import lru_cache

import logging
import os

log = logging.getLogger(__name__)

from cassandra.cqltypes import _cqltypes
from cassandra.policies import ColumnEncryptionPolicy

from cryptography.hazmat.primitives import padding
from cryptography.hazmat.primitives.ciphers import Cipher, algorithms, modes

# Sizes are declared in bits; the *_BYTES variants are derived from them.
AES256_BLOCK_SIZE = 128
AES256_BLOCK_SIZE_BYTES = AES256_BLOCK_SIZE // 8
AES256_KEY_SIZE = 256
AES256_KEY_SIZE_BYTES = AES256_KEY_SIZE // 8

# Per-column state kept by the policy: the encryption key and the column type.
ColData = namedtuple('ColData', ('key', 'type'))

class AES256ColumnEncryptionPolicy(ColumnEncryptionPolicy):
    """
    Column encryption policy that encrypts serialized column values with AES-256.

    Every column registered through :meth:`add_column` carries its own 256-bit key
    and CQL type.  Values are PKCS7-padded to the AES block size before encryption
    and unpadded after decryption.  Cipher objects are created on demand and kept
    in a bounded LRU cache that is shared by all instances of this class.
    """

    # CBC uses an IV that's the same size as the block size
    #
    # TODO: Need to find some way to expose mode options
    # (CBC etc.) without leaking classes from the underlying
    # impl here
    #
    # NOTE(review): the default for ``iv`` is evaluated once, when this class body is
    # executed, so every policy created without an explicit IV in the same process
    # shares that IV and a different process gets a different one.  Since decrypt()
    # builds its cipher from self.iv, supply an explicit ``iv`` whenever data encrypted
    # here has to be decrypted by another process.
    def __init__(self, mode = modes.CBC, iv = os.urandom(AES256_BLOCK_SIZE_BYTES)):
        """
        :param mode: callable that is invoked with the IV to produce the cipher mode
        :param iv: initialization vector handed to ``mode``
        """

        self.mode = mode
        self.iv = iv

        # ColData for a given ColDesc is always preserved.  We only create a Cipher
        # when there's an actual need for one for a given ColDesc
        self.coldata = {}
        self.ciphers = {}

    def encrypt(self, coldesc, obj_bytes):
        """
        Pad and encrypt ``obj_bytes`` with the key registered for ``coldesc``.

        Raises ValueError if ``coldesc`` has not been registered via add_column().
        """

        # AES256 has a 128-bit block size so if the input bytes don't align perfectly on
        # those blocks we have to pad them.  There's plenty of room for optimization here:
        #
        # * Instances of the PKCS7 padder should be managed in a bounded pool
        # * It would be nice if we could get a flag from encrypted data to indicate
        #   whether it was padded or not
        #   * Might be able to make this happen with a leading block of flags in encrypted data
        padder = padding.PKCS7(AES256_BLOCK_SIZE).padder()
        padded_bytes = padder.update(obj_bytes) + padder.finalize()

        cipher = self._get_cipher(coldesc)
        encryptor = cipher.encryptor()
        return encryptor.update(padded_bytes) + encryptor.finalize()

    def decrypt(self, coldesc, encrypted_bytes):
        """
        Decrypt ``encrypted_bytes`` with the key registered for ``coldesc`` and strip
        the PKCS7 padding added by encrypt().

        Raises ValueError if ``coldesc`` has not been registered via add_column().
        """

        cipher = self._get_cipher(coldesc)
        decryptor = cipher.decryptor()
        padded_bytes = decryptor.update(encrypted_bytes) + decryptor.finalize()

        unpadder = padding.PKCS7(AES256_BLOCK_SIZE).unpadder()
        return unpadder.update(padded_bytes) + unpadder.finalize()

    def add_column(self, coldesc, key, type):
        """
        Register the encryption key and CQL type name to use for ``coldesc``.

        Raises ValueError if any argument is missing, if ``type`` is not a key of
        ``_cqltypes``, or if ``key`` is not exactly AES256_KEY_SIZE_BYTES long.
        """

        if not coldesc:
            raise ValueError("ColDesc supplied to add_column cannot be None")
        if not key:
            raise ValueError("Key supplied to add_column cannot be None")
        if not type:
            raise ValueError("Type supplied to add_column cannot be None")
        if type not in _cqltypes:
            # str.format() only understands {} placeholders; a %s would be emitted verbatim
            raise ValueError("Type {} is not a supported type".format(type))
        if len(key) != AES256_KEY_SIZE_BYTES:
            raise ValueError("AES256 column encryption policy expects a 256-bit encryption key")
        self.coldata[coldesc] = ColData(key, _cqltypes[type])

    def contains_column(self, coldesc):
        """Return True if ``coldesc`` has been registered via add_column()."""
        return coldesc in self.coldata

    def encode_and_encrypt(self, coldesc, obj):
        """
        Serialize ``obj`` with the type registered for ``coldesc`` and encrypt the result.

        Raises ValueError if ``coldesc`` is empty or unregistered, or if ``obj`` is None.
        """
        if not coldesc:
            raise ValueError("ColDesc supplied to encode_and_encrypt cannot be None")
        # Only None is rejected: falsy values such as 0, False or "" are legitimate
        # column values and must still be encrypted.
        if obj is None:
            raise ValueError("Object supplied to encode_and_encrypt cannot be None")
        coldata = self.coldata.get(coldesc)
        if coldata is None:
            raise ValueError("Could not find ColData for ColDesc {}".format(coldesc))
        return self.encrypt(coldesc, coldata.type.serialize(obj, None))

    def cache_info(self):
        """Return the lru_cache statistics of the shared cipher cache."""
        return AES256ColumnEncryptionPolicy._build_cipher.cache_info()

    def column_type(self, coldesc):
        """Return the type registered for ``coldesc``; raises KeyError if it is unknown."""
        return self.coldata[coldesc].type

    def _get_cipher(self, coldesc):
        """
        Access relevant state from this instance necessary to create a Cipher and then get one,
        hopefully returning a cached instance if we've already done so (and it hasn't been evicted)
        """

        # Keep the try body limited to the lookup so that a KeyError raised while
        # building the cipher is not misreported as a missing column.
        try:
            coldata = self.coldata[coldesc]
        except KeyError:
            raise ValueError("Could not find column {}".format(coldesc))
        return AES256ColumnEncryptionPolicy._build_cipher(coldata.key, self.mode, self.iv)

    # Explicitly use a static method here to avoid caching self
    @staticmethod
    @lru_cache(maxsize=128)
    def _build_cipher(key, mode, iv):
        """Build an AES-256 Cipher for ``key`` using the mode produced by ``mode(iv)``."""
        return Cipher(algorithms.AES256(key), mode(iv))
20 changes: 20 additions & 0 deletions cassandra/column_encryption/policies.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
# Copyright DataStax, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# The cryptography package is optional.  The column encryption policies are only
# re-exported from this module when it can be imported; otherwise this module is
# left without them.
# NOTE: an ImportError raised from inside cassandra.column_encryption._policies
# itself is swallowed by the same handler.
try:
    import cryptography
    from cassandra.column_encryption._policies import *
except ImportError:
    # Cryptography is not installed
    pass
107 changes: 1 addition & 106 deletions cassandra/policies.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,31 +17,24 @@
from functools import lru_cache
from itertools import islice, cycle, groupby, repeat
import logging
import os
from random import randint, shuffle
from threading import Lock
import socket
import warnings

from cryptography.hazmat.primitives import padding
from cryptography.hazmat.primitives.ciphers import Cipher, algorithms, modes
log = logging.getLogger(__name__)

from cassandra import WriteType as WT
from cassandra.connection import UnixSocketEndPoint
from cassandra.cqltypes import _cqltypes


# WriteType was originally defined in this module; the alias below is kept
# so that the public API does not break.
# It may be removed in the next major release.
WriteType = WT


from cassandra import ConsistencyLevel, OperationTimedOut

log = logging.getLogger(__name__)


class HostDistance(object):
"""
A measure of how "distant" a node is from the client, which
Expand Down Expand Up @@ -1397,7 +1390,6 @@ def _rethrow(self, *args, **kwargs):


ColDesc = namedtuple('ColDesc', ['ks', 'table', 'col'])
ColData = namedtuple('ColData', ['key','type'])

class ColumnEncryptionPolicy(object):
"""
Expand Down Expand Up @@ -1454,100 +1446,3 @@ def encode_and_encrypt(self, coldesc, obj):
statements.
"""
raise NotImplementedError()

# Sizes are declared in bits; the *_BYTES variants are derived from them.
AES256_BLOCK_SIZE = 128
AES256_BLOCK_SIZE_BYTES = AES256_BLOCK_SIZE // 8
AES256_KEY_SIZE = 256
AES256_KEY_SIZE_BYTES = AES256_KEY_SIZE // 8

class AES256ColumnEncryptionPolicy(ColumnEncryptionPolicy):
    """
    Column encryption policy that encrypts serialized column values with AES-256.

    Every column registered through :meth:`add_column` carries its own 256-bit key
    and CQL type.  Values are PKCS7-padded to the AES block size before encryption
    and unpadded after decryption.  Cipher objects are created on demand and kept
    in a bounded LRU cache that is shared by all instances of this class.
    """

    # CBC uses an IV that's the same size as the block size
    #
    # TODO: Need to find some way to expose mode options
    # (CBC etc.) without leaking classes from the underlying
    # impl here
    #
    # NOTE(review): the default for ``iv`` is evaluated once, when this class body is
    # executed, so every policy created without an explicit IV in the same process
    # shares that IV and a different process gets a different one.  Since decrypt()
    # builds its cipher from self.iv, supply an explicit ``iv`` whenever data encrypted
    # here has to be decrypted by another process.
    def __init__(self, mode = modes.CBC, iv = os.urandom(AES256_BLOCK_SIZE_BYTES)):
        """
        :param mode: callable that is invoked with the IV to produce the cipher mode
        :param iv: initialization vector handed to ``mode``
        """

        self.mode = mode
        self.iv = iv

        # ColData for a given ColDesc is always preserved.  We only create a Cipher
        # when there's an actual need for one for a given ColDesc
        self.coldata = {}
        self.ciphers = {}

    def encrypt(self, coldesc, obj_bytes):
        """
        Pad and encrypt ``obj_bytes`` with the key registered for ``coldesc``.

        Raises ValueError if ``coldesc`` has not been registered via add_column().
        """

        # AES256 has a 128-bit block size so if the input bytes don't align perfectly on
        # those blocks we have to pad them.  There's plenty of room for optimization here:
        #
        # * Instances of the PKCS7 padder should be managed in a bounded pool
        # * It would be nice if we could get a flag from encrypted data to indicate
        #   whether it was padded or not
        #   * Might be able to make this happen with a leading block of flags in encrypted data
        padder = padding.PKCS7(AES256_BLOCK_SIZE).padder()
        padded_bytes = padder.update(obj_bytes) + padder.finalize()

        cipher = self._get_cipher(coldesc)
        encryptor = cipher.encryptor()
        return encryptor.update(padded_bytes) + encryptor.finalize()

    def decrypt(self, coldesc, encrypted_bytes):
        """
        Decrypt ``encrypted_bytes`` with the key registered for ``coldesc`` and strip
        the PKCS7 padding added by encrypt().

        Raises ValueError if ``coldesc`` has not been registered via add_column().
        """

        cipher = self._get_cipher(coldesc)
        decryptor = cipher.decryptor()
        padded_bytes = decryptor.update(encrypted_bytes) + decryptor.finalize()

        unpadder = padding.PKCS7(AES256_BLOCK_SIZE).unpadder()
        return unpadder.update(padded_bytes) + unpadder.finalize()

    def add_column(self, coldesc, key, type):
        """
        Register the encryption key and CQL type name to use for ``coldesc``.

        Raises ValueError if any argument is missing, if ``type`` is not a key of
        ``_cqltypes``, or if ``key`` is not exactly AES256_KEY_SIZE_BYTES long.
        """

        if not coldesc:
            raise ValueError("ColDesc supplied to add_column cannot be None")
        if not key:
            raise ValueError("Key supplied to add_column cannot be None")
        if not type:
            raise ValueError("Type supplied to add_column cannot be None")
        if type not in _cqltypes:
            # str.format() only understands {} placeholders; a %s would be emitted verbatim
            raise ValueError("Type {} is not a supported type".format(type))
        if len(key) != AES256_KEY_SIZE_BYTES:
            raise ValueError("AES256 column encryption policy expects a 256-bit encryption key")
        self.coldata[coldesc] = ColData(key, _cqltypes[type])

    def contains_column(self, coldesc):
        """Return True if ``coldesc`` has been registered via add_column()."""
        return coldesc in self.coldata

    def encode_and_encrypt(self, coldesc, obj):
        """
        Serialize ``obj`` with the type registered for ``coldesc`` and encrypt the result.

        Raises ValueError if ``coldesc`` is empty or unregistered, or if ``obj`` is None.
        """
        if not coldesc:
            raise ValueError("ColDesc supplied to encode_and_encrypt cannot be None")
        # Only None is rejected: falsy values such as 0, False or "" are legitimate
        # column values and must still be encrypted.
        if obj is None:
            raise ValueError("Object supplied to encode_and_encrypt cannot be None")
        coldata = self.coldata.get(coldesc)
        if coldata is None:
            raise ValueError("Could not find ColData for ColDesc {}".format(coldesc))
        return self.encrypt(coldesc, coldata.type.serialize(obj, None))

    def cache_info(self):
        """Return the lru_cache statistics of the shared cipher cache."""
        return AES256ColumnEncryptionPolicy._build_cipher.cache_info()

    def column_type(self, coldesc):
        """Return the type registered for ``coldesc``; raises KeyError if it is unknown."""
        return self.coldata[coldesc].type

    def _get_cipher(self, coldesc):
        """
        Access relevant state from this instance necessary to create a Cipher and then get one,
        hopefully returning a cached instance if we've already done so (and it hasn't been evicted)
        """

        # Keep the try body limited to the lookup so that a KeyError raised while
        # building the cipher is not misreported as a missing column.
        try:
            coldata = self.coldata[coldesc]
        except KeyError:
            raise ValueError("Could not find column {}".format(coldesc))
        return AES256ColumnEncryptionPolicy._build_cipher(coldata.key, self.mode, self.iv)

    # Explicitly use a static method here to avoid caching self
    @staticmethod
    @lru_cache(maxsize=128)
    def _build_cipher(key, mode, iv):
        """Build an AES-256 Cipher for ``key`` using the mode produced by ``mode(iv)``."""
        return Cipher(algorithms.AES256(key), mode(iv))
3 changes: 2 additions & 1 deletion docs/column_encryption.rst
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,8 @@ when it's created.
import os
from cassandra.policies import ColDesc, AES256ColumnEncryptionPolicy, AES256_KEY_SIZE_BYTES
from cassandra.policies import ColDesc
from cassandra.column_encryption.policies import AES256ColumnEncryptionPolicy, AES256_KEY_SIZE_BYTES
key = os.urandom(AES256_KEY_SIZE_BYTES)
cl_policy = AES256ColumnEncryptionPolicy()
Expand Down
25 changes: 23 additions & 2 deletions docs/installation.rst
Original file line number Diff line number Diff line change
Expand Up @@ -26,15 +26,15 @@ To check if the installation was successful, you can run::

python -c 'import cassandra; print(cassandra.__version__)'

It should print something like "3.22.0".
It should print something like "3.27.0".

.. _installation-datastax-graph:

(*Optional*) Graph
---------------------------
The driver provides an optional fluent graph API that depends on Apache TinkerPop (gremlinpython). It is
not installed by default. To be able to build Gremlin traversals, you need to install
the `graph` requirements::
the `graph` extra::

pip install scylla-driver[graph]

Expand Down Expand Up @@ -65,6 +65,27 @@ support this::

pip install scales

*Optional:* Column-Level Encryption (CLE) Support
--------------------------------------------------
The driver has built-in support for client-side encryption and
decryption of data. For more, see :doc:`column_encryption`.

CLE depends on the Python `cryptography <https://cryptography.io/en/latest/>`_ module.
When installing Python driver 3.27.0, the `cryptography` module is
also downloaded and installed.
If you are using Python driver 3.28.0 or later and want to use CLE, you must
install the `cryptography <https://cryptography.io/en/latest/>`_ module.

You can install this module along with the driver by specifying the `cle` extra::

pip install scylla-driver[cle]

Alternatively, you can also install the module directly via `pip`::

pip install cryptography

Any version of cryptography >= 35.0 will work for the CLE feature. You can find additional
details at `PYTHON-1351 <https://datastax-oss.atlassian.net/browse/PYTHON-1351>`_.

Speeding Up Installation
^^^^^^^^^^^^^^^^^^^^^^^^
Expand Down
1 change: 0 additions & 1 deletion requirements.txt
Original file line number Diff line number Diff line change
@@ -1,2 +1 @@
cryptography >= 35.0
geomet>=0.1,<0.3
5 changes: 3 additions & 2 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -421,11 +421,11 @@ def run_setup(extensions):
'geomet>=0.1,<0.3',
'pyyaml > 5.0',
'six >=1.9',
'cryptography>=35.0'
]

_EXTRAS_REQUIRE = {
'graph': ['gremlinpython==3.4.6']
'graph': ['gremlinpython==3.4.6'],
'cle': ['cryptography>=35.0']
}

setup(
Expand All @@ -445,6 +445,7 @@ def run_setup(extensions):
'cassandra', 'cassandra.io', 'cassandra.cqlengine', 'cassandra.graph',
'cassandra.datastax', 'cassandra.datastax.insights', 'cassandra.datastax.graph',
# NOTE: keep a comma after every entry -- adjacent string literals are silently
# concatenated into a single (non-existent) package name.
'cassandra.datastax.graph.fluent', 'cassandra.datastax.cloud', 'cassandra.scylla',
'cassandra.column_encryption',
],
keywords='cassandra,cql,orm,dse,graph',
include_package_data=True,
Expand Down
1 change: 1 addition & 0 deletions test-datastax-requirements.txt
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
-r test-requirements.txt
kerberos
gremlinpython==3.4.6
cryptography >= 35.0
Loading

0 comments on commit e9aa10d

Please sign in to comment.