Skip to content

Commit

Permalink
Add support for dlpack, expose python docs for DeviceQuantileDMatrix (#…
Browse files Browse the repository at this point in the history
  • Loading branch information
RAMitchell committed Apr 1, 2020
1 parent 6601a64 commit 15f40e5
Show file tree
Hide file tree
Showing 3 changed files with 40 additions and 9 deletions.
3 changes: 3 additions & 0 deletions doc/python/python_api.rst
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,9 @@ Core Data Structure
:members:
:show-inheritance:

.. autoclass:: xgboost.DeviceQuantileDMatrix
:show-inheritance:

.. autoclass:: xgboost.Booster
:members:
:show-inheritance:
Expand Down
30 changes: 22 additions & 8 deletions python-package/xgboost/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -381,6 +381,17 @@ def _maybe_dt_data(data, feature_names, feature_types,

return data, feature_names, feature_types

def _is_dlpack(x):
    """Return True when ``x`` looks like a PyCapsule carrying a DLPack tensor.

    The check is purely textual: the type's string form must mention
    ``PyCapsule`` and the object's string form must mention ``dltensor``.
    No capsule API is called and the capsule is not consumed.

    NOTE(review): because this is a substring test, a capsule whose name
    merely contains ``dltensor`` (e.g. ``used_dltensor``) is also accepted.
    """
    if 'PyCapsule' not in str(type(x)):
        return False
    return "dltensor" in str(x)

def _maybe_dlpack_data(data, feature_names, feature_types):
    """Turn a DLPack capsule into a cupy array; pass anything else through.

    Parameters
    ----------
    data :
        Candidate input. Only converted when ``_is_dlpack(data)`` is true.
    feature_names, feature_types :
        Returned unchanged; accepted so the signature matches the other
        ``_maybe_*`` converters.

    Returns
    -------
    tuple
        ``(data, feature_names, feature_types)`` where ``data`` is the result
        of ``cupy.fromDlpack`` for DLPack input (a zero-copy conversion
        according to the original comment), otherwise the original object.
    """
    if _is_dlpack(data):
        # Imported lazily so cupy is only required when DLPack input is used.
        from cupy import fromDlpack  # pylint: disable=E0401
        data = fromDlpack(data)
    return data, feature_names, feature_types


def _convert_dataframes(data, feature_names, feature_types,
meta=None, meta_type=None):
Expand All @@ -399,6 +410,9 @@ def _convert_dataframes(data, feature_names, feature_types,
data, feature_names, feature_types = _maybe_cudf_dataframe(
data, feature_names, feature_types)

data, feature_names, feature_types = _maybe_dlpack_data(
data, feature_names, feature_types)

return data, feature_names, feature_types


Expand Down Expand Up @@ -439,7 +453,7 @@ def __init__(self, data, label=None, weight=None, base_margin=None,
"""Parameters
----------
data : os.PathLike/string/numpy.array/scipy.sparse/pd.DataFrame/
dt.Frame/cudf.DataFrame/cupy.array
dt.Frame/cudf.DataFrame/cupy.array/dlpack
Data source of DMatrix.
When data is string or os.PathLike type, it represents the path
libsvm format txt file, csv file (by specifying uri parameter
Expand Down Expand Up @@ -1028,12 +1042,12 @@ def feature_types(self, feature_types):
class DeviceQuantileDMatrix(DMatrix):
"""Device memory Data Matrix used in XGBoost for training with tree_method='gpu_hist'. Do not
use this for test/validation tasks as some information may be lost in quantisation. This
DMatrix is primarily designed to save memory in training and avoids intermediate steps,
directly creating a compressed representation for training without allocating additional
memory. Implementation does not currently consider weights in quantisation process(unlike
DMatrix).
DMatrix is primarily designed to save memory in training from device memory inputs by
avoiding intermediate storage. Implementation does not currently consider weights in
quantisation process(unlike DMatrix). Set max_bin to control the number of bins during
quantisation.
You can construct DeviceDMatrix from cupy/cudf
You can construct DeviceQuantileDMatrix from cupy/cudf/dlpack.
"""

def __init__(self, data, label=None, weight=None, base_margin=None,
Expand All @@ -1044,8 +1058,8 @@ def __init__(self, data, label=None, weight=None, base_margin=None,
nthread=None, max_bin=256):
self.max_bin = max_bin
if not (hasattr(data, "__cuda_array_interface__") or (
CUDF_INSTALLED and isinstance(data, CUDF_DataFrame))):
raise ValueError('Only cupy/cudf currently supported for DeviceDMatrix')
CUDF_INSTALLED and isinstance(data, CUDF_DataFrame)) or _is_dlpack(data)):
raise ValueError('Only cupy/cudf/dlpack currently supported for DeviceQuantileDMatrix')

super().__init__(data, label=label, weight=weight, base_margin=base_margin,
missing=missing,
Expand Down
16 changes: 15 additions & 1 deletion tests/python-gpu/test_from_cupy.py
Original file line number Diff line number Diff line change
Expand Up @@ -95,7 +95,7 @@ def _test_cupy_metainfo(DMatrixT):
assert np.array_equal(dmat.get_uint_info('group_ptr'), dmat_cupy.get_uint_info('group_ptr'))


class TestFromArrayInterface:
class TestFromCupy:
'''Tests for constructing DMatrix from data structure conforming Apache
Arrow specification.'''

Expand All @@ -122,3 +122,17 @@ def test_cupy_metainfo_simple_dmat(self):
@pytest.mark.skipif(**tm.no_cupy())
def test_cupy_metainfo_device_dmat(self):
    '''Run the shared cupy meta-info checks against DeviceQuantileDMatrix.'''
    dmatrix_cls = xgb.DeviceQuantileDMatrix
    _test_cupy_metainfo(dmatrix_cls)

@pytest.mark.skipif(**tm.no_cupy())
def test_dlpack_simple_dmat(self):
    '''Build a DMatrix from a DLPack capsule exported by cupy and check
    that the resulting matrix has the expected shape.'''
    import cupy as cp
    n_rows, n_cols = 100, 2
    X = cp.random.random((n_rows, n_cols))
    dmat = xgb.DMatrix(X.toDlpack())
    # Construction alone is only a smoke test; also verify the dimensions
    # survived the DLPack -> cupy -> DMatrix path.
    assert dmat.num_row() == n_rows
    assert dmat.num_col() == n_cols

@pytest.mark.skipif(**tm.no_cupy())
def test_dlpack_device_dmat(self):
    '''Build a DeviceQuantileDMatrix from a DLPack capsule exported by cupy
    and check that the resulting matrix has the expected shape.'''
    import cupy as cp
    n_rows, n_cols = 100, 2
    X = cp.random.random((n_rows, n_cols))
    dmat = xgb.DeviceQuantileDMatrix(X.toDlpack())
    # Construction alone is only a smoke test; also verify the dimensions
    # survived the DLPack -> cupy -> DeviceQuantileDMatrix path.
    assert dmat.num_row() == n_rows
    assert dmat.num_col() == n_cols

0 comments on commit 15f40e5

Please sign in to comment.