Skip to content

Commit

Permalink
Merge branch 'branch-24.10' into wence/fix/16893
Browse files Browse the repository at this point in the history
  • Loading branch information
galipremsagar committed Sep 25, 2024
2 parents bda24c1 + 4160423 commit 2aa5945
Show file tree
Hide file tree
Showing 18 changed files with 215 additions and 56 deletions.
2 changes: 1 addition & 1 deletion ci/run_cudf_polars_polars_tests.sh
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ python -m pytest \
-m "" \
-p cudf_polars.testing.plugin \
-v \
--tb=short \
--tb=native \
${DESELECTED_TESTS} \
"$@" \
py-polars/tests
3 changes: 1 addition & 2 deletions ci/test_cudf_polars_polars_tests.sh
Original file line number Diff line number Diff line change
Expand Up @@ -33,8 +33,7 @@ python -m pip install ./local-pylibcudf-dep/pylibcudf*.whl
rapids-logger "Install cudf_polars"
python -m pip install $(echo ./dist/cudf_polars*.whl)

# TAG=$(python -c 'import polars; print(f"py-{polars.__version__}")')
TAG="py-1.7.0"
TAG=$(python -c 'import polars; print(f"py-{polars.__version__}")')
rapids-logger "Clone polars to ${TAG}"
git clone https://github.com/pola-rs/polars.git --branch ${TAG} --depth 1

Expand Down
5 changes: 1 addition & 4 deletions ci/test_wheel_cudf_polars.sh
Original file line number Diff line number Diff line change
Expand Up @@ -39,17 +39,14 @@ if [[ $RAPIDS_DEPENDENCIES == "oldest" ]]; then
| tee ./constraints.txt
fi

# echo to expand wildcard before adding `[extra]` requires for pip
# echo to expand wildcard before adding `[test]` requires for pip
python -m pip install \
-v \
--constraint ./constraints.txt \
"$(echo ./dist/cudf_polars_${RAPIDS_PY_CUDA_SUFFIX}*.whl)[test]" \
"$(echo ./dist/libcudf_${RAPIDS_PY_CUDA_SUFFIX}*.whl)" \
"$(echo ./dist/pylibcudf_${RAPIDS_PY_CUDA_SUFFIX}*.whl)"

rapids-logger "Pin to 1.7.0 Temporarily"
python -m pip install polars==1.7.0

rapids-logger "Run cudf_polars tests"

function set_exitcode()
Expand Down
2 changes: 1 addition & 1 deletion dependencies.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -663,7 +663,7 @@ dependencies:
common:
- output_types: [conda, requirements, pyproject]
packages:
- polars>=1.6
- polars>=1.8,<1.9
run_dask_cudf:
common:
- output_types: [conda, requirements, pyproject]
Expand Down
10 changes: 10 additions & 0 deletions python/cudf/cudf/pandas/fast_slow_proxy.py
Original file line number Diff line number Diff line change
Expand Up @@ -881,6 +881,12 @@ def _assert_fast_slow_eq(left, right):
assert_eq(left, right)


class ProxyFallbackError(Exception):
"""Raised when fallback occurs"""

pass


def _fast_function_call():
"""
Placeholder fast function for pytest profiling purposes.
Expand Down Expand Up @@ -957,6 +963,10 @@ def _fast_slow_function_call(
f"The exception was {e}."
)
except Exception as err:
if _env_get_bool("CUDF_PANDAS_FAIL_ON_FALLBACK", False):
raise ProxyFallbackError(
f"The operation failed with cuDF, the reason was {type(err)}: {err}."
) from err
with nvtx.annotate(
"EXECUTE_SLOW",
color=_CUDF_PANDAS_NVTX_COLORS["EXECUTE_SLOW"],
Expand Down
16 changes: 15 additions & 1 deletion python/cudf/cudf_pandas_tests/test_cudf_pandas.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,11 @@

from cudf.core._compat import PANDAS_GE_220
from cudf.pandas import LOADED, Profiler
from cudf.pandas.fast_slow_proxy import _Unusable, is_proxy_object
from cudf.pandas.fast_slow_proxy import (
ProxyFallbackError,
_Unusable,
is_proxy_object,
)
from cudf.testing import assert_eq

if not LOADED:
Expand Down Expand Up @@ -1738,3 +1742,13 @@ def add_one_ufunc(a):
return a + 1

assert_eq(cp.asarray(add_one_ufunc(arr1)), cp.asarray(add_one_ufunc(arr2)))


@pytest.mark.xfail(
reason="Fallback expected because casting to object is not supported",
)
def test_fallback_raises_error(monkeypatch):
with monkeypatch.context() as monkeycontext:
monkeycontext.setenv("CUDF_PANDAS_FAIL_ON_FALLBACK", "True")
with pytest.raises(ProxyFallbackError):
pd.Series(range(2)).astype(object)
100 changes: 100 additions & 0 deletions python/cudf/cudf_pandas_tests/test_cudf_pandas_no_fallback.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,100 @@
# SPDX-FileCopyrightText: Copyright (c) 2024, NVIDIA CORPORATION & AFFILIATES.
# All rights reserved.
# SPDX-License-Identifier: Apache-2.0

import pytest

from cudf.pandas import LOADED

if not LOADED:
raise ImportError("These tests must be run with cudf.pandas loaded")

import numpy as np
import pandas as pd


@pytest.fixture(autouse=True)
def fail_on_fallback(monkeypatch):
monkeypatch.setenv("CUDF_PANDAS_FAIL_ON_FALLBACK", "True")


@pytest.fixture
def dataframe():
df = pd.DataFrame(
{
"a": [1, 1, 1, 2, 3],
"b": [1, 2, 3, 4, 5],
"c": [1.2, 1.3, 1.5, 1.7, 1.11],
}
)
return df


@pytest.fixture
def series(dataframe):
return dataframe["a"]


@pytest.fixture
def array(series):
return series.values


@pytest.mark.parametrize(
"op",
[
"sum",
"min",
"max",
"mean",
"std",
"var",
"prod",
"median",
],
)
def test_no_fallback_in_reduction_ops(series, op):
s = series
getattr(s, op)()


def test_groupby(dataframe):
df = dataframe
df.groupby("a", sort=True).max()


def test_no_fallback_in_binops(dataframe):
df = dataframe
df + df
df - df
df * df
df**df
df[["a", "b"]] & df[["a", "b"]]
df <= df


def test_no_fallback_in_groupby_rolling_sum(dataframe):
df = dataframe
df.groupby("a").rolling(2).sum()


def test_no_fallback_in_concat(dataframe):
df = dataframe
pd.concat([df, df])


def test_no_fallback_in_get_shape(dataframe):
df = dataframe
df.shape


def test_no_fallback_in_array_ufunc_op(array):
np.add(array, array)


def test_no_fallback_in_merge(dataframe):
df = dataframe
pd.merge(df * df, df + df, how="inner")
pd.merge(df * df, df + df, how="outer")
pd.merge(df * df, df + df, how="left")
pd.merge(df * df, df + df, how="right")
8 changes: 5 additions & 3 deletions python/cudf_polars/cudf_polars/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,13 +10,15 @@

from __future__ import annotations

# Check we have a supported polars version
import cudf_polars.utils.versions as v
from cudf_polars._version import __git_commit__, __version__
from cudf_polars.callback import execute_with_cudf
from cudf_polars.dsl.translate import translate_ir

del v
# Check we have a supported polars version
from cudf_polars.utils.versions import _ensure_polars_version

_ensure_polars_version()
del _ensure_polars_version

__all__: list[str] = [
"execute_with_cudf",
Expand Down
8 changes: 0 additions & 8 deletions python/cudf_polars/cudf_polars/dsl/translate.py
Original file line number Diff line number Diff line change
Expand Up @@ -93,14 +93,6 @@ def _(
cloud_options = None
else:
reader_options, cloud_options = map(json.loads, options)
if (
typ == "csv"
and visitor.version()[0] == 1
and reader_options["schema"] is not None
):
reader_options["schema"] = {
"fields": reader_options["schema"]["inner"]
} # pragma: no cover; CI tests 1.7
file_options = node.file_options
with_columns = file_options.with_columns
n_rows = file_options.n_rows
Expand Down
14 changes: 9 additions & 5 deletions python/cudf_polars/cudf_polars/testing/asserts.py
Original file line number Diff line number Diff line change
Expand Up @@ -164,9 +164,11 @@ def assert_collect_raises(
cudf-polars.
Useful for controlling optimization settings.
polars_except
Exception or exceptions polars CPU is expected to raise.
Exception or exceptions polars CPU is expected to raise. If
None, CPU is not expected to raise an exception.
cudf_except
Exception or exceptions polars GPU is expected to raise.
Exception or exceptions polars GPU is expected to raise. If
None, GPU is not expected to raise an exception.
collect_kwargs
Common keyword arguments to pass to collect for both polars CPU and
cudf-polars.
Expand Down Expand Up @@ -203,7 +205,8 @@ def assert_collect_raises(
f"CPU execution RAISED {type(e)}, EXPECTED {polars_except}"
) from e
else:
raise AssertionError(f"CPU execution DID NOT RAISE {polars_except}")
if polars_except != ():
raise AssertionError(f"CPU execution DID NOT RAISE {polars_except}")

engine = GPUEngine(raise_on_fail=True)
try:
Expand All @@ -212,7 +215,8 @@ def assert_collect_raises(
pass
except Exception as e:
raise AssertionError(
f"GPU execution RAISED {type(e)}, EXPECTED {polars_except}"
f"GPU execution RAISED {type(e)}, EXPECTED {cudf_except}"
) from e
else:
raise AssertionError(f"GPU execution DID NOT RAISE {polars_except}")
if cudf_except != ():
raise AssertionError(f"GPU execution DID NOT RAISE {cudf_except}")
4 changes: 4 additions & 0 deletions python/cudf_polars/cudf_polars/testing/plugin.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,11 +49,15 @@ def pytest_configure(config: pytest.Config):
"tests/unit/io/test_csv.py::test_read_csv_only_loads_selected_columns": "Memory usage won't be correct due to GPU",
"tests/unit/io/test_lazy_count_star.py::test_count_compressed_csv_18057": "Need to determine if file is compressed",
"tests/unit/io/test_lazy_csv.py::test_scan_csv_slice_offset_zero": "Integer overflow in sliced read",
"tests/unit/io/test_lazy_parquet.py::test_dsl2ir_cached_metadata[False]": "cudf-polars doesn't use metadata read by rust preprocessing",
"tests/unit/io/test_lazy_parquet.py::test_parquet_is_in_statistics": "Debug output on stderr doesn't match",
"tests/unit/io/test_lazy_parquet.py::test_parquet_statistics": "Debug output on stderr doesn't match",
"tests/unit/io/test_lazy_parquet.py::test_parquet_different_schema[False]": "Needs cudf#16394",
"tests/unit/io/test_lazy_parquet.py::test_parquet_schema_mismatch_panic_17067[False]": "Needs cudf#16394",
"tests/unit/io/test_lazy_parquet.py::test_parquet_slice_pushdown_non_zero_offset[False]": "Thrift data not handled correctly/slice pushdown wrong?",
"tests/unit/io/test_lazy_parquet.py::test_parquet_unaligned_schema_read[False]": "Incomplete handling of projected reads with mismatching schemas, cudf#16394",
"tests/unit/io/test_lazy_parquet.py::test_parquet_unaligned_schema_read_dtype_mismatch[False]": "Different exception raised, but correctly raises an exception",
"tests/unit/io/test_lazy_parquet.py::test_parquet_unaligned_schema_read_missing_cols_from_first[False]": "Different exception raised, but correctly raises an exception",
"tests/unit/io/test_parquet.py::test_read_parquet_only_loads_selected_columns_15098": "Memory usage won't be correct due to GPU",
"tests/unit/io/test_scan.py::test_scan[single-csv-async]": "Debug output on stderr doesn't match",
"tests/unit/io/test_scan.py::test_scan_with_limit[single-csv-async]": "Debug output on stderr doesn't match",
Expand Down
16 changes: 8 additions & 8 deletions python/cudf_polars/cudf_polars/utils/versions.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,11 +12,11 @@

POLARS_VERSION = parse(__version__)

POLARS_VERSION_GE_16 = POLARS_VERSION >= parse("1.6")
POLARS_VERSION_GT_16 = POLARS_VERSION > parse("1.6")
POLARS_VERSION_LT_16 = POLARS_VERSION < parse("1.6")

if POLARS_VERSION_LT_16:
raise ImportError(
"cudf_polars requires py-polars v1.6 or greater."
) # pragma: no cover
POLARS_VERSION_LT_18 = POLARS_VERSION < parse("1.8")


def _ensure_polars_version():
if POLARS_VERSION_LT_18:
raise ImportError(
"cudf_polars requires py-polars v1.8 or greater."
) # pragma: no cover
2 changes: 1 addition & 1 deletion python/cudf_polars/pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ authors = [
license = { text = "Apache 2.0" }
requires-python = ">=3.10"
dependencies = [
"polars>=1.6",
"polars>=1.8,<1.9",
"pylibcudf==24.10.*,>=0.0.0a0",
] # This list was generated by `rapids-dependency-file-generator`. To make changes, edit ../../dependencies.yaml and run `rapids-dependency-file-generator`.
classifiers = [
Expand Down
6 changes: 5 additions & 1 deletion python/cudf_polars/tests/test_groupby.py
Original file line number Diff line number Diff line change
Expand Up @@ -169,7 +169,11 @@ def test_groupby_nan_minmax_raises(op):
"expr",
[
pl.lit(1).alias("value"),
pl.lit([[4, 5, 6]]).alias("value"),
pytest.param(
pl.lit([[4, 5, 6]]).alias("value"),
marks=pytest.mark.xfail(reason="Need to expose OtherScalar in rust IR"),
),
pl.Series("value", [[4, 5, 6]], dtype=pl.List(pl.Int32)),
pl.col("float") * (1 - pl.col("int")),
[pl.lit(2).alias("value"), pl.col("float") * 2],
],
Expand Down
35 changes: 15 additions & 20 deletions python/cudf_polars/tests/testing/test_asserts.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,6 @@

import polars as pl

from cudf_polars.containers import DataFrame
from cudf_polars.dsl.ir import Select
from cudf_polars.testing.asserts import (
assert_collect_raises,
assert_gpu_result_equal,
Expand Down Expand Up @@ -38,14 +36,24 @@ class E(Exception):
assert_ir_translation_raises(unsupported, E)


def test_collect_assert_raises(monkeypatch):
def test_collect_assert_raises():
df = pl.LazyFrame({"a": [1, 2, 3], "b": ["a", "b", "c"]})

with pytest.raises(AssertionError):
# This should raise, because polars CPU can run this query
with pytest.raises(AssertionError, match="CPU execution DID NOT RAISE"):
# This should raise, because polars CPU can run this query,
# but we expect an error.
assert_collect_raises(
df,
polars_except=pl.exceptions.InvalidOperationError,
cudf_except=(),
)

with pytest.raises(AssertionError, match="GPU execution DID NOT RAISE"):
# This should raise, because polars GPU can run this query,
# but we expect an error.
assert_collect_raises(
df,
polars_except=(),
cudf_except=pl.exceptions.InvalidOperationError,
)

Expand All @@ -60,31 +68,18 @@ def test_collect_assert_raises(monkeypatch):
cudf_except=pl.exceptions.InvalidOperationError,
)

with pytest.raises(AssertionError):
with pytest.raises(AssertionError, match="GPU execution RAISED"):
# This should raise because the expected GPU error is wrong
assert_collect_raises(
q,
polars_except=pl.exceptions.InvalidOperationError,
cudf_except=NotImplementedError,
)

with pytest.raises(AssertionError):
with pytest.raises(AssertionError, match="CPU execution RAISED"):
# This should raise because the expected CPU error is wrong
assert_collect_raises(
q,
polars_except=NotImplementedError,
cudf_except=pl.exceptions.InvalidOperationError,
)

with monkeypatch.context() as m:
m.setattr(Select, "evaluate", lambda self, cache: DataFrame([]))
# This query should fail, but we monkeypatch a bad
# implementation of Select which "succeeds" to check that our
# assertion notices this case.
q = df.select(pl.col("a") + pl.Series([1, 2]))
with pytest.raises(AssertionError):
assert_collect_raises(
q,
polars_except=pl.exceptions.ComputeError,
cudf_except=pl.exceptions.ComputeError,
)
Loading

0 comments on commit 2aa5945

Please sign in to comment.