Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: Support passing struct data to the DB API #718

Merged
merged 26 commits into from
Jul 1, 2021
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
Show all changes
26 commits
Select commit Hold shift + click to select a range
586a022
Added support for dbi struct parameters with explicit types
jimfulton Jun 22, 2021
13f9107
Make the dataset_id fixture a session fixture
jimfulton Jun 23, 2021
aea7bae
Make the dataset_id fixture a session fixture
jimfulton Jun 23, 2021
6f26130
system test of the struct machinery
jimfulton Jun 23, 2021
c386448
Verify that we can bind non-parameterized types
jimfulton Jun 23, 2021
71c8614
Parse and remove type parameters from explicit types.
jimfulton Jun 23, 2021
6f5b345
Document passing struct data.
jimfulton Jun 23, 2021
e08f6f6
Merge remote-tracking branch 'origin/master' into riversnake-dbi-stru…
jimfulton Jun 23, 2021
411c336
blacken
jimfulton Jun 23, 2021
ef2b323
using match.groups() throws off pytypes, also fix some type hints.
jimfulton Jun 23, 2021
654b108
🦉 Updates from OwlBot
gcf-owl-bot[bot] Jun 23, 2021
525b8fd
blacken
jimfulton Jun 23, 2021
462f2eb
remove type hints -- maybe they broke docs?
jimfulton Jun 23, 2021
904d2ce
merge upstream
jimfulton Jun 23, 2021
a6393e6
Revert "remove type hints -- maybe they broke docs?"
jimfulton Jun 23, 2021
e63e8b7
pin gcp-sphinx-docfx-yaml==0.2.0 so docfx doesn't fail.
jimfulton Jun 23, 2021
2f2bdcd
Merge remote-tracking branch 'origin/master' into riversnake-dbi-stru…
jimfulton Jun 24, 2021
91b0028
Review comments: examples, and guard against large number of fields
jimfulton Jun 24, 2021
35555aa
🦉 Updates from OwlBot
gcf-owl-bot[bot] Jun 24, 2021
0d81b80
Merge remote-tracking branch 'origin/master' into riversnake-dbi-stru…
jimfulton Jun 24, 2021
d3f959c
Merge branch 'riversnake-dbi-struct-types' of github.com:googleapis/p…
jimfulton Jun 24, 2021
5554301
Factored some repeated code in handling complex parameters
jimfulton Jun 25, 2021
8830113
Improved the error for dict (structish) parameter values without expl…
jimfulton Jun 25, 2021
a72cafb
blacken
jimfulton Jun 25, 2021
cba9697
removed repeated word
jimfulton Jun 25, 2021
12bd941
Update google/cloud/bigquery/dbapi/_helpers.py
jimfulton Jun 29, 2021
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
157 changes: 145 additions & 12 deletions google/cloud/bigquery/dbapi/_helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,9 +18,10 @@
import decimal
import functools
import numbers
import re

from google.cloud import bigquery
from google.cloud.bigquery import table, enums
from google.cloud.bigquery import table, enums, query
from google.cloud.bigquery.dbapi import exceptions


Expand Down Expand Up @@ -113,6 +114,135 @@ def array_to_query_parameter(value, name=None, query_parameter_type=None):
return bigquery.ArrayQueryParameter(name, array_type, value)


# Matcher for a complex type string such as ``ARRAY<INT64>`` or
# ``STRUCT<name STRING, age INT64>`` (matching is case-insensitive).
# Group 1 is the type keyword; group 2 is the raw text found between the
# outermost angle brackets.  Underscores are accepted in the bracketed text
# so that struct field names such as ``my_field`` can be used.
complex_query_parameter_parse = re.compile(
    r"""
    \s*
    (ARRAY|STRUCT|RECORD)    # Type
    \s*
    <([A-Z0-9_<> ,]+)>       # Subtype(s)
    \s*$
    """,
    re.IGNORECASE | re.VERBOSE,
).match

# Matcher for a single struct field declaration, ``<name> <type>``.
# Group 1 is the (mandatory) field name; group 2 is the field type, which
# may itself be a complex type containing nested, underscore-named fields.
parse_struct_field = re.compile(
    r"""
    (?:(\w+)\s+)             # field name
    ([A-Z0-9_<> ,]+)         # Field type
    $""",
    re.VERBOSE | re.IGNORECASE,
).match


def split_struct_fields(fields):
    """Split the body of a struct type into its top-level field strings.

    The text is split on commas, but commas that appear inside nested
    ``<...>`` brackets do not separate fields: pieces are glued back
    together (with the comma restored) until the number of ``<`` and ``>``
    characters seen for the current field is equal.

    Args:
        fields (str): Text found between the angle brackets of a struct
            type, e.g. ``"a int64, b struct<x int64, y string>"``.

    Yields:
        str: One field declaration at a time, with surrounding whitespace
        left untouched.  If the brackets never balance, the remaining text
        is yielded as a single final field.
    """
    pending = []
    depth = 0  # running count of "<" minus ">" for the field being built
    for piece in fields.split(","):
        pending.append(piece)
        depth += piece.count("<") - piece.count(">")
        if depth == 0:
            yield ",".join(pending)
            pending = []

    # Unbalanced brackets: hand back whatever is left so that the caller
    # can report the malformed field.
    if pending:
        yield ",".join(pending)


def complex_query_parameter_type(name: str, type_: str, base: str):
    """Build a query parameter type object from a type string.

    Args:
        name: Name to attach to the resulting type; a falsy value leaves a
            scalar type unnamed.
        type_: Type string to convert.  A string without ``<`` is looked up
            as a scalar type; otherwise it is parsed as an array or struct.
        base: The complete type string being processed, used in error
            messages only.

    Returns:
        The scalar type (renamed when ``name`` is truthy), a
        ``query.ArrayQueryParameterType`` or a
        ``query.StructQueryParameterType``.

    Raises:
        exceptions.ProgrammingError: If the scalar type is unknown, the
            complex type cannot be parsed, or a struct field is malformed.
    """
    type_ = type_.strip()

    # Scalar case: resolve the name against the known scalar types.
    if "<" not in type_:
        try:
            scalar = getattr(enums.SqlParameterScalarTypes, type_.upper())
        except AttributeError:
            raise exceptions.ProgrammingError(
                f"Invalid scalar type, {type_}, in {base}"
            )
        return scalar.with_name(name) if name else scalar

    parsed = complex_query_parameter_parse(type_)
    if parsed is None:
        raise exceptions.ProgrammingError(f"Invalid parameter type, {type_}")

    kind = parsed.group(1).upper()
    inner = parsed.group(2).strip()

    if kind == "ARRAY":
        element_type = complex_query_parameter_type(None, inner, base)
        return query.ArrayQueryParameterType(element_type, name=name)

    # STRUCT / RECORD: convert each "<name> <type>" declaration in turn.
    struct_fields = []
    for declaration in split_struct_fields(inner):
        declaration = declaration.strip()
        parsed_field = parse_struct_field(declaration)
        if parsed_field is None:
            raise exceptions.ProgrammingError(
                f"Invalid struct field, {declaration}, in {base}"
            )
        struct_fields.append(
            complex_query_parameter_type(
                parsed_field.group(1), parsed_field.group(2), base
            )
        )

    return query.StructQueryParameterType(*struct_fields, name=name)


def complex_query_parameter(name, value, type_, base=None):
    """Construct a query parameter for a complex type (array or struct record).

    The function calls itself for array elements and struct fields, so it
    also handles subtypes, which may be plain scalars.

    Args:
        name: Parameter (or struct field) name; ``None`` is passed for
            positional parameters and array elements.
        value: The value to bind.  Must be array-like for an array type and
            a mapping for a struct type.
        type_ (str): Type string for ``value``.
        base (Optional[str]): The outermost type string, used in error
            messages.  Falls back to ``type_`` when not given.

    Returns:
        A ``query.ScalarQueryParameter``, ``query.ArrayQueryParameter`` or
        ``query.StructQueryParameter``.

    Raises:
        exceptions.ProgrammingError: If the type string is invalid or the
            value does not fit the declared type.
    """
    type_ = type_.strip()
    if not base:
        base = type_

    # No closing bracket: treat the type as a scalar.
    if ">" not in type_:
        try:
            scalar_type = getattr(enums.SqlParameterScalarTypes, type_.upper())._type
        except AttributeError:
            raise exceptions.ProgrammingError(
                f"The given parameter type, {type_},"
                f" for {name} is not a valid BigQuery scalar type, in {base}."
            )
        return query.ScalarQueryParameter(name, scalar_type, value)

    parsed = complex_query_parameter_parse(type_)
    if parsed is None:
        raise exceptions.ProgrammingError(f"Invalid parameter type, {type_}")

    kind = parsed.group(1).upper()
    inner = parsed.group(2).strip()

    if kind != "ARRAY":
        # STRUCT / RECORD: the value must supply exactly the declared fields.
        if not isinstance(value, collections_abc.Mapping):
            raise exceptions.ProgrammingError(f"Non-mapping value for type {type_}")

        unused_keys = set(value)
        members = []
        for declaration in split_struct_fields(inner):
            declaration = declaration.strip()
            parsed_field = parse_struct_field(declaration)
            if parsed_field is None:
                raise exceptions.ProgrammingError(
                    f"Invalid struct field, {declaration}, in {base or type_}"
                )
            field_name = parsed_field.group(1)
            field_type = parsed_field.group(2)
            if field_name not in value:
                raise exceptions.ProgrammingError(
                    f"No field value for {field_name} in {type_}"
                )
            unused_keys.remove(field_name)
            members.append(
                complex_query_parameter(field_name, value[field_name], field_type, base)
            )

        if unused_keys:
            raise exceptions.ProgrammingError(f"Extra data keys for {type_}")

        return query.StructQueryParameter(name, *members)

    # ARRAY
    if not array_like(value):
        raise exceptions.ProgrammingError(
            f"Array type with non-array-like value"
            f" with type {type(value).__name__}"
        )

    array_type = complex_query_parameter_type(name, inner, base)
    if isinstance(array_type, query.ArrayQueryParameterType):
        raise exceptions.ProgrammingError(f"Array can't contain an array in {base}")

    if "<" in inner:
        # Complex elements are converted one by one.
        elements = [complex_query_parameter(None, item, inner, base) for item in value]
    else:
        # Scalar elements are passed through unchanged.
        elements = value

    return query.ArrayQueryParameter(name, array_type, elements)


def to_query_parameters_list(parameters, parameter_types):
"""Converts a sequence of parameter values into query parameters.

Expand All @@ -129,7 +259,9 @@ def to_query_parameters_list(parameters, parameter_types):
result = []

for value, type_ in zip(parameters, parameter_types):
if isinstance(value, collections_abc.Mapping):
if type_ is not None and '<' in type_:
param = complex_query_parameter(None, value, type_)
elif isinstance(value, collections_abc.Mapping):
raise NotImplementedError("STRUCT-like parameter values are not supported.")
elif array_like(value):
param = array_to_query_parameter(value, None, type_)
Expand Down Expand Up @@ -157,20 +289,21 @@ def to_query_parameters_dict(parameters, query_parameter_types):
result = []

for name, value in parameters.items():
if isinstance(value, collections_abc.Mapping):
query_parameter_type = query_parameter_types.get(name)
if query_parameter_type is not None and '<' in query_parameter_type:
param = complex_query_parameter(name, value, query_parameter_type)
elif isinstance(value, collections_abc.Mapping):
raise NotImplementedError(
"STRUCT-like parameter values are not supported "
"(parameter {}).".format(name)
)
else:
query_parameter_type = query_parameter_types.get(name)
if array_like(value):
param = array_to_query_parameter(
value, name=name, query_parameter_type=query_parameter_type
)
else:
param = scalar_to_query_parameter(
value, name=name, query_parameter_type=query_parameter_type,
elif array_like(value):
param = array_to_query_parameter(
value, name=name, query_parameter_type=query_parameter_type
)
else:
param = scalar_to_query_parameter(
value, name=name, query_parameter_type=query_parameter_type,
)

result.append(param)
Expand Down
12 changes: 11 additions & 1 deletion google/cloud/bigquery/dbapi/cursor.py
Original file line number Diff line number Diff line change
Expand Up @@ -483,7 +483,17 @@ def _format_operation(operation, parameters):


def _extract_types(
operation, extra_type_sub=re.compile(r"(%*)%(?:\(([^:)]*)(?::(\w+))?\))?s").sub
operation, extra_type_sub=re.compile(
r"""
(%*) # Extra %s. We'll dal with these in the replacement code
% # Beginning of replacement, %s, %(...)s
(?:\( # Begin of optional name and/or type
([^:)]*) # name
(?::([a-zA-Z0-9<>, ]+))? # type
\))? # End of optional name and/or type
s # End of replacement
""",
re.VERBOSE).sub
):
"""Remove type information from parameter placeholders.

Expand Down
122 changes: 122 additions & 0 deletions tests/unit/test_dbapi__helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
import decimal
import math
import operator as op
import re
import unittest

import pytest
Expand Down Expand Up @@ -416,3 +417,124 @@ def test_to_query_parameters_list_w_types():
bigquery.ScalarQueryParameter(None, "STRING", None),
bigquery.ArrayQueryParameter(None, "FLOAT64", []),
]

@pytest.mark.parametrize(
    "value,type_,expect",
    [
        (
            [],
            "ARRAY<INT64>",
            {
                "parameterType": {"type": "ARRAY", "arrayType": {"type": "INT64"}},
                "parameterValue": {"arrayValues": []},
            },
        ),
        (
            [1, 2],
            "ARRAY<INT64>",
            {
                "parameterType": {"type": "ARRAY", "arrayType": {"type": "INT64"}},
                "parameterValue": {"arrayValues": [{"value": "1"}, {"value": "2"}]},
            },
        ),
        (
            {
                "name": "par",
                "children": [
                    {"name": "ch1", "bdate": datetime.date(2021, 1, 1)},
                    {"name": "ch2", "bdate": datetime.date(2021, 1, 2)},
                ],
            },
            "struct<name string, children array<struct<name string, bdate date>>>",
            {
                "parameterType": {
                    "structTypes": [
                        {"name": "name", "type": {"type": "STRING"}},
                        {
                            "name": "children",
                            "type": {
                                "arrayType": {
                                    "structTypes": [
                                        {"name": "name", "type": {"type": "STRING"}},
                                        {"name": "bdate", "type": {"type": "DATE"}},
                                    ],
                                    "type": "STRUCT",
                                },
                                "type": "ARRAY",
                            },
                        },
                    ],
                    "type": "STRUCT",
                },
                "parameterValue": {
                    "structValues": {
                        "children": {
                            "arrayValues": [
                                {
                                    "structValues": {
                                        "bdate": {"value": "2021-01-01"},
                                        "name": {"value": "ch1"},
                                    }
                                },
                                {
                                    "structValues": {
                                        "bdate": {"value": "2021-01-02"},
                                        "name": {"value": "ch2"},
                                    }
                                },
                            ]
                        },
                        "name": {"value": "par"},
                    }
                },
            },
        ),
    ],
)
def test_complex_query_parameter_type(type_, value, expect):
    """Check the API representation built for array and struct parameters."""
    from google.cloud.bigquery.dbapi import _helpers

    api_repr = _helpers.complex_query_parameter("test", value, type_).to_api_repr()

    # The name is checked separately so that ``expect`` only has to describe
    # the parameter type and value.
    assert api_repr.pop("name") == "test"
    assert api_repr == expect


@pytest.mark.parametrize(
    "value,type_,expect",
    [
        ([], "ARRAY<INT>", "Invalid scalar type, INT, in ARRAY<INT>"),
        ([], "x<INT>", "Invalid parameter type, x<INT>"),
        ({}, "struct<int>", "Invalid struct field, int, in struct<int>"),
        (
            {"x": 1},
            "struct<x int>",
            "The given parameter type, int,"
            " for x is not a valid BigQuery scalar type, in struct<x int>.",
        ),
        ([], "x<<INT>", "Invalid parameter type, x<<INT>"),
        (0, "ARRAY<INT64>", "Array type with non-array-like value with type int"),
        (
            [],
            "ARRAY<ARRAY<INT64>>",
            "Array can't contain an array in ARRAY<ARRAY<INT64>>",
        ),
        ([], "struct<x int>", "Non-mapping value for type struct<x int>"),
        ({}, "struct<x int>", "No field value for x in struct<x int>"),
        (
            {"x": 1, "y": 1},
            "struct<x int64>",
            "Extra data keys for struct<x int64>",
        ),
        (
            [],
            "array<struct<xxx>>",
            "Invalid struct field, xxx, in array<struct<xxx>>",
        ),
        ([], "array<<>>", "Invalid parameter type, <>"),
    ],
)
def test_complex_query_parameter_type_errors(type_, value, expect):
    """Check the exact error message raised for each invalid type/value pair."""
    from google.cloud.bigquery.dbapi import _helpers
    from google.cloud.bigquery.dbapi import exceptions

    # Anchor the escaped message so that the whole text has to match.
    full_message = "^" + re.escape(expect) + "$"

    with pytest.raises(exceptions.ProgrammingError, match=full_message):
        _helpers.complex_query_parameter("test", value, type_)


@pytest.mark.parametrize(
    "parameters,parameter_types,expect",
    [
        (
            [[], {"name": "ch1", "bdate": datetime.date(2021, 1, 1)}],
            ["ARRAY<INT64>", "struct<name string, bdate date>"],
            [
                {
                    "parameterType": {"arrayType": {"type": "INT64"}, "type": "ARRAY"},
                    "parameterValue": {"arrayValues": []},
                },
                {
                    "parameterType": {
                        "structTypes": [
                            {"name": "name", "type": {"type": "STRING"}},
                            {"name": "bdate", "type": {"type": "DATE"}},
                        ],
                        "type": "STRUCT",
                    },
                    "parameterValue": {
                        "structValues": {
                            "bdate": {"value": "2021-01-01"},
                            "name": {"value": "ch1"},
                        }
                    },
                },
            ],
        ),
        (
            {
                "ids": [],
                "child": {"name": "ch1", "bdate": datetime.date(2021, 1, 1)},
            },
            {"ids": "ARRAY<INT64>", "child": "struct<name string, bdate date>"},
            [
                {
                    "name": "ids",
                    "parameterType": {"arrayType": {"type": "INT64"}, "type": "ARRAY"},
                    "parameterValue": {"arrayValues": []},
                },
                {
                    "name": "child",
                    "parameterType": {
                        "structTypes": [
                            {"name": "name", "type": {"type": "STRING"}},
                            {"name": "bdate", "type": {"type": "DATE"}},
                        ],
                        "type": "STRUCT",
                    },
                    "parameterValue": {
                        "structValues": {
                            "bdate": {"value": "2021-01-01"},
                            "name": {"value": "ch1"},
                        }
                    },
                },
            ],
        ),
    ],
)
def test_to_query_parameters_complex_types(parameters, parameter_types, expect):
    """Check complex types for both positional and named parameters."""
    from google.cloud.bigquery.dbapi import _helpers

    api_reprs = []
    for param in _helpers.to_query_parameters(parameters, parameter_types):
        api_reprs.append(param.to_api_repr())

    assert api_reprs == expect
4 changes: 4 additions & 0 deletions tests/unit/test_dbapi_cursor.py
Original file line number Diff line number Diff line change
Expand Up @@ -809,6 +809,10 @@ def test__format_operation_no_placeholders(self):
"values(%%%%%(foo:INT64)s, %(bar)s)",
("values(%%%%%(foo)s, %(bar)s)", dict(foo="INT64")),
),
(
"values(%%%%%(foo:struct<x string, y int64>)s, %(bar)s)",
("values(%%%%%(foo)s, %(bar)s)", dict(foo="struct<x string, y int64>")),
),
],
)
def test__extract_types(inp, expect):
Expand Down