Skip to content

Commit

Permalink
doc id update
Browse files Browse the repository at this point in the history
  • Loading branch information
blublinsky committed Jun 25, 2024
1 parent 93814b3 commit 76c9d1f
Show file tree
Hide file tree
Showing 6 changed files with 65 additions and 17 deletions.
7 changes: 5 additions & 2 deletions transforms/universal/doc_id/ray/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -10,8 +10,11 @@ RUN cd data-processing-lib-python && pip install --no-cache-dir -e .
# Copy the Ray flavour of the data processing library into the image (owned by
# ray:users) and install it in editable mode without keeping a pip cache.
COPY --chown=ray:users data-processing-lib-ray/ data-processing-lib-ray/
RUN cd data-processing-lib-ray && pip install --no-cache-dir -e .

# NOTE(review): this diff is rendered without +/- markers. The commit reports
# requirements.txt as deleted, so the next two lines are presumably the removed
# side of the hunk, replaced by the pyproject-based install below — confirm
# against the raw diff.
COPY requirements.txt requirements.txt
RUN pip install --no-cache-dir -r requirements.txt
# Install ray project source: copy src/, pyproject.toml and the readme, then
# pip-install the project itself in editable mode.
COPY --chown=ray:users src/ src/
COPY --chown=ray:users pyproject.toml pyproject.toml
# NOTE(review): the pyproject.toml added by this commit declares its readme as
# "README.md" (different case from the file copied here) — confirm the on-disk
# file name, since the build may not find the readme on a case-sensitive
# filesystem.
COPY --chown=ray:users Readme.md Readme.md
RUN pip install --no-cache-dir -e .

# Copy the doc_id Ray transform script into the image's current working directory.
COPY ./src/doc_id_transform_ray.py .
Expand Down
24 changes: 13 additions & 11 deletions transforms/universal/doc_id/ray/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -30,20 +30,15 @@ publish:: publish-dist publish-image

# NOTE(review): this hunk is rendered without +/- markers and without the
# leading TAB on recipe lines, so pre-commit and post-commit lines are
# interleaved — confirm against the raw diff before copying anything from here.

# Double-colon rule: image publishing is delegated to the shared
# .transforms.publish-image-ray target (defined outside this view).
publish-image:: .transforms.publish-image-ray

# Re-invokes make on the shared .transforms.set-versions target, passing the
# literal placeholder "not-used" as TRANSFORM_PYTHON_VERSION.
# NOTE(review): a second set-versions definition appears further down this
# hunk; this one is presumably the pre-commit version — confirm.
set-versions:
$(MAKE) TRANSFORM_PYTHON_VERSION=not-used .transforms.set-versions

# Building the distribution depends on set-versions and .defaults.build-dist.
build-dist:: set-versions .defaults.build-dist
# Environment setup is delegated to the shared .transforms.setup target.
setup:: .transforms.setup

# Publishing the distribution is delegated to .defaults.publish-dist.
publish-dist:: .defaults.publish-dist
# Set the version of the python transform that this depends on: re-invokes make
# on the shared .transforms.set-versions target, passing the same version value
# as both TRANSFORM_PYTHON_VERSION and TOML_VERSION (${...} and $(...) are
# equivalent variable references in make).
# NOTE(review): this is the doc_id/ray Makefile, yet the value comes from
# CODE2PARQUET_PYTHON_VERSION — looks like a copy-paste from the code2parquet
# transform; confirm which version variable doc_id should use.
set-versions:
$(MAKE) TRANSFORM_PYTHON_VERSION=${CODE2PARQUET_PYTHON_VERSION} TOML_VERSION=$(CODE2PARQUET_PYTHON_VERSION) .transforms.set-versions

# Runs $(TRANSFORM_NAME)_transform_ray.py through the shared
# .transforms.run-src-file target with local-run arguments: input is read from
# ../test-data/input, output goes to ../output, and --doc_id_int is set to True.
# NOTE(review): the same target is listed again at the end of the file in this
# diff view; this copy is presumably the removed (pre-commit) position — confirm.
run-cli-sample:
$(MAKE) RUN_FILE=$(TRANSFORM_NAME)_transform_ray.py \
RUN_ARGS="--run_locally True --data_local_config \"{ 'input_folder' : '../test-data/input', 'output_folder' : '../output'}\" \
--doc_id_int True " \
.transforms.run-src-file
# Building the distribution depends on set-versions and .defaults.build-dist.
build-dist:: set-versions .defaults.build-dist

# Delegates to the shared .transforms.run-local-sample target.
run-local-python-only-sample: .transforms.run-local-sample
# Publishing the distribution is delegated to .defaults.publish-dist.
publish-dist:: .defaults.publish-dist

# Delegates to the shared .transforms.run-local-ray-sample target.
run-local-sample: .transforms.run-local-ray-sample

Expand All @@ -52,3 +47,10 @@ run-s3-sample: .transforms.run-s3-ray-sample
# Delegates to the shared .minio-start target (defined outside this view).
minio-start: .minio-start

# Double-colon rule: image loading is delegated to .transforms.load-image.
load-image:: .transforms.load-image

# Runs $(TRANSFORM_NAME)_transform_ray.py through the shared
# .transforms.run-src-file target with local-run arguments: input is read from
# ../test-data/input, output goes to ../output, and --doc_id_int is set to True.
# NOTE(review): recipe lines must start with a TAB in the real Makefile; the
# rendering of this diff appears to have dropped the leading whitespace.
run-cli-sample:
$(MAKE) RUN_FILE=$(TRANSFORM_NAME)_transform_ray.py \
RUN_ARGS="--run_locally True --data_local_config \"{ 'input_folder' : '../test-data/input', 'output_folder' : '../output'}\" \
--doc_id_int True " \
.transforms.run-src-file

45 changes: 45 additions & 0 deletions transforms/universal/doc_id/ray/pyproject.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
# Packaging metadata for the doc_id Ray transform distribution.
[project]
name = "dpk_docid_transform_ray"
version = "0.4.0.dev6"
requires-python = ">=3.10"
description = "docid Ray Transform"
license = {text = "Apache-2.0"}
# NOTE(review): the Dockerfile changed in the same commit copies "Readme.md"
# (different case) — confirm the on-disk file name matches this entry.
readme = {file = "README.md", content-type = "text/markdown"}
authors = [
{ name = "David Wood", email = "dawood@us.ibm.com" },
{ name = "Boris Lublinsky", email = "blublinsky@ibm.com" },
]
# Runtime dependency: pinned to one exact version of the Ray data-prep toolkit.
dependencies = [
"data-prep-toolkit-ray==0.2.0.dev6",
]

# Build with the setuptools backend.
# NOTE(review): setuptools_scm is listed as a build requirement, but the version
# above is static and no [tool.setuptools_scm] table is visible here — confirm
# whether it is still needed.
[build-system]
requires = ["setuptools>=68.0.0", "wheel", "setuptools_scm[toml]>=7.1.0"]
build-backend = "setuptools.build_meta"

# Optional "dev" extra: packaging, testing and pre-commit tooling.
[project.optional-dependencies]
dev = [
"twine",
"pytest>=7.3.2",
"pytest-dotenv>=0.5.2",
"pytest-env>=1.0.0",
"pre-commit>=3.3.2",
"pytest-cov>=4.1.0",
"pytest-mock>=3.10.0",
"moto==5.0.5",
"markupsafe==2.0.1",
]

# NOTE(review): [options] and [options.packages.find] are setup.cfg section
# names; setuptools documents its pyproject.toml settings under
# [tool.setuptools] (e.g. [tool.setuptools.packages.find]), so these two tables
# are presumably ignored — confirm before relying on them.
[options]
package_dir = ["src","test"]

[options.packages.find]
where = ["src/"]

[tool.pytest.ini_options]
# Currently we use low coverage since we have to run tests separately (see makefile)
#addopts = "--cov --cov-report term-missing --cov-fail-under 25"
# Declare the custom pytest markers "unit" and "integration".
markers = ["unit: unit tests", "integration: integration tests"]

# Restrict coverage measurement to files matching src/*.
[tool.coverage.run]
include = ["src/*"]
2 changes: 0 additions & 2 deletions transforms/universal/doc_id/ray/requirements.txt

This file was deleted.

2 changes: 1 addition & 1 deletion transforms/universal/ededup/ray/pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
name = "dpk_ededup_transform_ray"
version = "0.4.0.dev6"
requires-python = ">=3.10"
description = "code2parquet Ray Transform"
description = "ededup Ray Transform"
license = {text = "Apache-2.0"}
readme = {file = "README.md", content-type = "text/markdown"}
authors = [
Expand Down
2 changes: 1 addition & 1 deletion transforms/universal/fdedup/ray/pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
name = "dpk_fdedup_transform_ray"
version = "0.4.0.dev6"
requires-python = ">=3.10"
description = "code2parquet Ray Transform"
description = "fdedup Ray Transform"
license = {text = "Apache-2.0"}
readme = {file = "README.md", content-type = "text/markdown"}
authors = [
Expand Down

0 comments on commit 76c9d1f

Please sign in to comment.