Skip to content

Commit

Permalink
Github Workflow Replacement for Jenkins Jobs, beam_PerformanceTests_M…
Browse files Browse the repository at this point in the history
…anyFiles_TextIOIT* (#28581)

* beam_PerformanceTests_ManyFiles_TextIOIT

* fix

* cron fix and whitespace
  • Loading branch information
magicgoody authored Sep 28, 2023
1 parent 1c0f1be commit e328ab5
Show file tree
Hide file tree
Showing 4 changed files with 258 additions and 0 deletions.
91 changes: 91 additions & 0 deletions .github/workflows/beam_PerformanceTests_ManyFiles_TextIOIT.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,91 @@
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Display name shown in the Actions UI.
name: "PerformanceTests ManyFiles TextIOIT"

# Triggers:
#   - manual dispatch,
#   - cron: minute 10 of every 12th hour starting at hour 2,
#   - any newly created issue/PR comment (the job's `if` filters on the phrase).
on:
  workflow_dispatch:
  schedule:
    - cron: "10 2/12 * * *"
  issue_comment:
    types:
      - created

# Declare the token permissions explicitly instead of relying on the
# defaults, which are `write-all` in case of a pull_request_target event.
# Keys are listed alphabetically.
permissions:
  actions: write
  checks: write
  contents: read
  deployments: read
  discussions: read
  id-token: none
  issues: write
  packages: read
  pages: read
  pull-requests: write
  repository-projects: read
  security-events: read
  statuses: read
# A newer run that lands in the same concurrency group cancels the run that is
# still in progress. The group key combines the workflow name, the issue
# number / commit / ref, and the schedule, comment body or sender login.
concurrency:
  cancel-in-progress: true
  group: "${{ github.workflow }} @ ${{ github.event.issue.number || github.sha || github.head_ref || github.ref }}-${{ github.event.schedule || github.event.comment.body || github.event.sender.login }}"

# Workflow-level environment shared by every step.
env:
  # Gradle Enterprise credentials, taken from repository secrets.
  GRADLE_ENTERPRISE_ACCESS_KEY: "${{ secrets.GE_ACCESS_TOKEN }}"
  GRADLE_ENTERPRISE_CACHE_USERNAME: "${{ secrets.GE_CACHE_USERNAME }}"
  GRADLE_ENTERPRISE_CACHE_PASSWORD: "${{ secrets.GE_CACHE_PASSWORD }}"
  # Starts empty; the final Gradle step reads it as the pipeline options.
  # Presumably filled in by the "Prepare test arguments" step - TODO confirm
  # against test-arguments-action.
  beam_PerformanceTests_ManyFiles_TextIOIT_test_arguments_1: ""

jobs:
  # Runs org.apache.beam.sdk.io.text.TextIOIT through Gradle with
  # -Dfilesystem=gcs and -DintegrationTestRunner=dataflow.
  beam_PerformanceTests_ManyFiles_TextIOIT:
    # Run on manual dispatch, on the cron schedule, or when a comment body is
    # exactly the trigger phrase; every other issue_comment event is skipped.
    if: |
      github.event_name == 'workflow_dispatch' ||
      github.event_name == 'schedule' ||
      github.event.comment.body == 'Run Java ManyFilesTextIO Performance Test'
    runs-on: [self-hosted, ubuntu-20.04, main]
    timeout-minutes: 100
    name: ${{ matrix.job_name }} (${{ matrix.job_phrase }})
    # Single-entry matrix: used only to expose job_name / job_phrase as
    # reusable values in the steps below.
    strategy:
      matrix:
        job_name: ["beam_PerformanceTests_ManyFiles_TextIOIT"]
        job_phrase: ["Run Java ManyFilesTextIO Performance Test"]
    steps:
      # Pinned to v4 to match the sibling ManyFiles TextIOIT HDFS workflow,
      # which runs on the same runner labels.
      - uses: actions/checkout@v4
      - name: Setup repository
        uses: ./.github/actions/setup-action
        with:
          comment_phrase: ${{ matrix.job_phrase }}
          github_token: ${{ secrets.GITHUB_TOKEN }}
          github_job: ${{ matrix.job_name }} (${{ matrix.job_phrase }})
      # Combines the static option file with the per-run GCS filename prefix
      # (unique per run via github.run_id).
      - name: Prepare test arguments
        uses: ./.github/actions/test-arguments-action
        with:
          test-type: performance
          test-language: java
          argument-file-paths: |
            ${{ github.workspace }}/.github/workflows/performance-tests-job-configs/config_ManyFiles_TextIOIT.txt
          arguments: |
            --filenamePrefix=gs://temp-storage-for-perf-tests/${{ matrix.job_name }}/${{github.run_id}}/
      # Pipeline options are read from the workflow-level env placeholder.
      - name: run integrationTest
        uses: ./.github/actions/gradle-command-self-hosted-action
        with:
          gradle-command: :sdks:java:io:file-based-io-tests:integrationTest
          arguments: |
            --tests org.apache.beam.sdk.io.text.TextIOIT \
            --info \
            -Dfilesystem=gcs \
            -DintegrationTestRunner=dataflow \
            -DintegrationTestPipelineOptions='[${{ env.beam_PerformanceTests_ManyFiles_TextIOIT_test_arguments_1 }}]'
109 changes: 109 additions & 0 deletions .github/workflows/beam_PerformanceTests_ManyFiles_TextIOIT_HDFS.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,109 @@
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Display name shown in the Actions UI.
name: "PerformanceTests ManyFiles TextIOIT HDFS"

# Triggers:
#   - manual dispatch,
#   - cron: minute 50 of every 12th hour starting at hour 2,
#   - any newly created issue/PR comment (the job's `if` filters on the phrase).
on:
  workflow_dispatch:
  schedule:
    - cron: "50 2/12 * * *"
  issue_comment:
    types:
      - created

# Declare the token permissions explicitly instead of relying on the
# defaults, which are `write-all` in case of a pull_request_target event.
# Keys are listed alphabetically.
permissions:
  actions: write
  checks: write
  contents: read
  deployments: read
  discussions: read
  id-token: none
  issues: write
  packages: read
  pages: read
  pull-requests: write
  repository-projects: read
  security-events: read
  statuses: read
# A newer run that lands in the same concurrency group cancels the run that is
# still in progress. The group key combines the workflow name, the issue
# number / commit / ref, and the schedule, comment body or sender login.
concurrency:
  cancel-in-progress: true
  group: "${{ github.workflow }} @ ${{ github.event.issue.number || github.sha || github.head_ref || github.ref }}-${{ github.event.schedule || github.event.comment.body || github.event.sender.login }}"

# Workflow-level environment shared by every step.
env:
  # Gradle Enterprise credentials, taken from repository secrets.
  GRADLE_ENTERPRISE_ACCESS_KEY: "${{ secrets.GE_ACCESS_TOKEN }}"
  GRADLE_ENTERPRISE_CACHE_USERNAME: "${{ secrets.GE_CACHE_USERNAME }}"
  GRADLE_ENTERPRISE_CACHE_PASSWORD: "${{ secrets.GE_CACHE_PASSWORD }}"
  # Starts empty; the final Gradle step reads it as the pipeline options.
  # Presumably filled in by the "Prepare test arguments" step - TODO confirm
  # against test-arguments-action.
  beam_PerformanceTests_ManyFiles_TextIOIT_HDFS_test_arguments_1: ""

jobs:
  # Deploys a Hadoop cluster via kubectl, then runs
  # org.apache.beam.sdk.io.text.TextIOIT through Gradle with -Dfilesystem=hdfs
  # and -DintegrationTestRunner=dataflow.
  beam_PerformanceTests_ManyFiles_TextIOIT_HDFS:
    # Run on manual dispatch, on the cron schedule, or when a comment body is
    # exactly the trigger phrase; every other issue_comment event is skipped.
    if: |
      github.event_name == 'workflow_dispatch' ||
      github.event_name == 'schedule' ||
      github.event.comment.body == 'Run Java ManyFilesTextIO Performance Test HDFS'
    runs-on: [self-hosted, ubuntu-20.04, main]
    timeout-minutes: 100
    name: ${{ matrix.job_name }} (${{ matrix.job_phrase }})
    # Single-entry matrix: used only to expose job_name / job_phrase as
    # reusable values in the steps below.
    strategy:
      matrix:
        job_name: ["beam_PerformanceTests_ManyFiles_TextIOIT_HDFS"]
        job_phrase: ["Run Java ManyFilesTextIO Performance Test HDFS"]
    steps:
      - uses: actions/checkout@v4
      - name: Setup repository
        uses: ./.github/actions/setup-action
        with:
          comment_phrase: ${{ matrix.job_phrase }}
          github_token: ${{ secrets.GITHUB_TOKEN }}
          github_job: ${{ matrix.job_name }} (${{ matrix.job_phrase }})
      - name: Authenticate on GCP
        id: auth
        uses: google-github-actions/auth@v1
        with:
          credentials_json: ${{ secrets.GCP_SA_KEY }}
          project_id: ${{ secrets.GCP_PROJECT_ID }}
      # The namespace is made unique per run via github.run_id.
      - name: Set k8s access
        uses: ./.github/actions/setup-k8s-access
        with:
          k8s_namespace: ${{ matrix.job_name }}-${{ github.run_id }}
      # Applies the Hadoop manifest, waits up to 120s for the `hadoop` service
      # to receive a load-balancer IP, and exports it as the step output
      # `hadoop_IP`. All expansions are quoted so that neither the workspace
      # path nor the output-file path is subject to word splitting/globbing.
      - name: Install Hadoop
        id: install_hadoop
        run: |
          kubectl apply -f "${{ github.workspace }}/.test-infra/kubernetes/hadoop/LargeITCluster/hdfs-multi-datanode-cluster.yml"
          kubectl wait svc/hadoop --for=jsonpath='{.status.loadBalancer.ingress[0].ip}' --timeout=120s
          loadbalancer_IP="$(kubectl get svc hadoop -o jsonpath='{.status.loadBalancer.ingress[0].ip}')"
          echo "hadoop_IP=${loadbalancer_IP}" >> "$GITHUB_OUTPUT"
      # Combines the static option file with the per-run HDFS endpoint.
      # NOTE(review): fs.defaultFS is written as `hdfs:<ip>:9000` (no `//`)
      # while filenamePrefix uses `hdfs://<ip>:9000` - confirm this is intended.
      - name: Prepare test arguments
        uses: ./.github/actions/test-arguments-action
        with:
          test-type: performance
          test-language: java
          argument-file-paths: |
            ${{ github.workspace }}/.github/workflows/performance-tests-job-configs/config_ManyFiles_TextIOIT_HDFS.txt
          arguments: |
            --filenamePrefix=hdfs://${{ steps.install_hadoop.outputs.hadoop_IP }}:9000/TEXTIO_IT_
            --hdfsConfiguration=[{\\\"fs.defaultFS\\\":\\\"hdfs:${{ steps.install_hadoop.outputs.hadoop_IP }}:9000\\\",\\\"dfs.replication\\\":1}]
      # Pipeline options are read from the workflow-level env placeholder.
      - name: run integrationTest
        uses: ./.github/actions/gradle-command-self-hosted-action
        with:
          gradle-command: :sdks:java:io:file-based-io-tests:integrationTest
          arguments: |
            --tests org.apache.beam.sdk.io.text.TextIOIT \
            --info \
            -Dfilesystem=hdfs \
            -DintegrationTestRunner=dataflow \
            -DintegrationTestPipelineOptions='[${{ env.beam_PerformanceTests_ManyFiles_TextIOIT_HDFS_test_arguments_1 }}]'
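29 changes: 29 additions & 0 deletions .github/workflows/performance-tests-job-configs/config_ManyFiles_TextIOIT.txt
Original file line number Diff line number Diff line change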
@@ -0,0 +1,29 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Pipeline options for the ManyFiles TextIOIT performance run, one option per
# line. Presumably this is the config_ManyFiles_TextIOIT.txt file passed via
# `argument-file-paths` in the matching workflow - TODO confirm.
# The options select the Dataflow runner with a fixed pool of 5 workers
# (autoscaling NONE), name the BigQuery dataset/table and Influx measurement,
# and fix the workload at 25000000 records in 1000 shards.
# NOTE(review): expectedHash presumably depends on numberOfRecords - confirm
# before changing either value.
--runner=DataflowRunner
--tempRoot=gs://temp-storage-for-perf-tests
--bigQueryDataset=beam_performance
--bigQueryTable=many_files_textioit_results
--influxMeasurement=many_files_textioit_results
--reportGcsPerformanceMetrics=true
--gcsPerformanceMetrics=true
--numberOfRecords=25000000
--expectedHash=f8453256ccf861e8a312c125dfe0e436
--datasetSize=1062290000
--numberOfShards=1000
--numWorkers=5
--autoscalingAlgorithm=NONE
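29 changes: 29 additions & 0 deletions .github/workflows/performance-tests-job-configs/config_ManyFiles_TextIOIT_HDFS.txt
Original file line number Diff line number Diff line change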
@@ -0,0 +1,29 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Pipeline options for the ManyFiles TextIOIT HDFS performance run, one option
# per line. Presumably this is the config_ManyFiles_TextIOIT_HDFS.txt file
# passed via `argument-file-paths` in the matching workflow - TODO confirm.
# The options select the Dataflow runner with a fixed pool of 5 workers
# (autoscaling NONE), name the BigQuery dataset/table and Influx measurement,
# and fix the workload at 25000000 records in 1000 shards.
# NOTE(review): the two GCS performance-metrics flags are enabled although
# this run targets HDFS - confirm that is intended.
# NOTE(review): expectedHash presumably depends on numberOfRecords - confirm
# before changing either value.
--runner=DataflowRunner
--tempRoot=gs://temp-storage-for-perf-tests
--bigQueryDataset=beam_performance
--bigQueryTable=many_files_textioit_hdfs_results
--influxMeasurement=many_files_textioit_hdfs_results
--reportGcsPerformanceMetrics=true
--gcsPerformanceMetrics=true
--numberOfRecords=25000000
--expectedHash=f8453256ccf861e8a312c125dfe0e436
--datasetSize=1062290000
--numberOfShards=1000
--numWorkers=5
--autoscalingAlgorithm=NONE

0 comments on commit e328ab5

Please sign in to comment.