Skip to content
This repository was archived by the owner on Mar 13, 2020. It is now read-only.

Commit df984d3

Browse files
authored
Merge pull request #60 from pageuppeople-opensource/feature/SP-149-persist-batch-count
SP-149 - Persist count of batches per model and per execution
2 parents bb3ee5e + d13c143 commit df984d3

File tree

4 files changed

+39
-0
lines changed

4 files changed

+39
-0
lines changed
Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,26 @@
1+
"""add batches_processed count
2+
3+
Revision ID: bb0c5e8d05e2
4+
Revises: 00f2b412576b
5+
Create Date: 2019-07-26 13:56:06.412042
6+
7+
"""
8+
from alembic import op
9+
import sqlalchemy as sa
10+
11+
12+
# revision identifiers, used by Alembic.
13+
revision = 'bb0c5e8d05e2'
14+
down_revision = '00f2b412576b'
15+
branch_labels = None
16+
depends_on = None
17+
18+
19+
def upgrade():
    """Add a nullable integer ``batches_processed`` column to two ``rdl`` tables.

    The column is added to ``rdl.execution`` first and then to
    ``rdl.execution_model``.
    """
    for table_name in ('execution', 'execution_model'):
        # Build a fresh Column object per call rather than sharing one
        # instance between the two add_column operations.
        batches_column = sa.Column('batches_processed', sa.Integer(), nullable=True)
        op.add_column(table_name, batches_column, schema='rdl')
22+
23+
24+
def downgrade():
    """Drop the ``batches_processed`` column from the two ``rdl`` tables.

    Columns are removed in the reverse order of ``upgrade``:
    ``rdl.execution_model`` first, then ``rdl.execution``.
    """
    for table_name in ('execution_model', 'execution'):
        op.drop_column(table_name, 'batches_processed', schema='rdl')

rdl/data_load_tracking/DataLoadTrackerRepository.py

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -44,12 +44,14 @@ def complete_execution(self, execution_id, total_number_of_models,
4444
execution_end_time = session.query(func.now()).scalar()
4545
total_execution_seconds = (execution_end_time - current_execution.started_on).total_seconds()
4646
total_rows_processed = self.get_execution_rows(current_execution.execution_id)
47+
total_batches_processed = self.get_execution_batches(current_execution.execution_id)
4748

4849
current_execution.models_processed = total_number_of_models
4950
current_execution.status = status
5051
current_execution.completed_on = execution_end_time
5152
current_execution.execution_time_s = total_execution_seconds
5253
current_execution.rows_processed = total_rows_processed
54+
current_execution.batches_processed = total_batches_processed
5355
session.commit()
5456
self.logger.info(current_execution)
5557
session.close()
@@ -85,6 +87,7 @@ def save_execution_model(self, data_load_tracker):
8587
current_execution_model.completed_on = execution_end_time
8688
current_execution_model.execution_time_ms = int(total_execution_seconds * 1000)
8789

90+
current_execution_model.batches_processed = len(data_load_tracker.batches)
8891
current_execution_model.rows_processed = data_load_tracker.total_row_count
8992
current_execution_model.status = data_load_tracker.status
9093
current_execution_model.is_full_refresh = data_load_tracker.is_full_refresh
@@ -104,6 +107,14 @@ def get_execution_rows(self, execution_id):
104107
session.close()
105108
return results
106109

110+
def get_execution_batches(self, execution_id):
    """Return the total ``batches_processed`` across the models of one execution.

    Runs ``SUM(batches_processed)`` over the ``ExecutionModelEntity`` rows
    whose ``execution_id`` matches the given one.

    :param execution_id: identifier of the execution whose model rows are summed.
    :return: the scalar result of the aggregate query. This is ``None`` when
        the aggregate evaluates to NULL (for example when no row matches or
        every matching ``batches_processed`` is NULL).
    """
    session = self.session_maker()
    try:
        return session.query(func.sum(ExecutionModelEntity.batches_processed))\
            .filter(ExecutionModelEntity.execution_id == execution_id)\
            .scalar()
    finally:
        # Always release the session, even if the query raises, so a failed
        # lookup does not leak the session.
        session.close()
117+
107118
def get_full_refresh_since(self, timestamp):
108119
session = self.session_maker()
109120
results = session.query(ExecutionModelEntity.model_name)\

rdl/entities/execution_entity.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@ class ExecutionEntity(Base):
1919
completed_on = Column(DateTime(timezone=True), nullable=True)
2020
execution_time_s = Column(BigInteger, nullable=True)
2121
rows_processed = Column(BigInteger, nullable=True)
22+
batches_processed = Column(Integer, nullable=True)
2223
models_processed = Column(Integer, nullable=True)
2324

2425
def __str__(self):

rdl/entities/execution_model_entity.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,7 @@ class ExecutionModelEntity(Base):
3131
completed_on = Column(DateTime(timezone=True), nullable=True)
3232
execution_time_ms = Column(BigInteger, nullable=True)
3333
rows_processed = Column(BigInteger, nullable=True)
34+
batches_processed = Column(Integer, nullable=True)
3435
model_checksum = Column(String(100), nullable=False)
3536
failure_reason = Column(String(1000), nullable=True)
3637

0 commit comments

Comments
 (0)