This repository was archived by the owner on Mar 13, 2020. It is now read-only.

Commit 68291b9

[OSC-1301] Split up execution table (#35)

* split up tables
* rename DataLoadExecution -> ExecutionModel
* rename correlation id to execution id
* ensure alembic can downgrade for unit tests
* update unit tests
* move alembic readme to main
* reorder postgres debugging
* use constant for foreignkey
* split batch execution station and execution status
* rename string constant for not started -> started
* split up entities
* improve logging and column for num models
* add citext extension to alembic
* log model execution as they partially complete
* fail execution if model fails
* lockdown gitignore for csv
* use sqlalchemy inspect for table names
* split up execution/executionmodel status
* change string constant from completed succesfully to succesful
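The split described in the commit message replaces the single `data_load_execution` table (keyed by a correlation id) with an execution record plus one row per model loaded in that execution. Below is a rough sketch of that shape, assuming SQLAlchemy declarative entities; the class, table, and column names are inferred from the diffs further down, not copied from the repository.

```python
# Hypothetical sketch of the post-split entities; names and columns beyond
# execution_id/status are illustrative, not taken from the repository.
from sqlalchemy import Column, ForeignKey, Integer, String
from sqlalchemy.dialects.postgresql import UUID
from sqlalchemy.ext.declarative import declarative_base

Base = declarative_base()
SCHEMA = 'rdl'  # the schema used by the existing migrations


class Execution(Base):
    """One row per pipeline run (formerly identified only by correlation_id)."""
    __tablename__ = 'execution'
    __table_args__ = {'schema': SCHEMA}

    execution_id = Column(UUID(as_uuid=True), primary_key=True)
    status = Column(String(50), nullable=False)


class ExecutionModel(Base):
    """One row per model processed within an execution (formerly DataLoadExecution)."""
    __tablename__ = 'execution_model'
    __table_args__ = {'schema': SCHEMA}

    id = Column(Integer, primary_key=True)
    execution_id = Column(UUID(as_uuid=True),
                          ForeignKey(f'{SCHEMA}.execution.execution_id'),
                          nullable=False)
    model_name = Column(String(250), nullable=False)
    status = Column(String(50), nullable=False)
```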

18 files changed (+395, -221 lines)

.gitignore

Lines changed: 3 additions & 0 deletions

```diff
@@ -10,6 +10,9 @@ __pycache__/
 *.py[cod]
 *$py.class
 
+# generated csvs
+rdl_integration_tests.*.csv
+
 # C extensions
 *.so
 
```

README.md

Lines changed: 83 additions & 23 deletions

````diff
@@ -80,6 +80,76 @@ See `./tests/integration_tests/test_*.cmd` scripts for usage samples.
 
 ## Development
 
+### Alembic
+
+#### To upgrade to the latest schema
+
+```bash
+alembic -c rdl/alembic.ini -x $DESTINATION_DB_URL upgrade head
+```
+
+#### Updating the schema
+
+Ensure any new tables inherit from the same Base used in `alembic/env.py`
+
+```python
+from rdl.entities import Base
+```
+
+Whenever you make a schema change, run
+
+```bash
+pip install .
+alembic -c rdl/alembic.ini -x $DESTINATION_DB_URL revision -m "$REVISION_MESSAGE" --autogenerate
+```
+
+check that the new version in `alembic/versions` is correct
+
+#### Downgrading the schema
+
+Whenever you want to downgrade the schema
+
+```bash
+alembic -c rdl/alembic.ini -x $DESTINATION_DB_URL history # see the list of revision ids
+alembic -c rdl/alembic.ini -x $DESTINATION_DB_URL current # see the current revision id
+alembic -c rdl/alembic.ini -x $DESTINATION_DB_URL downgrade -1 # revert back one revision
+alembic -c rdl/alembic.ini -x $DESTINATION_DB_URL downgrade $revision_id # revert back to a revision id, found using the history command
+```
+
+#### Inaccurate autogenerated revisions
+
+Does your autogenerated revision not look right?
+
+Try editing the function `use_schema` in `alembic/env.py`, this determines what alembic looks for in the database.
+
+[Relevant Documentation](https://alembic.sqlalchemy.org/en/latest/api/runtime.html?highlight=include_schemas#alembic.runtime.environment.EnvironmentContext.configure.params.include_object)
+
+#### New models aren't showing up in upgrade section
+
+Ensure all model classes inherit from the same Base that `alembic/env.py` imports, and that the following class
+properties are set
+
+```python
+__tablename__ = 'your_mapped_table_name'
+__table_args__ = {'schema': Constants.DATA_PIPELINE_EXECUTION_SCHEMA_NAME}
+```
+
+Also try importing the models into `alembic/env.py`, eg
+
+```python
+from rdl.data_load_tracking import DataLoadExecution
+```
+
+#### Alembic won't pick up my change
+
+[Alembic only supports some changes](https://alembic.sqlalchemy.org/en/latest/autogenerate.html#what-does-autogenerate-detect-and-what-does-it-not-detect)
+
+Try adding raw sql in the `upgrade()` and `downgrade()` functions of your revision
+
+```python
+op.execute(RAW_SQL)
+```
+
 ### Linting
 
 Use autopep8 before pushing commits (include the "." for the folder)
@@ -109,29 +179,6 @@ Use the following vscode settings by either:
 
 ### Testing
 
-### Postgres debugging
-
-Ensure the database you are using is in utf8 mode. You cannot change encoding once the database is created.
-
-```sql
-
-CREATE DATABASE "my_database"
-WITH OWNER "postgres"
-ENCODING 'UTF8'
-TEMPLATE template0;
-
-```
-
-Also ensure that the database has the CITEXT extension by logging into the DB and adding it
-
-```sql
-
->>>psql my_database
-
-CREATE EXTENSION CITEXT;
-
-```
-
 #### Integration
 
 The test batch files assume there is a user by the name of `postgres` on the system.
@@ -159,6 +206,19 @@ _Execution:_
 
 Execution is as simply as `python3 run_tests.py`
 
+### Postgres debugging
+
+Ensure the database you are using is in utf8 mode. You cannot change encoding once the database is created.
+
+```sql
+
+CREATE DATABASE "my_database"
+WITH OWNER "postgres"
+ENCODING 'UTF8'
+TEMPLATE template0;
+
+```
+
 ### `Destination.Type` Values
 
 The destination.type value controls both the data reader type and the destination column type. These are implemented in ColumnTypeResolver.py.
````
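A note on the `-x $DESTINATION_DB_URL` flag used throughout the commands added above: Alembic only forwards `-x` values to `env.py`, which must read them itself. The repository's actual handling is not part of this diff, so the following is a minimal sketch of how `alembic/env.py` might consume the value, assuming it is passed as a bare database URL.

```python
# (inside alembic/env.py) -- sketch only, assuming the -x value is a bare URL,
# e.g. `alembic -c rdl/alembic.ini -x postgresql+psycopg2://... upgrade head`
from alembic import context

config = context.config

# get_x_argument() returns the list of values supplied via -x on the command line.
x_arguments = context.get_x_argument()
if x_arguments:
    # Override whatever sqlalchemy.url is configured in rdl/alembic.ini.
    config.set_main_option("sqlalchemy.url", x_arguments[0])
```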

appveyor.yml

Lines changed: 2 additions & 1 deletion

```diff
@@ -39,7 +39,6 @@ build_script:
 #Setup the target PostgreSQL database
 - psql -c "SELECT VERSION()"
 - createdb %DBNAME%
-- psql -d %DBNAME% -c "CREATE EXTENSION IF NOT EXISTS citext"
 - C:\projects\relational-data-loader\venv\Scripts\activate.bat
 #Install the dependencies for rdl.
 - pip install .
@@ -73,3 +72,5 @@ test_script:
 on_finish:
 #Enable this line to make the build pause after completion for RDP troubleshooting.
 #- ps: $blockRdp = $true; iex ((new-object net.webclient).DownloadString('https://github.com/appveyor/ci/master/scripts/enable-rdp.ps1'))
+
+- alembic -c rdl/alembic.ini -x postgresql+psycopg2://postgres:there_is_no_password_due_to_pg_trust@localhost/relational_data_loader_integration_tests downgrade base
```

rdl/DataLoadManager.py

Lines changed: 8 additions & 22 deletions

```diff
@@ -21,13 +21,11 @@ def __init__(self, configuration_path, source_db, target_db, data_load_tracker_r
         self.source_db = source_db
         self.target_db = target_db
         self.data_load_tracker_repository = data_load_tracker_repository
-        self.correlation_id = uuid.uuid4()
         self.model_pattern = '**/{model_name}.json'
         self.all_model_pattern = self.model_pattern.format(model_name='*')
 
     def start_imports(self, force_full_refresh_models):
-        self.logger.info(f"Starting Execution ID: '{self.correlation_id}'")
-        execution_start_time = datetime.now()
+        self.execution_id = self.data_load_tracker_repository.create_execution()
 
         model_folder = Path(self.configuration_path)
         if not model_folder.is_dir():
@@ -60,19 +58,7 @@ def start_imports(self, force_full_refresh_models):
             model_number += 1 # avoid all_model_names.index(model_name) due to linear time-complexity in list length
             self.start_single_import(model_file, request_full_refresh, model_number, total_number_of_models)
 
-        self.logger.info("Execution completed.")
-        execution_end_time = datetime.now()
-        total_execution_seconds = int((execution_end_time - execution_start_time).total_seconds())
-        execution_hours = total_execution_seconds // 3600
-        execution_minutes = (total_execution_seconds // 60) % 60
-        execution_seconds = total_execution_seconds % 60
-        total_number_of_rows_processed = self.data_load_tracker_repository.get_execution_rows(self.correlation_id)
-        self.logger.info(
-            f"Completed Execution ID: {self.correlation_id}"
-            f"; Models Processed: {total_number_of_models:,}"
-            f"; Rows Processed: {total_number_of_rows_processed:,}"
-            f"; Execution Time: {execution_hours}h {execution_minutes}m {execution_seconds}s"
-            f"; Average rows processed per second: {(total_number_of_rows_processed//max(total_execution_seconds, 1)):,}.")
+        self.data_load_tracker_repository.complete_execution(self.execution_id, total_number_of_models)
 
     def start_single_import(self, model_file, requested_full_refresh, model_number, total_number_of_models):
         model_name = model_file.stem
@@ -128,9 +114,9 @@ def start_single_import(self, model_file, requested_full_refresh, model_number,
         if full_refresh:
             self.logger.info(f"Performing full refresh for reason '{full_refresh_reason}'")
 
-        data_load_tracker = DataLoadTracker(self.correlation_id, model_name, model_checksum, model_config,
+        data_load_tracker = DataLoadTracker(self.execution_id, model_name, model_checksum, model_config,
                                             full_refresh, full_refresh_reason, change_tracking_info)
-
+        self.data_load_tracker_repository.create_execution_model(data_load_tracker)
         destination_table_manager.create_schema(model_config['target_schema'])
 
         self.logger.debug(f"Recreating the staging table {model_config['target_schema']}."
@@ -158,7 +144,8 @@ def start_single_import(self, model_file, requested_full_refresh, model_number,
                 batch_data_loader.load_batch(batch_key_tracker)
             except SensitiveDataError as e:
                 data_load_tracker.data_load_failed(e.sensitive_error_args)
-                self.data_load_tracker_repository.save(data_load_tracker)
+                self.data_load_tracker_repository.save_execution_model(data_load_tracker)
+                self.data_load_tracker_repository.fail_execution(self.execution_id, model_number)
                 raise e
 
         if full_refresh:
@@ -177,10 +164,9 @@ def start_single_import(self, model_file, requested_full_refresh, model_number,
             destination_table_manager.drop_table(model_config['target_schema'],
                                                  model_config['stage_table'])
         data_load_tracker.data_load_successful()
-        self.data_load_tracker_repository.save(data_load_tracker)
         self.logger.info(f"{model_number:0{max_model_number_len}d} of {total_number_of_models}"
-                         f" COMPLETED {model_name},"
-                         f" {data_load_tracker.get_statistics()}")
+                         f" COMPLETED {model_name}")
+        self.data_load_tracker_repository.save_execution_model(data_load_tracker)
 
     @staticmethod
     def is_full_refresh(*,
```
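Taken together, the calls in this file imply a repository surface of roughly the following shape. The method names and argument lists come from the diff above; the class skeleton and docstrings are illustrative only, not the repository's implementation.

```python
# Sketch of the DataLoadTrackerRepository surface implied by DataLoadManager.py.
# Only the method names and call shapes come from the diff; bodies are placeholders.
class DataLoadTrackerRepository:
    def create_execution(self):
        """Insert a new execution row and return its identifier."""
        raise NotImplementedError

    def create_execution_model(self, data_load_tracker):
        """Insert a row for the model that is about to be loaded."""
        raise NotImplementedError

    def save_execution_model(self, data_load_tracker):
        """Persist the tracker's final state for a single model (success or failure)."""
        raise NotImplementedError

    def fail_execution(self, execution_id, model_number):
        """Mark the whole execution as failed after a model-level failure."""
        raise NotImplementedError

    def complete_execution(self, execution_id, total_number_of_models):
        """Mark the execution as successful and record how many models were processed."""
        raise NotImplementedError
```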

rdl/alembic/README.md

Lines changed: 0 additions & 73 deletions
This file was deleted.

rdl/alembic/env.py

Lines changed: 1 addition & 1 deletion

```diff
@@ -6,7 +6,7 @@
 from sqlalchemy import pool
 
 from alembic import context
-from rdl.data_load_tracking.DataLoadExecution import Base
+from rdl.entities import Base
 from rdl.shared import Constants
 
 # this is the Alembic Config object, which provides
```
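This import swap relies on the entities package exposing a single shared declarative `Base` that both the ORM classes and Alembic use. The module itself is not part of this diff; a minimal sketch of what `rdl/entities/__init__.py` could look like:

```python
# rdl/entities/__init__.py -- hypothetical layout, not shown in this commit.
from sqlalchemy.ext.declarative import declarative_base

# Single metadata object: every entity class inherits from this Base,
# and alembic/env.py points its target_metadata at Base.metadata.
Base = declarative_base()

# Importing the entity modules here would register their tables on
# Base.metadata before Alembic autogenerate inspects it, e.g. (assumed names):
# from rdl.entities.execution_entity import Execution
# from rdl.entities.execution_model_entity import ExecutionModel
```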

rdl/alembic/versions/0d4a3ce9c0a9_add_failure_reason_column_to_data_load_.py

Lines changed: 2 additions & 1 deletion

```diff
@@ -18,7 +18,8 @@
 
 def upgrade():
     # ### commands auto generated by Alembic - please adjust! ###
-    op.add_column('data_load_execution', sa.Column('failure_reason', sa.String(length=1000), nullable=True), schema='rdl')
+    op.add_column('data_load_execution', sa.Column('failure_reason',
+                                                   sa.String(length=1000), nullable=True), schema='rdl')
     # ### end Alembic commands ###
 
 
```
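The change above only reflows `upgrade()` for line length, but since the commit also aims to keep every revision downgradable for the unit tests, this revision's `downgrade()` would simply mirror the column addition. The actual downgrade body is not shown in the diff; a sketch:

```python
from alembic import op


def downgrade():
    # Assumed mirror of upgrade(): drop the column added above.
    op.drop_column('data_load_execution', 'failure_reason', schema='rdl')
```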
rdl/alembic/versions/710e28aa5978_add_data_load_execution_table.py

Lines changed: 1 addition & 0 deletions

```diff
@@ -19,6 +19,7 @@
 def upgrade():
     # ### commands auto generated by Alembic - please adjust! ###
     op.execute('CREATE SCHEMA IF NOT EXISTS rdl')
+    op.execute('CREATE EXTENSION IF NOT EXISTS CITEXT')
     op.create_table('data_load_execution',
                     sa.Column('id', sa.Integer(), nullable=False),
                     sa.Column('correlation_id', postgresql.UUID(as_uuid=True), nullable=True),
```
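With AppVeyor now finishing every build by running `downgrade base` (see the appveyor.yml change above), this revision's `downgrade()` has to undo everything `upgrade()` creates, including the new extension. The downgrade body is not shown in the diff; a sketch of what it might contain:

```python
from alembic import op


def downgrade():
    # Assumed reverse of upgrade(): drop the table first, then the extension and schema.
    op.drop_table('data_load_execution', schema='rdl')
    op.execute('DROP EXTENSION IF EXISTS CITEXT')
    op.execute('DROP SCHEMA IF EXISTS rdl')
```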
