Introduce MigrationMixin and document migration and tests

Previously a migration was only tested be reverting(running down migration) until being one migration before the one that shall be tested, then executing the actual test in which all migrations run the up part even of migrations that come after the migration on wants to test. This commit introduces the MigrationMixin module, a valuable utility for Sequel migration testing. This new module reverts the database schema to its version prior to a specific migration, provides a directory with the migration file for running a given migration in a test, and restores the schema to its full state after test completion to avoid half-migrated database scenarios. The accompanying updates to the documentation elaborate on the distinctive operating nature of Sequel migration test. Also extensive documentation on sequel migrations itself in the scope of the supported dbs and cloud_controller_ng has been added to provide a good knowledge base for writing highly sophisticated migrations that are resilient, offer high level of consistency, are compatible with mysql and psql and fast. Constraints and restrictions on Sequel migration and test writing have also been detailed, stressing the need to exclude Cloud Controller (CC) code in migrations and tests to maintain consistency in test outcomes and behaviours.
cloudfoundry · Sep 6, 2023 · cd91acd · cd91acd
1 parent eef9168
commit cd91acd
Show file tree

Hide file tree

Showing 4 changed files with 156 additions and 32 deletions.
diff --git a/db/migrations/Readme.md b/db/migrations/Readme.md
@@ -0,0 +1 @@
+../../spec/migrations/Readme.md
diff --git a/spec/migrations/20230822153000_streamline_annotation_key_prefix_spec.rb b/spec/migrations/20230822153000_streamline_annotation_key_prefix_spec.rb
@@ -1,40 +1,14 @@
 require 'spec_helper'
+require 'migrations/helpers/migration_mixin'
 
 RSpec.describe 'migration to streamline changes to annotation_key_prefix', isolation: :truncation do
-  let(:filename) { '20230822153000_streamline_annotation_key_prefix.rb' }
-  let(:tmp_all_migrations_dir) { Dir.mktmpdir }
-  let(:tmp_down_migrations_dir) { Dir.mktmpdir }
-  let(:tmp_up_migrations_dir) { Dir.mktmpdir }
-  let(:db) { Sequel::Model.db }
+  let(:migration_filename) { '20230822153000_streamline_annotation_key_prefix.rb' }
+  include MigrationMixin
+
   let(:isolation_segment) { VCAP::CloudController::IsolationSegmentModel }
   let(:annotation) { VCAP::CloudController::IsolationSegmentAnnotationModel }
   let(:label) { VCAP::CloudController::IsolationSegmentLabelModel }
 
-  before(:each) do
-    Sequel.extension :migration
-    # Find all migrations
-    migration_files = Dir.glob("#{DBMigrator::SEQUEL_MIGRATIONS}/*.rb")
-    # Calculate the index of our migration file we`d  like to test
-    migration_index = migration_files.find_index { |file| file.end_with?(filename) }
-    # Make a file list of the migration file we like to test plus all migrations after the one we want to test
-    migration_files_after_test = migration_files[migration_index...]
-    # Copy them to a temp directory
-    FileUtils.cp(migration_files, tmp_all_migrations_dir)
-    FileUtils.cp(migration_files_after_test, tmp_down_migrations_dir)
-    FileUtils.cp(File.join(DBMigrator::SEQUEL_MIGRATIONS, filename), tmp_up_migrations_dir)
-    # Revert the given migration and everything newer so we are at the database version exactly before our migration we want to test.
-    Sequel::Migrator.run(db, tmp_down_migrations_dir, target: 0, allow_missing_migration_files: true)
-  end
-
-  after do
-    FileUtils.rm_rf(tmp_up_migrations_dir)
-    FileUtils.rm_rf(tmp_down_migrations_dir)
-
-    # Complete the migration to not leave the test database half migrated and following tests fail due to this
-    Sequel::Migrator.run(db, tmp_all_migrations_dir, allow_missing_migration_files: true)
-    FileUtils.rm_rf(tmp_all_migrations_dir)
-  end
-
   describe 'annotation tables' do
     it 'converts all legacy key_prefixes to annotations with prefixes in the key_prefix column' do
       db[:isolation_segments].insert(name: 'bommel', guid: '123')
@@ -46,7 +20,7 @@
         key: 'mylegacyprefix/mykey',
         value: 'some_value')
       a1 = db[:isolation_segment_annotations].first(resource_guid: '123')
-      expect { Sequel::Migrator.run(db, tmp_up_migrations_dir, allow_missing_migration_files: true) }.not_to raise_error
+      expect { Sequel::Migrator.run(db, migration_to_test, allow_missing_migration_files: true) }.not_to raise_error
       b1 = db[:isolation_segment_annotations].first(resource_guid: '123')
       expect(b1[:guid]).to eq a1[:guid]
       expect(b1[:created_at]).to eq a1[:created_at]
@@ -64,7 +38,7 @@
       db[:isolation_segment_annotations].insert(guid: 'bommel2', resource_guid: '123', key: 'mykey2', value: 'some_value2')
       b1 = db[:isolation_segment_annotations].first(key: 'mykey')
       b2 = db[:isolation_segment_annotations].first(key: 'mykey2')
-      expect { Sequel::Migrator.run(db, tmp_up_migrations_dir, allow_missing_migration_files: true) }.not_to raise_error
+      expect { Sequel::Migrator.run(db, migration_to_test, allow_missing_migration_files: true) }.not_to raise_error
       c1 = db[:isolation_segment_annotations].first(key: 'mykey')
       c2 = db[:isolation_segment_annotations].first(key: 'mykey2')
       expect(b1.values).to eq(c1.values)

diff --git a/spec/migrations/Readme.md b/spec/migrations/Readme.md
@@ -0,0 +1,114 @@
+# Sequel Migrations
+
+Sequel migrations alter the database schema. There are several limitations to obey to write high quality migrations in regard to downtime, error handling and transactional safety.
+
+### Differences of supported DBs
+
+Currently, the CloudController supports postgres and mysql in different versions. Those DBs behave quite differently and have a different set of features.
+
+1. While postgres supports transactional DDL(Data Definition Language) Statements mysql does not. This means when an error occurs and a transaction is rolled back, only postgres will roll back the altered changes properly. Mysql does an automatic commit on every DDL statement making it impossible to roll back any schema change.
+2. While postgres resets table locks on rollback and commit of a transaction automatically, mysql does not. If the error handling of a migration that locks tables does not unlock them as part of error handling, mysql has a permanently blocked table until its manually unlocked again. Additionally, Sequel does not know how to lock tables so this must be done with raw sql queries per used db.
+3. If you lock a table in mysql, and you use sub-queries you have to name your sub-queries(alias them) and lock those aliases also(even if they don't exist yet at the time of locking)
+
+### Rules when writing migrations
+
+Therefore, following rules must be obeyed to write highly compatible migrations.
+1. No Cloud Controller (CC) code should be used in a migration. Any changes in the model can result in modifications to the old migrations and alter the behaviors and outcomes of the tests. Only use raw Sequel operations since when writing a migration one just knows the exact schema of the Database at that point in time. For Example when using a Model in a migration whose table name gets changed by later migrations, the initial migration would fail now since at the point in time this migration runs and uses the Model from CCs codebase, the table does not exist yet.
+2. Try writing separate [up and down](https://sequel.jeremyevans.net/rdoc/files/doc/migration_rdoc.html#top) Sequel migrations. There exists `change do` which automatically generates the down part but sometimes does weird stuff and not in all cases can automatically calculate the opposite of what the migration did. It is better do define `up do` and `down do` separately to be in full control what happens in the `down` part.
+3. Write migrations Idempotent. While a row in the DB prevents a migration from running twice, as schema changes cannot be transactional rolled back in any case(e.g. on mysql), one cannot be sure that after an error in the migration the DB is still initial. There could be some tables/changes being committet and some are not. The migration must be retryable in such a cases. This is often not the case when using Sequel functions default values. As an example there is [drop_index](https://www.rubydoc.info/github/jeremyevans/sequel/Sequel%2FSchema%2FAlterTableGenerator:drop_index) which ,by default, throws an error if the index one wants to drop was already dropped before. Thus, in a migration on wants to pass the optional parameter `if_exists: true` so that this statement is idempotent and if a migration failed in the middle for whatever reason(could be also a db connection issue), the migration can be rerun, one is not stuck in a half migrated state and no manual intervention to repair the DBs schema must be conducted to finish the update. If a build in check like `if_exists: true` is not available for the needed sequel function one must test against the Schema and just run statements conditionally. Also, an option is to always drop an index if it exits and then create it again to also make the `create_index` function idempotent.
+4. On migrations that do DML(Data Manipulation Language) statements it's best to think about locking the table while altering data in a Table. As an example imagine to remove duplicates from a table and set a unique constraint. Therefore, one must prevent new inserts for the time the migration runs. Note here that Sequel does not hav build in functions for table locks, so you have to do raw sql queries that differ from the Database backend they run on. One special thing for mysql is that you also need to lock [sub-queries](https://stackoverflow.com/a/58898755) if you use table locks at all. For this one can rename a subquery `self.from("#{table}___subquery".to_sym)` gives the subquery the alias `subquery` and in the beginning you must also lock that "temporary imaginary table" `run "LOCK TABLES #{table} WRITE, #{table} AS subquery WRITE;"` even if it doesn`t exist yet. Also while postgres does a proper rollback on errors and automatically unlocks tables after commit, mysql does not, and you have to make absolutely sure to unlock tables to not cause a table lock on mysql blocking the DB.
+```ruby
+# Lock table while the migration runs for full consistency. Mysql requires also locks on tables generated by sub-queries.
+transaction do
+    table = "app_annotations"
+    run "LOCK TABLE #{table} IN SHARE MODE;" if database_type == :postgres
+    run "LOCK TABLES #{table} WRITE, #{table} AS subquery WRITE;" if database_type == :mysql
+    ...
+    # "#{table}___subquery".to_sym makes the Select being something like "SELECT .. FROM app_annotations as subquery ..." due to the double underscores. Some magic of Sequel library apparently.
+    min_ids_subquery = self.from("#{table}___subquery".to_sym).
+                               select(Sequel.function(:MIN, :id).as(:min_id)).
+                               group_by(:resource_guid, :key_prefix, :key_name)
+    self[table.to_sym].exclude(id: min_ids_subquery).delete
+    ...
+    # Be sure to unlock the table afterwards as this does not happen automatically after committing a transaction in mysql
+    run 'UNLOCK TABLES;' if database_type == :mysql
+rescue Sequel::DatabaseError
+    # Clean up potential temporary assets left behind
+    drop_table?(:tmp_table)
+    # Be sure to unlock the table on errors as this does not happen automatically by rolling back a transaction mysql
+    run 'UNLOCK TABLES;' if database_type == :mysql
+    raise
+end
+```
+5. Try to do DML(Data Manipulation Language) statements in  a way they are completely handled by the DB and no looping over returned data happens in the migration. This is to reduce the time e.g. a table is locked. As an example instead of finding all duplicates with a select and then looping over the result in ruby and deleting the rows, one could do the select in a subquery so the DB does the heavy lifting here.
+```ruby
+min_ids_subquery = self.from("mytable__subquery".to_sym).
+                           select(Sequel.function(:MIN, :id).as(:min_id)).
+                           group_by(:resource_guid, :key_prefix, :key_name)
+self[table.to_sym].exclude(id: min_ids_subquery).delete
+```
+6. If looping over multiple tables or altering multiple tables, open a transaction separately for each table as otherwise you lock all the tables at once and there is a high likelihood of a mutual lock between two or more operations on the database(Deadlock). The migration might then lock tables a select with a join needs but this select already locked some other tables the migration needs. The DBs then usually abort both queries which results in a failing API request plus a failed transaction. An `up do` block implicitly opens a transaction for its whole block. So when accessing multiple tables put every table into an own `transaction do` block inside the up/down migration.
+```ruby
+Sequel.migration do
+  annotation_tables = %w[
+    app_annotations
+    build_annotations
+    ...
+  ].freeze
+
+  up do
+    annotation_tables.each do |table|
+      transaction do # DO NOT FORGET THIS TRANSACTION INSIDE THE LOOP OTHERWISE YOU LOCK MANY TABLES IN A SINGLE TRANSACTION
+      ...
+      end
+    end
+  end
+  down do
+    annotation_tables.each do |table|
+      transaction do # DO NOT FORGET THIS TRANSACTION INSIDE THE LOOP OTHERWISE YOU LOCK MANY TABLES IN A SINGLE TRANSACTION
+        ...
+      end
+    end
+  end
+end
+
+```
+
+# Sequel Migration Tests
+
+Sequel migration tests operate differently from typical RSpec tests. Primarily, they invoke the Down migration and restore the database state to its form prior to the execution of the test-specific migration. The procedure involves running a test, creating assets, executing a particular migration file for testing, and asserting specific behaviors.
+However, recognize that Sequel migration tests impose particular restrictions and limitations on test writing.
+
+1. No Cloud Controller (CC) code should influence the migration spec. Same requirement as for the migration itself. Any changes in the model can result in modifications to the old migrations and alter the behaviors and outcomes of the tests. So do not select data via the CC Models, rather do the selects in raw Sequel and assert what behaviour you`d like to test.
+2. You should use the MigrationMixin because it ensures that the database schema first reverts to the version preceding the migration you aim to test. This mixin also provides a directory containing the single migration for running the particular migration within a test. When the test completes, this mixin makes sure to restore the correct schema by running the migrations that are more recent than the tested one. Thus, avoiding cases of a half-migrated database that can result in random test failures. Also, it makes sure to test not every migration that comes after the migration one wants to test, but rather just a single migration is executed and then expected behaviour is assessed.
+
+### Usage
+
+Here's an illustrative guide on how to write a migration spec. The `MigrationMixin` accesses the `migration_file` variable, which should contain the filename of the migration under review. This mixin also offers a `migration_to_test` variable for executing the particular migration. All other aspects, including migrating one version before the test migration and fully migrating the table after test completion, are handled automatically by the Mixin.
+
+
+```ruby
+require 'spec_helper'
+require 'helpers/migration_mixin'
+
+RSpec.describe 'migration to modify isolation_segments', isolation: :truncation do
+  let(:migration_file) { '20230822153000_isolation_segments_modify.rb' }
+
+  include MigrationMixin
+
+  describe 'isolation_segments table' do
+    it 'retains the initial name and guid' do
+      db[:isolation_segments].insert(name: 'bommel', guid: '123')
+      a1 = db[:isolation_segment_annotations].first(resource_guid: '123')
+      expect { Sequel::Migrator.run(db, migration_to_test, allow_missing_migration_files: true) }.not_to raise_error
+      b1 = db[:isolation_segment_annotations].first(resource_guid: '123')
+      expect(b1[:guid]).to eq a1[:guid]
+      expect(b1[:name]).to eq a1[:name]
+    end
+  end
+end
+```
+
+The above code tests a migration that modifies the isolation_segments table. It verifies that the original name and guid persist even after migration operation.
+Note here that not the CC Models are used rather the selects and inserts are done directly with Sequel with the Schema in mind that exists before and after the migration. This will always be correct and the test continues to work even if later migrations alter the table, CC Code and Models, or even drop Tables etc. As the migration is tested with the state the DB was in at the time of writing the migration.
+
diff --git a/spec/migrations/helpers/migration_mixin.rb b/spec/migrations/helpers/migration_mixin.rb
@@ -0,0 +1,35 @@
+module MigrationMixin
+  def self.included(included_class)
+    included_class.class_eval <<-RUBY, __FILE__, __LINE__ + 1
+      let(:all_migrations) { Dir.mktmpdir }
+      let(:down_migrations) { Dir.mktmpdir }
+      let(:migration_to_test) { Dir.mktmpdir }
+      let(:db) { Sequel::Model.db }
+
+      before(:each) do
+        Sequel.extension :migration
+        # Find all migrations
+        migration_files = Dir.glob(format("%s/*.rb", DBMigrator::SEQUEL_MIGRATIONS))
+        # Calculate the index of our migration file we`d  like to test
+        migration_index = migration_files.find_index { |file| file.end_with?(migration_filename) }
+        # Make a file list of the migration file we like to test plus all migrations after the one we want to test
+        migration_files_after_test = migration_files[migration_index...]
+        # Copy them to a temp directory
+        FileUtils.cp(migration_files, all_migrations)
+        FileUtils.cp(migration_files_after_test, down_migrations)
+        FileUtils.cp(File.join(DBMigrator::SEQUEL_MIGRATIONS, migration_filename), migration_to_test)
+        # Revert the given migration and everything newer so we are at the database version exactly before our migration we want to test.
+        Sequel::Migrator.run(db, down_migrations, target: 0, allow_missing_migration_files: true)
+      end
+
+      after(:each) do
+        FileUtils.rm_rf(migration_to_test)
+        FileUtils.rm_rf(down_migrations)
+
+        # Complete the migration to not leave the test database half migrated and following tests fail due to this
+        Sequel::Migrator.run(db, all_migrations, allow_missing_migration_files: true)
+        FileUtils.rm_rf(all_migrations)
+      end
+    RUBY
+  end
+end