From 638479b04b9b7ac432d27ee2b57f075da4322ffc Mon Sep 17 00:00:00 2001 From: Philipp Thun Date: Fri, 18 Nov 2022 10:08:07 +0100 Subject: [PATCH] Optimize Role model/dataset The Role model is not backed by a dedicated table, but a dataset that is a UNION of eight individual roles tables. When filtering for roles, this was done by adding WHERE conditions to the overall dataset, which resulted in slow query executions. A more efficient approach is to move the WHERE conditions into the individual datasets per role. This commit customizes the Role model/dataset. The creation of the UNIONed dataset is moved into a dedicated class (RoleDataset) that supports building a dataset with filters (i.e. WHERE conditions) per role (i.e. individual table). The 'where' method of the Role dataset is overridden and uses Dataset.from ([1]) to change the dataset's source. It replaces the source with a new dataset built with filters per role. As 'where' calls can be chained, the filters need to be kept in the dataset instance (cache) and the source dataset needs to be built again on each 'where' invocation with all the filters, i.e. previous and new WHERE conditions. Furthermore fully qualified identifiers (table name and column name) have to be adapted; as the WHERE conditions are applied on the individual roles tables, no table name prefix is required and thus removed from the identifiers. Some filters can now be handled in a more efficient way. When filtering for a specific role type, the corresponding dataset gets a 'WHERE 1 = 1', whereas all the other datasets get a 'WHERE 1 = 0', meaning they never return any rows. Something similar is done when filtering for organizations or spaces; the opposite role types get a 'WHERE 1 = 0' as they don't have the requested 'id' field. As there might be filters that still should be applied to the overall dataset, 'where' invocations with a block (aka. virtual row block) are treated as before, i.e. applied at the end. Drawbacks: - The current implementation of the custom Role model/dataset is rather strict with regards to allowed filter expressions. This means that if some functionality is added (e.g. new or different filter), the custom Role model/dataset needs to be adapted. This was done by intention to ensure that every code path is tested. - To limit the number of different filter expressions, invocations of 'first(cond)' and 'find(cond)' on the custom Role model/dataset have been replaced with 'where(cond).first' which is semantically identical. [1] https://sequel.jeremyevans.net/rdoc/classes/Sequel/Dataset.html#method-i-from --- app/controllers/v3/roles_controller.rb | 10 +- app/fetchers/base_list_fetcher.rb | 8 +- app/fetchers/role_list_fetcher.rb | 4 +- app/models/runtime/pollable_job_model.rb | 2 +- app/models/runtime/role.rb | 311 +++++++++++++++-------- spec/unit/models/runtime/role_spec.rb | 40 ++- 6 files changed, 252 insertions(+), 123 deletions(-) diff --git a/app/controllers/v3/roles_controller.rb b/app/controllers/v3/roles_controller.rb index 4dedabf784e..659f998a635 100644 --- a/app/controllers/v3/roles_controller.rb +++ b/app/controllers/v3/roles_controller.rb @@ -54,14 +54,14 @@ def show decorators << IncludeRoleOrganizationDecorator if IncludeRoleOrganizationDecorator.match?(message.include) decorators << IncludeRoleSpaceDecorator if IncludeRoleSpaceDecorator.match?(message.include) - role = readable_roles.first(guid: hashed_params[:guid]) + role = readable_roles.where(guid: hashed_params[:guid]).first resource_not_found!(:role) unless role render status: :ok, json: Presenters::V3::RolePresenter.new(role, decorators: decorators) end def destroy - role = readable_roles.first(guid: hashed_params[:guid]) + role = readable_roles.where(guid: hashed_params[:guid]).first resource_not_found!(:role) unless role if role.for_space? @@ -147,7 +147,11 @@ def readable_roles if permission_queryer.can_read_globally? Role else - Role.where(space_id: visible_space_ids).or(organization_id: visible_org_ids) + # This rather complex filter should be applied to the overall Role dataset and is thus using the (virtual row) + # block syntax. + readable_by_space = { space_id: visible_space_ids } + readable_by_org = { organization_id: visible_org_ids } + Role.where { Sequel.|(readable_by_space, readable_by_org) } end end diff --git a/app/fetchers/base_list_fetcher.rb b/app/fetchers/base_list_fetcher.rb index a230d60e838..a4422fc3ec3 100644 --- a/app/fetchers/base_list_fetcher.rb +++ b/app/fetchers/base_list_fetcher.rb @@ -23,16 +23,16 @@ def advanced_filtering(message, dataset, klass) values.map do |operator, given_timestamp| if operator == RelationalOperators::LESS_THAN_COMPARATOR normalized_timestamp = Time.parse(given_timestamp).utc - dataset = dataset.where { Sequel.qualify(klass.table_name, filter) < normalized_timestamp } + dataset = dataset.where(Sequel.qualify(klass.table_name, filter) < normalized_timestamp) elsif operator == RelationalOperators::LESS_THAN_OR_EQUAL_COMPARATOR normalized_timestamp = (Time.parse(given_timestamp).utc + 0.999999).utc - dataset = dataset.where { Sequel.qualify(klass.table_name, filter) <= normalized_timestamp } + dataset = dataset.where(Sequel.qualify(klass.table_name, filter) <= normalized_timestamp) elsif operator == RelationalOperators::GREATER_THAN_COMPARATOR normalized_timestamp = (Time.parse(given_timestamp).utc + 0.999999).utc - dataset = dataset.where { Sequel.qualify(klass.table_name, filter) > normalized_timestamp } + dataset = dataset.where(Sequel.qualify(klass.table_name, filter) > normalized_timestamp) elsif operator == RelationalOperators::GREATER_THAN_OR_EQUAL_COMPARATOR normalized_timestamp = Time.parse(given_timestamp).utc - dataset = dataset.where { Sequel.qualify(klass.table_name, filter) >= normalized_timestamp } + dataset = dataset.where(Sequel.qualify(klass.table_name, filter) >= normalized_timestamp) end end else diff --git a/app/fetchers/role_list_fetcher.rb b/app/fetchers/role_list_fetcher.rb index 12973b648ad..e6ae6538281 100644 --- a/app/fetchers/role_list_fetcher.rb +++ b/app/fetchers/role_list_fetcher.rb @@ -5,8 +5,8 @@ module VCAP::CloudController class RoleListFetcher < BaseListFetcher class << self - def fetch(message, readable_users_dataset, eager_loaded_associations: []) - filter(message, readable_users_dataset).eager(eager_loaded_associations) + def fetch(message, readable_roles, eager_loaded_associations: []) + filter(message, readable_roles).eager(eager_loaded_associations) end private diff --git a/app/models/runtime/pollable_job_model.rb b/app/models/runtime/pollable_job_model.rb index a22c670eb73..05ca63db012 100644 --- a/app/models/runtime/pollable_job_model.rb +++ b/app/models/runtime/pollable_job_model.rb @@ -32,7 +32,7 @@ def resource_exists? Sequel::Model(ActiveSupport::Inflector.pluralize(resource_type).to_sym) end - !!model.find(guid: resource_guid) + !!model.where(guid: resource_guid).first end def self.find_by_delayed_job(delayed_job) diff --git a/app/models/runtime/role.rb b/app/models/runtime/role.rb index 92ee25ccadb..ca230d750c0 100644 --- a/app/models/runtime/role.rb +++ b/app/models/runtime/role.rb @@ -3,97 +3,74 @@ module VCAP::CloudController SPACE_OR_ORGANIZATION_NOT_SPECIFIED = -1 - # Sequel allows to create models based on datasets. The following is a dataset that unions all the individual roles - # tables and labels each row with a `type` column based on which table it came from - class Role < Sequel::Model( - OrganizationUser.select( - Sequel.as(VCAP::CloudController::RoleTypes::ORGANIZATION_USER, :type), - Sequel.as(:role_guid, :guid), - :user_id, - :organization_id, - Sequel.as(SPACE_OR_ORGANIZATION_NOT_SPECIFIED, :space_id), - :created_at, - :updated_at - ).union( - OrganizationManager.select( - Sequel.as(VCAP::CloudController::RoleTypes::ORGANIZATION_MANAGER, :type), - Sequel.as(:role_guid, :guid), - :user_id, - :organization_id, - Sequel.as(SPACE_OR_ORGANIZATION_NOT_SPECIFIED, :space_id), - :created_at, - :updated_at), - all: true, - from_self: false - ).union( - OrganizationBillingManager.select( - Sequel.as(VCAP::CloudController::RoleTypes::ORGANIZATION_BILLING_MANAGER, :type), - Sequel.as(:role_guid, :guid), - :user_id, - :organization_id, - Sequel.as(SPACE_OR_ORGANIZATION_NOT_SPECIFIED, :space_id), - :created_at, - :updated_at), - all: true, - from_self: false - ).union( - OrganizationAuditor.select( - Sequel.as(VCAP::CloudController::RoleTypes::ORGANIZATION_AUDITOR, :type), - Sequel.as(:role_guid, :guid), - :user_id, - :organization_id, - Sequel.as(SPACE_OR_ORGANIZATION_NOT_SPECIFIED, :space_id), - :created_at, - :updated_at), - all: true, - from_self: false - ).union( - SpaceDeveloper.select( - Sequel.as(VCAP::CloudController::RoleTypes::SPACE_DEVELOPER, :type), - Sequel.as(:role_guid, :guid), - :user_id, - Sequel.as(SPACE_OR_ORGANIZATION_NOT_SPECIFIED, :organization_id), - :space_id, - :created_at, - :updated_at), - all: true, - from_self: false - ).union( - SpaceAuditor.select( - Sequel.as(VCAP::CloudController::RoleTypes::SPACE_AUDITOR, :type), - Sequel.as(:role_guid, :guid), - :user_id, - Sequel.as(SPACE_OR_ORGANIZATION_NOT_SPECIFIED, :organization_id), - :space_id, - :created_at, - :updated_at), - all: true, - from_self: false - ).union( - SpaceSupporter.select( - Sequel.as(VCAP::CloudController::RoleTypes::SPACE_SUPPORTER, :type), - Sequel.as(:role_guid, :guid), - :user_id, - Sequel.as(SPACE_OR_ORGANIZATION_NOT_SPECIFIED, :organization_id), - :space_id, - :created_at, - :updated_at), - all: true, - from_self: false - ).union( - SpaceManager.select( - Sequel.as(VCAP::CloudController::RoleTypes::SPACE_MANAGER, :type), - Sequel.as(:role_guid, :guid), - :user_id, - Sequel.as(SPACE_OR_ORGANIZATION_NOT_SPECIFIED, :organization_id), - :space_id, - :created_at, - :updated_at), - all: true, - from_self: false - ).from_self - ) + # Creates UNIONed dataset and supports building a dataset with filters (i.e. WHERE conditions) per role (i.e. + # individual table). + class RoleDataset + class << self + def model_by_role(role) + case role + when RoleTypes::ORGANIZATION_USER + OrganizationUser + when RoleTypes::ORGANIZATION_AUDITOR + OrganizationAuditor + when RoleTypes::ORGANIZATION_BILLING_MANAGER + OrganizationBillingManager + when RoleTypes::ORGANIZATION_MANAGER + OrganizationManager + when RoleTypes::SPACE_AUDITOR + SpaceAuditor + when RoleTypes::SPACE_SUPPORTER + SpaceSupporter + when RoleTypes::SPACE_DEVELOPER + SpaceDeveloper + when RoleTypes::SPACE_MANAGER + SpaceManager + else + raise "Invalid role type: #{role}" + end + end + + def dataset_from_model_and_filters(model, filters) + filters.inject(model) do |dataset, filter| + dataset.where(filter) + end + end + + def dataset_with_select(dataset, role) + ds = dataset.select(Sequel.as(role, :type), Sequel.as(:role_guid, :guid)) + ds = if RoleTypes::ORGANIZATION_ROLES.include?(role) + ds.select_append(:organization_id, Sequel.as(SPACE_OR_ORGANIZATION_NOT_SPECIFIED, :space_id)) + else + ds.select_append(Sequel.as(SPACE_OR_ORGANIZATION_NOT_SPECIFIED, :organization_id), :space_id) + end + ds.select_append(:user_id, :created_at, :updated_at) + end + + def datasets_for_individual_roles(filters_per_role) + RoleTypes::ALL_ROLES.map do |role| + model = model_by_role(role) + filters = filters_per_role[role] || [] + dataset = dataset_from_model_and_filters(model, filters) + dataset_with_select(dataset, role) + end + end + def unioned_dataset(datasets) + datasets.inject do |dataset, ds| + dataset.union(ds, all: true, from_self: false) + end + end + + def build(filters_per_role={}) + datasets = datasets_for_individual_roles(filters_per_role) + unioned_dataset(datasets) + end + end + end + + # Sequel allows to create models based on datasets. The following is a dataset that unions all the individual roles + # tables and labels each row with a `type` column based on which table it came from. + class Role < Sequel::Model(RoleDataset.build.from_self) many_to_one :user, key: :user_id many_to_one :organization, key: :organization_id many_to_one :space, key: :space_id @@ -113,30 +90,146 @@ def space_guid end def for_space? - VCAP::CloudController::RoleTypes::SPACE_ROLES.include?(type) + RoleTypes::SPACE_ROLES.include?(type) end def model_class - case type - when VCAP::CloudController::RoleTypes::SPACE_MANAGER - SpaceManager - when VCAP::CloudController::RoleTypes::SPACE_AUDITOR - SpaceAuditor - when VCAP::CloudController::RoleTypes::SPACE_DEVELOPER - SpaceDeveloper - when VCAP::CloudController::RoleTypes::SPACE_SUPPORTER - SpaceSupporter - when VCAP::CloudController::RoleTypes::ORGANIZATION_USER - OrganizationUser - when VCAP::CloudController::RoleTypes::ORGANIZATION_AUDITOR - OrganizationAuditor - when VCAP::CloudController::RoleTypes::ORGANIZATION_BILLING_MANAGER - OrganizationBillingManager - when VCAP::CloudController::RoleTypes::ORGANIZATION_MANAGER - OrganizationManager + RoleDataset.model_by_role(type) + end + end + + # rubocop:disable Metrics/BlockLength + Role.dataset_module do + def first(*cond) + raise 'Use where(cond).first instead' unless cond.empty? + + super + end + + # Customized Role model/dataset behavior when filtering: + # - Replace dataset source with new dataset built with filters per role. + # - Cache previous filters to supported chained method calls. + # - Treat invocations with (virtual row) block as before, i.e. apply to overall dataset. + def where(*cond) + return super if block_given? + + filters_per_role = self.cache_get(:filters_per_role) || init_filters([]) + + Array.wrap(cond).each do |condition| + filters = case condition + when Hash + hash_condition_to_filters(condition) + when Sequel::SQL::BooleanExpression + boolean_expression_to_filters(condition) + else + raise "Unsupported condition type: #{condition.class}" + end + + append_filters(filters_per_role, filters) + end + + dataset = self.from(RoleDataset.build(filters_per_role)) + dataset.cache_set(:filters_per_role, filters_per_role) + dataset + end + + private + + INCLUDE = { 1 => 1 }.freeze + EXCLUDE = { 1 => 0 }.freeze + + def init_filters(default) + RoleTypes::ALL_ROLES.each_with_object({}) do |role, filters| + filters[role] = default.dup + end + end + + def append_filters(all_filters, new_filters) + RoleTypes::ALL_ROLES.each do |role| + all_filters[role] << new_filters[role] + end + end + + def remove_table_name(identifier) + case identifier + when String, Symbol + identifier.to_s.remove(/.*__/).to_sym + when Sequel::SQL::QualifiedIdentifier + identifier.column.to_sym else - raise Error.new("Invalid role type: #{type}") + raise "Unsupported identifier type: #{identifier.class}" + end + end + + def adapt_column_name(column_name) + case column_name + when :guid + :role_guid + else + column_name + end + end + + def adapt_identifier(identifier) + column_name = remove_table_name(identifier) + adapt_column_name(column_name) + end + + def apply_filter_to_roles(all_filters, roles, new_filter) + roles.each do |role| + all_filters[role]&.merge!(new_filter) + end + end + + def adapt_boolean_expression(expression) + args = expression.args.map do |arg| + case arg + when Sequel::SQL::BooleanExpression + adapt_boolean_expression(arg) + when Sequel::SQL::QualifiedIdentifier + column_name = adapt_identifier(arg) + raise "Unsupported column: #{column_name}" unless [:created_at, :updated_at].include?(column_name) + + Sequel::SQL::Identifier.new(column_name) + else + arg + end + end + Sequel::SQL::BooleanExpression.new(expression.op, *args) + end + + def hash_condition_to_filters(condition) + filters = init_filters({}) + + condition.transform_keys! { |key| adapt_identifier(key) } + + condition.each do |column_name, filter| + case column_name + when :type + roles = Array.wrap(filter) + apply_filter_to_roles(filters, roles, INCLUDE) + apply_filter_to_roles(filters, RoleTypes::ALL_ROLES - roles, EXCLUDE) + when :organization_id + apply_filter_to_roles(filters, RoleTypes::ORGANIZATION_ROLES, { organization_id: filter }) + apply_filter_to_roles(filters, RoleTypes::SPACE_ROLES, EXCLUDE) + when :space_id + apply_filter_to_roles(filters, RoleTypes::ORGANIZATION_ROLES, EXCLUDE) + apply_filter_to_roles(filters, RoleTypes::SPACE_ROLES, { space_id: filter }) + when :role_guid, :user_id + apply_filter_to_roles(filters, RoleTypes::ALL_ROLES, { column_name => filter }) + else + raise "Unsupported column: #{column_name}" + end + end + + filters + end + + def boolean_expression_to_filters(expression) + RoleTypes::ALL_ROLES.each_with_object({}) do |role, filters| + filters[role] = adapt_boolean_expression(expression) end end end + # rubocop:enable Metrics/BlockLength end diff --git a/spec/unit/models/runtime/role_spec.rb b/spec/unit/models/runtime/role_spec.rb index 09eb2681a11..0f59137ee4c 100644 --- a/spec/unit/models/runtime/role_spec.rb +++ b/spec/unit/models/runtime/role_spec.rb @@ -5,12 +5,15 @@ module VCAP::CloudController it { is_expected.to have_timestamp_columns } context 'when there are roles' do - let!(:organization_user) { OrganizationUser.make } - let!(:organization_manager) { OrganizationManager.make } + let(:user) { User.make } + let(:org) { Organization.make } + let(:space) { Space.make } + let!(:organization_user) { OrganizationUser.make(user: user, organization: org) } + let!(:organization_manager) { OrganizationManager.make(organization: org) } let!(:organization_billing_manager) { OrganizationBillingManager.make } let!(:organization_auditor) { OrganizationAuditor.make } - let!(:space_developer) { SpaceDeveloper.make } - let!(:space_auditor) { SpaceAuditor.make } + let!(:space_developer) { SpaceDeveloper.make(user: user, space: space) } + let!(:space_auditor) { SpaceAuditor.make(space: space) } let!(:space_manager) { SpaceManager.make } let!(:space_supporter) { SpaceSupporter.make } @@ -28,6 +31,35 @@ module VCAP::CloudController expect(roles[VCAP::CloudController::RoleTypes::SPACE_MANAGER]).to be_a_guid expect(roles[VCAP::CloudController::RoleTypes::SPACE_SUPPORTER]).to be_a_guid end + + context 'optimized SQL queries' do + before do + OrganizationUser.make + end + + it 'works for different filters' do + expect { + expect(Role.where(type: VCAP::CloudController::RoleTypes::ORGANIZATION_USER).count).to eq(2) + expect(Role.where(guid: organization_manager.guid).count).to eq(1) + expect(Role.where(user_id: user.id).count).to eq(2) + expect(Role.where(organization_id: org.id).count).to eq(2) + expect(Role.where(space_id: space.id).count).to eq(2) + }.to have_queried_db_times(/((\bwhere\b).*?){8}/i, 5) # SQL statement has eight WHERE conditions, one per UNIONed table. + end + + it 'works for combined filters' do + expect { + expect(Role.where(type: VCAP::CloudController::RoleTypes::ORGANIZATION_USER).where(organization_id: org.id).count).to eq(1) + expect(Role.where(user_id: user.id).where(space_id: space.id).count).to eq(1) + }.to have_queried_db_times(/((\bwhere\b).*?){8}/i, 2) # SQL statement has eight WHERE conditions, one per UNIONed table. + end + + it 'works for filters applied with table name prefix' do + expect { + expect(Role.where(t1__guid: organization_billing_manager.guid).count).to eq(1) + }.to have_queried_db_times(/((\bwhere\b).*?){8}/i, 1) # SQL statement has eight WHERE conditions, one per UNIONed table. + end + end end end end