Skip to content

Commit

Permalink
Merge pull request #15761 from Ladas/ultimate_batch_saving_speedup
Browse files Browse the repository at this point in the history
Ultimate batch saving speedup
  • Loading branch information
agrare authored Aug 10, 2017
2 parents 51c45fa + 60e14d1 commit 2597fbb
Show file tree
Hide file tree
Showing 5 changed files with 210 additions and 93 deletions.
12 changes: 12 additions & 0 deletions app/models/manager_refresh/inventory_collection.rb
Original file line number Diff line number Diff line change
Expand Up @@ -685,6 +685,8 @@ def stringify_reference(reference)
end

def manager_ref_to_cols
# TODO(lsmola) this should contain the polymorphic _type, otherwise the IC with polymorphic unique key will get
# conflicts
# Convert attributes from unique key to actual db cols
manager_ref.map do |ref|
association_to_foreign_key_mapping[ref] || ref
Expand Down Expand Up @@ -911,6 +913,14 @@ def fixed_foreign_keys
@fixed_foreign_keys_cache
end

def serializable_keys?(all_attribute_keys)
return @serializable_keys_cache unless @serializable_keys_cache.nil?

@serializable_keys_cache = all_attribute_keys.any? do |key|
model_class.type_for_attribute(key.to_s).respond_to?(:coder)
end
end

def base_class_name
return "" unless model_class

Expand Down Expand Up @@ -986,10 +996,12 @@ def db_collection_for_comparison
end

def db_collection_for_comparison_for(manager_uuids_set)
# TODO(lsmola) this should have the build_multi_selection_condition, like in the method above
full_collection_for_comparison.where(manager_ref.first => manager_uuids_set)
end

def db_collection_for_comparison_for_complement_of(manager_uuids_set)
# TODO(lsmola) this should have the build_multi_selection_condition, like in the method above
full_collection_for_comparison.where.not(manager_ref.first => manager_uuids_set)
end

Expand Down
18 changes: 10 additions & 8 deletions app/models/manager_refresh/inventory_object.rb
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,8 @@ def initialize(inventory_collection, data)
end

def manager_uuid
# TODO(lsmola) should we have a separate function for uuid containing foreign keys? Probably yes, since it could
# speed up the ID fetching.
id_with_keys(manager_ref)
end

Expand Down Expand Up @@ -50,36 +52,36 @@ def attributes(inventory_collection_scope = nil)
data[key] = value.compact.map(&:load).compact
# We can use built in _ids methods to assign array of ids into has_many relations. So e.g. the :key_pairs=
# relation setter will become :key_pair_ids=
attributes_for_saving[key.to_s.singularize + "_ids"] = data[key].map(&:id).compact.uniq
attributes_for_saving[(key.to_s.singularize + "_ids").to_sym] = data[key].map(&:id).compact.uniq
elsif loadable?(value) || inventory_collection_scope.association_to_foreign_key_mapping[key]
# Lets fill also the original data, so other InventoryObject referring to this attribute gets the right
# result
data[key] = value.load if value.respond_to?(:load)
if (foreign_key = inventory_collection_scope.association_to_foreign_key_mapping[key])
# We have an association to fill, lets fill also the :key, cause some other InventoryObject can refer to it
record_id = data[key].try(:id)
attributes_for_saving[foreign_key] = record_id
record_id = data[key].try(:id)
attributes_for_saving[foreign_key.to_sym] = record_id

if (foreign_type = inventory_collection_scope.association_to_foreign_type_mapping[key])
# If we have a polymorphic association, we need to also fill a base class name, but we want to nullify it
# if record_id is missing
base_class = data[key].try(:base_class_name) || data[key].class.try(:base_class).try(:name)
attributes_for_saving[foreign_type] = record_id ? base_class : nil
attributes_for_saving[foreign_type.to_sym] = record_id ? base_class : nil
end
elsif data[key].kind_of?(::ManagerRefresh::InventoryObject)
# We have an association to fill but not an Activerecord association, so e.g. Ancestry, lets just load
# it here. This way of storing ancestry is ineffective in DB call count, but RAM friendly
attributes_for_saving[key] = data[key].base_class_name.constantize.find_by(:id => data[key].id)
attributes_for_saving[key.to_sym] = data[key].base_class_name.constantize.find_by(:id => data[key].id)
else
# We have a normal attribute to fill
attributes_for_saving[key] = data[key]
attributes_for_saving[key.to_sym] = data[key]
end
else
attributes_for_saving[key] = value
attributes_for_saving[key.to_sym] = value
end
end

attributes_for_saving.symbolize_keys
attributes_for_saving
end

def assign_attributes(attributes)
Expand Down
60 changes: 38 additions & 22 deletions app/models/manager_refresh/save_collection/saver/base.rb
Original file line number Diff line number Diff line change
Expand Up @@ -4,15 +4,33 @@ class Base
include Vmdb::Logging
include ManagerRefresh::SaveCollection::Saver::SqlHelper

attr_reader :inventory_collection
attr_reader :inventory_collection, :association

def initialize(inventory_collection)
@inventory_collection = inventory_collection

# Private attrs
@unique_index_keys = inventory_collection.manager_ref_to_cols
@primary_key = inventory_collection.model_class.primary_key
@arel_primary_key = inventory_collection.model_class.arel_attribute(primary_key)
@unique_index_keys = inventory_collection.manager_ref_to_cols.map(&:to_sym)
@unique_index_keys_to_s = inventory_collection.manager_ref_to_cols.map(&:to_s)
@select_keys = [@primary_key] + @unique_index_keys_to_s
@unique_db_primary_keys = Set.new
@unique_db_indexes = Set.new

# TODO(lsmola) do I need to reload every time? Also it should be enough to clear the associations.
inventory_collection.parent.reload if inventory_collection.parent
@association = inventory_collection.db_collection_for_comparison

# Right now ApplicationRecordIterator in association is used for targeted refresh. Given the small amount of
# records flowing through there, we probably don't need to optimize that association to fetch a pure SQL.
@pure_sql_records_fetching = !inventory_collection.use_ar_object? && !@association.kind_of?(ManagerRefresh::ApplicationRecordIterator)

@batch_size_for_persisting = 10_000

@batch_size = @pure_sql_records_fetching ? @batch_size_for_persisting : inventory_collection.batch_size
@record_key_method = @pure_sql_records_fetching ? :pure_sql_record_key : :ar_record_key
@select_keys_indexes = @select_keys.each_with_object({}).with_index { |(key, obj), index| obj[key.to_s] = index }
end

def save_inventory_collection!
Expand All @@ -22,16 +40,14 @@ def save_inventory_collection!
# job. We want to do 1 :delete_complement job at 1 time, to keep to memory down.
return delete_complement if inventory_collection.all_manager_uuids.present?

# TODO(lsmola) do I need to reload every time? Also it should be enough to clear the associations.
inventory_collection.parent.reload if inventory_collection.parent
association = inventory_collection.db_collection_for_comparison

save!(association)
end

private

attr_reader :unique_index_keys, :unique_db_primary_keys, :unique_db_indexes
attr_reader :unique_index_keys, :unique_index_keys_to_s, :select_keys, :unique_db_primary_keys, :unique_db_indexes,
:primary_key, :arel_primary_key, :record_key_method, :pure_sql_records_fetching, :select_keys_indexes,
:batch_size, :batch_size_for_persisting

def save!(association)
attributes_index = {}
Expand All @@ -50,7 +66,7 @@ def save!(association)
association.find_each do |record|
index = inventory_collection.object_index_with_keys(unique_index_keys, record)

next unless assert_distinct_relation(record)
next unless assert_distinct_relation(record.id)
next unless assert_unique_record(record, index)

inventory_object = inventory_objects_index.delete(index)
Expand All @@ -62,7 +78,7 @@ def save!(association)
delete_record!(record) if inventory_collection.delete_allowed?
else
# Record was found in the DB and sent for saving, we will be updating the DB.
update_record!(record, hash, inventory_object) if assert_referential_integrity(hash, inventory_object)
update_record!(record, hash, inventory_object) if assert_referential_integrity(hash)
end
end
end
Expand All @@ -77,7 +93,7 @@ def save!(association)
inventory_objects_index.each do |index, inventory_object|
hash = attributes_index.delete(index)

create_record!(hash, inventory_object) if assert_referential_integrity(hash, inventory_object)
create_record!(hash, inventory_object) if assert_referential_integrity(hash)
end
end
end
Expand All @@ -94,8 +110,8 @@ def inventory_collection_details
"strategy: #{inventory_collection.strategy}, saver_strategy: #{inventory_collection.saver_strategy}, targeted: #{inventory_collection.targeted?}"
end

def batch_size
inventory_collection.batch_size
def record_key(record, key)
record.public_send(key)
end

def delete_complement
Expand Down Expand Up @@ -132,8 +148,8 @@ def assert_unique_record(_record, _index)
true
end

def assert_distinct_relation(record)
if unique_db_primary_keys.include?(record.id) # Include on Set is O(1)
def assert_distinct_relation(primary_key_value)
if unique_db_primary_keys.include?(primary_key_value) # Include on Set is O(1)
# Change the InventoryCollection's :association or :arel parameter to return distinct results. The :through
# relations can return the same record multiple times. We don't want to do SELECT DISTINCT by default, since
# it can be very slow.
Expand All @@ -145,17 +161,17 @@ def assert_distinct_relation(record)
raise("Please update :association or :arel for #{inventory_collection} to return a DISTINCT result. ")
end
else
unique_db_primary_keys << record.id
unique_db_primary_keys << primary_key_value
end
true
end

def assert_referential_integrity(hash, inventory_object)
inventory_object.inventory_collection.fixed_foreign_keys.each do |x|
next unless hash[x].blank?
subject = "#{inventory_object} of #{inventory_object.inventory_collection} because of missing foreign key #{x} for "\
"#{inventory_object.inventory_collection.parent.class.name}:"\
"#{inventory_object.inventory_collection.parent.try(:id)}"
def assert_referential_integrity(hash)
inventory_collection.fixed_foreign_keys.each do |x|
next unless hash[x].nil?
subject = "#{hash} of #{inventory_collection} because of missing foreign key #{x} for "\
"#{inventory_collection.parent.class.name}:"\
"#{inventory_collection.parent.try(:id)}"
if Rails.env.production?
_log.warn("Referential integrity check violated, ignoring #{subject}")
return false
Expand Down Expand Up @@ -186,7 +202,7 @@ def assign_attributes_for_update!(hash, update_time)
end

def assign_attributes_for_create!(hash, create_time)
hash[:type] = inventory_collection.model_class.name if inventory_collection.supports_sti? && hash[:type].blank?
hash[:type] = inventory_collection.model_class.name if inventory_collection.supports_sti? && hash[:type].nil?
hash[:created_on] = create_time if inventory_collection.supports_timestamps_on_variant?
hash[:created_at] = create_time if inventory_collection.supports_timestamps_at_variant?
assign_attributes_for_update!(hash, create_time)
Expand Down
Loading

0 comments on commit 2597fbb

Please sign in to comment.