Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Replace CAD *Ranks table calculation with SQL window function #10491

Draft
wants to merge 2 commits into
base: main
Choose a base branch
from
Draft
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
69 changes: 18 additions & 51 deletions lib/auxiliary_data_computation.rb
Original file line number Diff line number Diff line change
Expand Up @@ -50,58 +50,25 @@ def self.compute_rank_tables
%w(average RanksAverage ConciseAverageResults),
].each do |field, table_name, concise_table_name|
DbHelper.with_temp_table(table_name) do |temp_table_name|
current_country_by_wca_id = Person.current.pluck(:wca_id, :countryId).to_h
# Get all personal records (note: people that changed their country appear once for each country).
personal_records_with_event = ActiveRecord::Base.connection.execute <<-SQL
SELECT eventId, personId, countryId, continentId, min(#{field}) value
FROM #{concise_table_name}
GROUP BY personId, countryId, continentId, eventId
ORDER BY eventId, value
ActiveRecord::Base.connection.execute <<-SQL
INSERT INTO #{temp_table_name} (personId, eventId, best, worldRank, continentRank, countryRank)
WITH best_per_region AS (
SELECT eventId, personId, countryId, continentId, min(#{field}) `value`
FROM #{concise_table_name}
GROUP BY personId, countryId, continentId, eventId
), personal_bests AS (
SELECT *, ROW_NUMBER() OVER (PARTITION BY eventId, personId ORDER BY value) AS really_best
FROM best_per_region
)
SELECT
personId, eventId, `value`,
RANK() OVER(PARTITION BY eventId ORDER BY `value`) AS worldRank,
RANK() OVER(PARTITION BY eventId, continentId ORDER BY `value`) AS continentRank,
RANK() OVER(PARTITION BY eventId, countryId ORDER BY `value`) AS countryRank
FROM personal_bests
WHERE really_best = 1
ORDER BY eventId, `value`
SQL
personal_records_with_event.group_by(&:first).each do |event_id, personal_records|
personal_rank = Hash.new { |h, k| h[k] = {} }
ranked = Hash.new { |h, k| h[k] = {} }
counter = Hash.new(0)
current_rank = Hash.new(0)
previous_value = {}
personal_records.each do |_, person_id, country_id, continent_id, value|
# Update the region states (unless we have ranked this person already,
# e.g. 2008SEAR01 twice in North America and World because of his two countries).
["World", continent_id, country_id].each do |region|
next if ranked[region][person_id]
counter[region] += 1
# As we ordered by value it can either be greater or tie the previous one.
current_rank[region] = counter[region] if previous_value[region].nil? || value > previous_value[region]
previous_value[region] = value
ranked[region][person_id] = true
end
cached_country = Country.c_find(current_country_by_wca_id[person_id])
# The only known case where this happens if current_country_by_wca_id[person_id] is nil,
# in other words, the personId from the Concise*Results table is not found in the Persons table.
# In the past, this has occurred when temporary results have been inserted for newcomers.
next if cached_country.nil?
# Set the person's data (first time the current location is matched).
personal_rank[person_id][:best] ||= value
personal_rank[person_id][:world_rank] ||= current_rank["World"]
if continent_id == cached_country.continentId
personal_rank[person_id][:continent_rank] ||= current_rank[continent_id]
end
if country_id == cached_country.id
personal_rank[person_id][:country_rank] ||= current_rank[country_id]
end
end
values = personal_rank.map do |person_id, rank_data|
# NOTE: continent_rank and country_rank may be not present because of a country change, in such case we default to 0.
"('#{person_id}', '#{event_id}', #{rank_data[:best]}, #{rank_data[:world_rank]}, #{rank_data[:continent_rank] || 0}, #{rank_data[:country_rank] || 0})"
end
# Insert 500 rows at once to avoid running into too long query.
values.each_slice(500) do |values_subset|
ActiveRecord::Base.connection.execute <<-SQL
INSERT INTO #{temp_table_name} (personId, eventId, best, worldRank, continentRank, countryRank) VALUES
#{values_subset.join(",\n")}
SQL
end
end
end
end
end
Expand Down
Loading