-
Notifications
You must be signed in to change notification settings - Fork 17
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Merge data files before lookup #98
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -8,6 +8,8 @@ module Data | |
class Base < Hash | ||
attr_reader :locale | ||
|
||
@@doc_cache = {} | ||
|
||
def initialize(locale) | ||
@locale = locale | ||
end | ||
|
@@ -53,11 +55,45 @@ def xpath(sources) | |
end | ||
|
||
def doc | ||
@doc ||= Nokogiri::XML(File.read(path)) | ||
@@doc_cache[paths.hash] ||= merge_paths(paths) | ||
end | ||
|
||
def paths | ||
@paths ||= begin | ||
if locale | ||
Dir[File.join(Cldr::Export::Data.dir, "*", "#{Cldr::Export.from_i18n(locale)}.xml")].sort & Cldr::Export::Data.paths_by_root["ldml"] | ||
else | ||
Cldr::Export::Data.paths_by_root["supplementalData"] | ||
end | ||
end | ||
end | ||
|
||
def path | ||
@path ||= "#{Cldr::Export::Data.dir}/main/#{Cldr::Export.from_i18n(locale)}.xml" | ||
private | ||
|
||
def merge_paths(paths_to_merge) | ||
# Some parts (`ldml`, `ldmlBCP47` and `supplementalData`) of CLDR data require that you merge all the | ||
# files with the same root element before doing lookups. | ||
# Ref: https://www.unicode.org/reports/tr35/tr35.html#XML_Format | ||
# | ||
# The return of this method is a merged XML Nokogiri document. | ||
# Note that it technically is no longer compliant with the CLDR `ldml.dtd`, since: | ||
# * it has repeated elements | ||
# * the <identity> elements no longer refer to the filename | ||
# | ||
# However, this is not an issue, since #select will find all of the matches from each of the repeated elements, | ||
# and the <identity> elements are not important to us / make no sense when combined together. | ||
return Nokogiri::XML('') if paths_to_merge.empty? | ||
|
||
rest = paths_to_merge[1..paths_to_merge.size - 1] | ||
rest.inject(Nokogiri::XML(File.read(paths_to_merge.first))) do |result, path| | ||
next_doc = Nokogiri::XML(File.read(path)) | ||
|
||
next_doc.root.children.each do |child| | ||
result.root.add_child(child) | ||
end | ||
|
||
result | ||
end | ||
Comment on lines
+88
to
+96
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I feel like there is a cleaner/clearer way to do this part, but after briefly playing around I couldn't figure out anything better 🤷‍♂️ |
||
end | ||
end | ||
end | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -11,17 +11,16 @@ def initialize(*args) | |
end | ||
|
||
def rule_groups | ||
if File.exist?(path) | ||
select("rbnf/rulesetGrouping").map do |grouping_node| | ||
{ | ||
:type => grouping_node.attribute("type").value, | ||
:ruleset => (grouping_node / "ruleset").map do |ruleset_node| | ||
rule_set(ruleset_node) | ||
end | ||
} | ||
end | ||
else | ||
{} | ||
grouping_nodes = select("rbnf/rulesetGrouping") | ||
return {} if grouping_nodes.empty? | ||
Comment on lines
+14
to
+15
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I made this change since it didn't make sense to check for the existence of `path`. This had a side-effect of cleaning up an edge-case bug:
|
||
|
||
grouping_nodes.map do |grouping_node| | ||
{ | ||
:type => grouping_node.attribute("type").value, | ||
:ruleset => (grouping_node / "ruleset").map do |ruleset_node| | ||
rule_set(ruleset_node) | ||
end | ||
} | ||
end | ||
end | ||
|
||
|
@@ -61,11 +60,6 @@ def cast_value(val) | |
def fix_rule(rule) | ||
rule.gsub(/\A'/, '').gsub("←", '<').gsub("→", '>') | ||
end | ||
|
||
def path | ||
@path ||= "#{Cldr::Export::Data.dir}/rbnf/#{Cldr::Export.from_i18n(locale)}.xml" | ||
end | ||
|
||
end | ||
end | ||
end | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -66,8 +66,8 @@ def fix_rule(rule) | |
gsub("↔", '<>') | ||
end | ||
|
||
def path | ||
transform_file | ||
def paths | ||
[transform_file] | ||
end | ||
|
||
end | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,53 @@ | ||
# encoding: utf-8 | ||
|
||
require File.expand_path(File.join(File.dirname(__FILE__) + '/../../test_helper')) | ||
|
||
class TestBase < Test::Unit::TestCase | ||
test "#paths finds all the language-dependent data files" do | ||
expected = [ | ||
"annotations/af.xml", | ||
"annotationsDerived/af.xml", | ||
"casing/af.xml", | ||
"collation/af.xml", | ||
"main/af.xml", | ||
"rbnf/af.xml", | ||
"subdivisions/af.xml", | ||
].map {|f| File.join(Cldr::Export::Data.dir, f)} | ||
assert_equal expected, Cldr::Export::Data::Base.new('af').send(:paths) | ||
end | ||
|
||
test "#paths finds all the supplemental data files" do | ||
expected_non_transform_files = [ | ||
"supplemental/attributeValueValidity.xml", | ||
"supplemental/characters.xml", | ||
"supplemental/coverageLevels.xml", | ||
"supplemental/dayPeriods.xml", | ||
"supplemental/genderList.xml", | ||
"supplemental/languageGroup.xml", | ||
"supplemental/languageInfo.xml", | ||
"supplemental/likelySubtags.xml", | ||
"supplemental/metaZones.xml", | ||
"supplemental/numberingSystems.xml", | ||
"supplemental/ordinals.xml", | ||
"supplemental/pluralRanges.xml", | ||
"supplemental/plurals.xml", | ||
"supplemental/rgScope.xml", | ||
"supplemental/subdivisions.xml", | ||
"supplemental/supplementalData.xml", | ||
"supplemental/supplementalMetadata.xml", | ||
"supplemental/windowsZones.xml", | ||
"validity/currency.xml", | ||
"validity/language.xml", | ||
"validity/region.xml", | ||
"validity/script.xml", | ||
"validity/subdivision.xml", | ||
"validity/unit.xml", | ||
"validity/variant.xml", | ||
].map {|f| File.join(Cldr::Export::Data.dir, f)} | ||
|
||
supplemental_data_paths = Cldr::Export::Data::Base.new(nil).send(:paths) | ||
|
||
assert_equal expected_non_transform_files, supplemental_data_paths.reject {|p| p.include?("transforms/")} | ||
assert_not_empty supplemental_data_paths.select {|p| p.include?("transforms/")} | ||
end | ||
end |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I can't just use `paths_to_merge[1..]` since that's not available in Ruby 2.3, and we haven't officially dropped support for the old versions of Ruby.