Skip to content

Commit

Permalink
1603:3:12 #2: download Wikipedia/Wikidata languages mapping and Admini…
Browse files Browse the repository at this point in the history
…strative Level 0 (country/territories)
  • Loading branch information
fititnt committed Jan 10, 2022
1 parent fb326e7 commit 75264ef
Show file tree
Hide file tree
Showing 2 changed files with 182 additions and 4 deletions.
101 changes: 98 additions & 3 deletions officinam/999999999/1603_3_12.sh
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
#!/bin/sh
#!/bin/bash
#===============================================================================
#
# FILE: 1603_3_12.sh
Expand Down Expand Up @@ -27,6 +27,9 @@ set -e

# Directory the script was started from; every path below is built from it.
ROOTDIR="$(pwd)"

# Shared helpers (stale_archive, file_update_if_necessary, ...). The path is
# relative to ROOTDIR, so the script must be started from the directory that
# contains 999999999/.
# shellcheck source=999999999.lib.sh
. "$ROOTDIR"/999999999/999999999.lib.sh

# @see https://www.wikidata.org/wiki/Wikidata:SPARQL_query_service/queries/examples
# @see https://github.com/maxlath/wikibase-cli
# @see - https://www.wikidata.org/wiki/Template:Wikidata_list
Expand All @@ -47,10 +50,102 @@ ROOTDIR="$(pwd)"
# OPTIONAL { ?country wdt:P3916 ?unescot. }
# OPTIONAL { ?country wdt:P9948 ?usciafb. }
# OPTIONAL { ?country wdt:P901 ?usfips4. }
# OPTIONAL { ?country wdt:P8714 ?gadm. }
# OPTIONAL { ?country wdt:P8714 ?gadm. }

# SERVICE wikibase:label { bd:serviceParam wikibase:language "[AUTO_LANGUAGE]". }
# }
echo "TODO 1603_3_12.sh"

#######################################
# Download the list of administrative level 0 codes ("country/territory"
# codes) from the Wikidata SPARQL endpoint and cache it as CSV.
#
# The download is skipped while the cached file is still fresh (as decided
# by stale_archive). The response is first written to a temporary file and
# only promoted to the final path by file_update_if_necessary.
#
# Globals:
#   ROOTDIR (read)
# Arguments:
#   None
# Outputs:
#   Progress messages (stdout), errors (stderr)
#   File ${ROOTDIR}/1603/3/1603_3__adm0.csv
# Returns:
#   0 if the cache is fresh or was refreshed, non-zero on failure
#######################################
1603_3_12_wikipedia_adm0() {
  local objectivum_archivum="${ROOTDIR}/1603/3/1603_3__adm0.csv"
  local objectivum_archivum_temporarium="${ROOTDIR}/1603/3/1603_3__adm0.TEMP.csv"

  # stale_archive prints nothing while the cached file is still fresh.
  if [ -z "$(stale_archive "$objectivum_archivum")" ]; then return 0; fi

  echo "${FUNCNAME[0]} stale data on [$objectivum_archivum], refreshing..."

  # --fail: without it an HTTP error (e.g. 429/5xx) still exits 0 and the
  # error page would be saved as if it were the CSV result.
  if ! curl --fail --header "Accept: text/csv" --silent --show-error \
    --get https://query.wikidata.org/sparql --data-urlencode query='
SELECT ?country ?unm49 ?iso3166n ?iso3166p1a2 ?iso3166p1a3 ?osmrelid ?unescot ?usciafb ?usfips4 ?gadm
WHERE
{
?country wdt:P31 wd:Q6256 ;
OPTIONAL { ?country wdt:P2082 ?unm49. }
OPTIONAL { ?country wdt:P299 ?iso3166n. }
OPTIONAL { ?country wdt:P297 ?iso3166p1a2. }
OPTIONAL { ?country wdt:P298 ?iso3166p1a3. }
OPTIONAL { ?country wdt:P402 ?osmrelid. }
OPTIONAL { ?country wdt:P3916 ?unescot. }
OPTIONAL { ?country wdt:P9948 ?usciafb. }
OPTIONAL { ?country wdt:P901 ?usfips4. }
OPTIONAL { ?country wdt:P8714 ?gadm. }
SERVICE wikibase:label { bd:serviceParam wikibase:language "[AUTO_LANGUAGE]". }
}
' >"$objectivum_archivum_temporarium"; then
    echo "${FUNCNAME[0]} download failed, keeping [$objectivum_archivum]" >&2
    # Do not leave an empty/partial temporary file behind.
    rm -f -- "$objectivum_archivum_temporarium"
    return 1
  fi

  file_update_if_necessary csv "$objectivum_archivum_temporarium" "$objectivum_archivum"
}


#######################################
# Download the Wikipedia/Wikidata language codes from the Wikidata SPARQL
# endpoint and cache them as CSV (used to know how many languages Wikipedia
# has).
#
# The download is skipped while the cached file is still fresh (as decided
# by stale_archive). The response is first written to a temporary file and
# only promoted to the final path by file_update_if_necessary.
#
# Globals:
#   ROOTDIR (read)
# Arguments:
#   None
# Outputs:
#   Progress messages (stdout), errors (stderr)
#   File ${ROOTDIR}/1603/3/1603_3__languages.csv
# Returns:
#   0 if the cache is fresh or was refreshed, non-zero on failure
#######################################
1603_3_12_wikipedia_language_codes() {
  local objectivum_archivum="${ROOTDIR}/1603/3/1603_3__languages.csv"
  local objectivum_archivum_temporarium="${ROOTDIR}/1603/3/1603_3__languages.TEMP.csv"

  # stale_archive prints nothing while the cached file is still fresh.
  if [ -z "$(stale_archive "$objectivum_archivum")" ]; then return 0; fi

  echo "${FUNCNAME[0]} stale data on [$objectivum_archivum], refreshing..."

  # --fail: without it an HTTP error (e.g. 429/5xx) still exits 0 and the
  # error page would be saved as if it were the CSV result.
  if ! curl --fail --header "Accept: text/csv" --silent --show-error \
    --get https://query.wikidata.org/sparql --data-urlencode query='
SELECT ?wd ?wmCode ?iso6391 ?iso6392 ?iso6393 ?iso6396 ?native ?label {
VALUES (?language_type) { (wd:Q34770) (wd:Q25295) }
?wd wdt:P31/wdt:P279* ?language_type
{ ?wd wdt:P218 ?iso6391 . } UNION
{ ?wd wdt:P219 ?iso6392 . } UNION
{ ?wd wdt:P220 ?iso6393 . } UNION
{ ?wd wdt:P221 ?iso6396 . }
# OPTIONAL { ?wd wdt:P424 ?wmCode . }
?wd wdt:P424 ?wmCode .
OPTIONAL { ?wd wdt:P1705 ?native }
OPTIONAL {
?wd rdfs:label ?label
FILTER(LANG(?label) = "en")
}
}
order by (?wmCode)
' >"$objectivum_archivum_temporarium"; then
    echo "${FUNCNAME[0]} download failed, keeping [$objectivum_archivum]" >&2
    # Do not leave an empty/partial temporary file behind.
    rm -f -- "$objectivum_archivum_temporarium"
    return 1
  fi

  file_update_if_necessary csv "$objectivum_archivum_temporarium" "$objectivum_archivum"
}


# Refresh both cached datasets. Each function returns early when its cached
# CSV is still considered fresh.
1603_3_12_wikipedia_language_codes

1603_3_12_wikipedia_adm0

exit 0
85 changes: 84 additions & 1 deletion officinam/999999999/999999999.lib.sh
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,8 @@ NUMERORDINATIO_DATUM="${ROOTDIR}/999999/999999"

#######################################
# Return if a path (or a file) doesn't exist or if it has not changed recently.
# Use case: reload functions that depend on action of older ones
# Use case: reload functions that depend on action of older ones.
# Opposite: stale_archive
#
# Globals:
# None
Expand All @@ -58,6 +59,88 @@ changed_recently() {
echo "1"
}

#######################################
# Print "1" if a file is missing or too old (default 24 hours). Use case:
# decide whether data from outside sources must be fetched again.
#
# Opposite: changed_recently
#
# Globals:
#   None
# Arguments:
#   path_or_file
#   maximum_time  Maximum age in MINUTES, passed to find -mmin
#                 (default: 1440, i.e. 24h)
# Outputs:
#   1 (if need reload, Void if no reload need)
#######################################
stale_archive() {
  local path_or_file="$1"
  # find -mmin counts minutes, so 24 hours is 1440 (86400 would be 60 days).
  local maximum_time="${2:-1440}"
  local changes

  if [ -e "$path_or_file" ]; then
    # find prints the path only when it was modified more than maximum_time
    # minutes ago; empty output therefore means "still fresh".
    changes=$(find "$path_or_file" -mmin +"$maximum_time")
    if [ -z "$changes" ]; then
      return 0
    fi
  fi
  echo "1"
}

#######################################
# Validate a freshly generated (temporary) file and promote it to the
# target path, unless the target already has identical content.
#
# Globals:
#   None
# Arguments:
#   formatum_archivum    Format used to pick the validator. Only "csv" is
#                        linted (with csvclean); other values are not checked
#   fontem_archivum      Source (temporary) file. It is moved or removed on
#                        success and left in place if validation fails
#   objectivum_archivum  Target file to create or replace
# Outputs:
#   Progress and diagnostic messages (stdout)
# Returns:
#   0 on success, 1 if the source file fails validation
#######################################
file_update_if_necessary() {
  local formatum_archivum="$1"
  local fontem_archivum="$2"
  local objectivum_archivum="$3"
  local is_valid

  # echo "starting file_update_if_necessary ..."

  case "${formatum_archivum}" in
  csv)
    is_valid=$(csvclean --dry-run "$fontem_archivum")
    if [ "$is_valid" != "No errors." ]; then
      echo "$is_valid"
      return 1
    fi
    ;;
  *)
    echo "Lint not implemented for this case. Skiping"
    ;;
  esac

  echo "middle file_update_if_necessary ..."

  if [ -f "$objectivum_archivum" ]; then
    sha256sum "$objectivum_archivum"
    sha256sum "$fontem_archivum"
    # cmp --silent prints nothing and reports the result only through its
    # exit status, so the status (not the output) has to be tested.
    if cmp --silent -- "$fontem_archivum" "$objectivum_archivum"; then
      echo "INFO: already equal."
      echo " [$fontem_archivum]"
      echo " [$objectivum_archivum]"
      # Keep the existing target (and its mtime); drop the duplicate.
      rm -- "$fontem_archivum"
    else
      echo "not equal"
      mv -f -- "$fontem_archivum" "$objectivum_archivum"
    fi
  else
    mv -- "$fontem_archivum" "$objectivum_archivum"
  fi

  # echo "done file_update_if_necessary ..."
  return 0
}

#######################################
# contains(string, substring)
#
Expand Down

0 comments on commit 75264ef

Please sign in to comment.