From 335269f37f3356828e42e80cfb21d9e02d778fd1 Mon Sep 17 00:00:00 2001 From: asitav-sen Date: Sun, 16 Oct 2022 20:41:08 +0200 Subject: [PATCH] ver 1.2.0 --- .Rproj.user/shared/notebooks/paths | 2 + .gitignore | 1 + DESCRIPTION | 5 +- NAMESPACE | 4 + NEWS.md | 5 ++ R/Koboconnect.R | 126 ++++++++++++++++++++++++++++- R/other.R | 15 +++- man/kobo_df_download.Rd | 2 +- man/kobo_export_create.Rd | 2 +- man/kobo_media_downloader.Rd | 47 +++++++++++ man/kobo_xls_dl.Rd | 64 +++++++++++++++ vignettes/Usage.Rmd | 19 +++++ 12 files changed, 283 insertions(+), 9 deletions(-) create mode 100644 man/kobo_media_downloader.Rd create mode 100644 man/kobo_xls_dl.Rd diff --git a/.Rproj.user/shared/notebooks/paths b/.Rproj.user/shared/notebooks/paths index 66441a8..55136ea 100644 --- a/.Rproj.user/shared/notebooks/paths +++ b/.Rproj.user/shared/notebooks/paths @@ -18,6 +18,8 @@ /Users/scarecrow/Datascience/Personal Projects/Packages/KoboconnectR/tests/testthat/test-kobo_df_download.R="C92CC245" /Users/scarecrow/Datascience/Personal Projects/Packages/KoboconnectR/tests/testthat/test-kobo_export_create.R="DF94B895" /Users/scarecrow/Datascience/Personal Projects/Packages/KoboconnectR/tests/testthat/test-kobo_exports.R="70210422" +/Users/scarecrow/Datascience/Personal Projects/Packages/KoboconnectR/tests/testthat/test-kobo_media_downloader.R="C7CD36C8" +/Users/scarecrow/Datascience/Personal Projects/Packages/KoboconnectR/tests/testthat/test-kobo_xls_dl.R="E10A4CA3" /Users/scarecrow/Datascience/Personal Projects/Packages/KoboconnectR/tests/testthat/test-kobotools_api.R="32336AF9" /Users/scarecrow/Datascience/Personal Projects/Packages/KoboconnectR/tests/testthat/test-kobotools_kpi_data.R="E1CED08D" /Users/scarecrow/Datascience/Personal Projects/Packages/KoboconnectR/vignettes/.gitignore="EAC491AA" diff --git a/.gitignore b/.gitignore index 1d052c5..5d9cb25 100644 --- a/.gitignore +++ b/.gitignore @@ -11,3 +11,4 @@ LICENSE README.Rmd tests/ CRAN-SUBMISSION +media/ diff --git a/DESCRIPTION 
b/DESCRIPTION index bc7472f..c16b8fc 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -3,7 +3,7 @@ Type: Package Title: Download Data from Kobotoolbox to R URL: https://github.com/asitav-sen/KoboconnectR BugReports: https://github.com/asitav-sen/koboconnectR/issues -Version: 1.1.1 +Version: 1.2.0 Authors@R: person("Asitav ", "Sen", email = "hello@asitavsen.com", role = c("aut", "cre","cph")) Description: Wrapper for 'Kobotoolbox' APIs ver 2 mentioned at , to download data from 'Kobotoolbox' to R. Small and simple package that adds immense convenience for the data professionals using 'Kobotoolbox'. @@ -19,7 +19,8 @@ Imports: openssl, R6, methods, - dplyr + dplyr, + readxl Suggests: knitr, rmarkdown, diff --git a/NAMESPACE b/NAMESPACE index a324f9f..2f7a85c 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -4,6 +4,8 @@ export(get_kobo_token) export(kobo_df_download) export(kobo_export_create) export(kobo_exports) +export(kobo_media_downloader) +export(kobo_xls_dl) export(kobotools_api) export(kobotools_kpi_data) import(R6) @@ -24,4 +26,6 @@ importFrom(httr,stop_for_status) importFrom(httr,timeout) importFrom(httr,warn_for_status) importFrom(jsonlite,fromJSON) +importFrom(readxl,read_excel) +importFrom(utils,download.file) importFrom(utils,read.csv) diff --git a/NEWS.md b/NEWS.md index 28da71d..549ff24 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,3 +1,8 @@ + +# KoboconnectR 1.2.0 + +Added new function `kobo_media_downloader` that downloads media from the kobo assets. + # KoboconnectR 1.1.1 Resolved issues of API not reacting to parameters. For example, setting `include_media_url` to "true" did not actually include the medial url. 
diff --git a/R/Koboconnect.R b/R/Koboconnect.R index 12672a7..0b5c717 100644 --- a/R/Koboconnect.R +++ b/R/Koboconnect.R @@ -216,7 +216,7 @@ kobo_exports <- function(url="kobo.humanitarianresponse.info", uname="", pwd="", #' @param pwd is the password of the account #' @param assetid is the id of the asset for which the export is to be created #' @param all takes logical value in string format. Used to specify whether fields from all form versions will be included in the export.Acceptable values are "true" or "false". Default value is "false". -#' @param lang takes the language. For e.g. "English (en)". +#' @param lang takes the language. For e.g. "English (en)". For "XML Values as headers", use '_xml'. #' @param hierarchy takes logical value in string format. Used to specify whether the group hierarchy will be displayed in labels. Acceptable values are "true" or "false". Default value is "false". #' @param grp_sep is the group separator. Default value is "/". #' @param include_grp defines whether or not to include groups. Default value is "true". @@ -279,7 +279,7 @@ kobo_export_create <- function(url="kobo.humanitarianresponse.info", uname="", p #' @param pwd is the password of the account #' @param assetid is the id of the asset for which the export is to be created #' @param all takes logical value in string format. Used to specify whether fields from all form versions will be included in the export.Acceptable values are "true" or "false". Default value is "false". -#' @param lang takes the language. For e.g. "English (en)". +#' @param lang takes the language. For e.g. "English (en)". For "XML Values as headers", use '_xml'. #' @param hierarchy takes logical value in string format. Used to specify whether the group hierarchy will be displayed in labels. Acceptable values are "true" or "false". Default value is "false". #' @param grp_sep is the group separator. Default value is "/". #' @param include_grp defines whether or not to include groups. 
Default value is "true".
@@ -336,3 +336,134 @@ kobo_df_download <- function(url="kobo.humanitarianresponse.info", uname="", pwd
 }
+
+#' Downloads media data from Kobotoolbox
+#'
+#'@description
+#' `kobo_media_downloader` downloads media from data downloaded using `kobo_df_download`. Loops through media columns and downloads files individually.
+#'
+#'@details
+#' The function downloads the survey data with `kobo_df_download` (using `lang = "_default"`), selects the columns whose names match `identifier` and passes every non-empty value of those columns to `download.file`. Files are written to `destfolder` and named `<column name>_<row number>`.
+#'
+#' @param url The `[url]` of kobotoolbox Default is "kobo.humanitarianresponse.info".
+#' @param uname is username of your kobotoolbox account
+#' @param pwd is the password of the account
+#' @param assetid is the id of the asset for which the export is to be created
+#' @param fsep is the separator of the downloaded csv file. In most of the cases, it is ";", which is the default.
+#' @param sleep is the sleep time between API actions. For example, it takes time to download an export. But R does not wait for the download to finish before going to next step. Hence the need to provide a break between consecutive API actions. Default value is 2 (seconds).
+#' @param identifier is the key with which the columns containing URLs are identified. Default value is "URL" because in most of the cases, the columns
+#' containing the URL values end with "URL". Please note that any other column name with similar value may cause error.
+#' @param timeoutval is the timeout value in seconds to download the media files. Default is 300 seconds.
+#' @param destfolder is the folder where the media is to be stored.
+#'
+#' @return The function returns `TRUE` after the download loop has finished.
+#'
+#'
+#' @importFrom httr POST content authenticate progress DELETE GET
+#' @importFrom jsonlite fromJSON
+#' @importFrom utils read.csv download.file
+#'
+#' @export
+
+kobo_media_downloader <- function(url="kobo.humanitarianresponse.info",uname,pwd, assetid, fsep=";", sleep=2, identifier="URL", timeoutval=300, destfolder="media"){
+  dat <- kobo_df_download(url = url, uname = uname,
+                          pwd = pwd, assetid = assetid,
+                          lang = "_default", sleep = sleep, fsep = fsep)
+
+  print("Please note that this function loops over the URLs and downloads the individual files. This process can be slow and
+ some files may fail to download due to timeout issues. Downloading a zipped file is not supported using API, yet in Kobotoolbox.")
+
+  cnamesdat <- colnames(dat)
+
+  # `identifier` is used as the pattern itself; the former paste0("*", identifier) started the regex with a dangling "*".
+  urlcols <- cnamesdat[grepl(identifier, cnamesdat)]
+
+  # Raise the download timeout for this call only and restore the caller's setting on exit.
+  old_opts <- options(timeout = max(timeoutval, getOption("timeout")))
+  on.exit(options(old_opts), add = TRUE)
+
+  if (!dir.exists(destfolder)) {
+    dir.create(destfolder, recursive = TRUE)
+  }
+
+  # seq_along() skips the loop when no column matches (1:length() would iterate over c(1, 0)).
+  for (i in seq_along(urlcols)) {
+    links <- as.character(dat[[urlcols[i]]])
+    # Rows without media have no URL; keep the row numbers of the rest so file names still map to rows.
+    keep <- which(!is.na(links) & nzchar(links))
+    if (length(keep) == 0) next
+    fname <- file.path(destfolder, paste0(urlcols[i], "_", keep))
+    download.file(links[keep], fname, method = "libcurl")
+  }
+
+  return(TRUE)
+
+}
+
+#' Downloads data (xls type) from Kobotoolbox
+#'
+#'@description
+#' `kobo_xls_dl` is a wrapper for kobotoolbox API `https://[url]/exports/..`
+#'
+#'@details
+#' The function creates an export of survey data in 'xls'. If successful, it attempts to download the data and return a data frame (reading using `readxl::read_excel`).
+#'
+#' @param url The `[url]` of kobotoolbox Default is "kobo.humanitarianresponse.info".
+#' @param uname is username of your kobotoolbox account
+#' @param pwd is the password of the account
+#' @param assetid is the id of the asset for which the export is to be created
+#' @param all takes logical value in string format. Used to specify whether fields from all form versions will be included in the export.Acceptable values are "true" or "false". Default value is "false".
+#' @param lang takes the language. For e.g.
"English (en)". For "XML Values as headers", use '_xml'. +#' @param hierarchy takes logical value in string format. Used to specify whether the group hierarchy will be displayed in labels. Acceptable values are "true" or "false". Default value is "false". +#' @param grp_sep is the group separator. Default value is "/". +#' @param include_grp defines whether or not to include groups. Default value is "true". +#' @param multi_sel is used to specify the display of multiple_select-type responses. Valid inputs include "both", "summary" or "details". Default is "both". +#' @param fields is an array of column names to be included in the export (including their group hierarchy). Valid inputs include: +#' An array containing any string value that matches the XML column name, +#' An empty array which will result in all columns being included, +#' If "fields" is not included in the "export_settings", all columns will be included in the export +#' +#' @param media_url This will include an additional column for media-type questions ("question_name_URL") with the URL link to the hosted file. Valid inputs are "true" or "false". Default value is true. +#' @param sub_ids is an array of submission ids that will filter exported submissions to only the specified array of ids. Valid inputs include an array containing integer values or an empty array. +#' @param sleep is the sleep time between API actions. For example, it takes time to download an export. But R does not wait for the download to finish before going to next step. Hence the need to provide a break between consecutive API actions. Default value is 2 (seconds). +#' +#' @return The function returns a data frame of data downloaded from 'Kobotoolbox'. 
+#'
+#'
+#' @importFrom httr POST content authenticate progress DELETE GET
+#' @importFrom jsonlite fromJSON
+#' @importFrom utils read.csv
+#' @importFrom readxl read_excel
+#'
+#' @export
+kobo_xls_dl<- function(url="kobo.humanitarianresponse.info", uname="", pwd="",
+                       assetid="", all="false", lang="_default",
+                       hierarchy="false", include_grp="true",grp_sep="/",
+                       multi_sel="both", media_url="true", fields=NULL, sub_ids=NULL, sleep=2){
+
+  new_export_details<-export_creator(url=url, uname=uname, pwd=pwd,
+                                     assetid=assetid, type= "xls", all=all, lang=lang,
+                                     hierarchy=hierarchy, include_grp=include_grp,grp_sep=grp_sep,
+                                     multi_sel=multi_sel, fields=fields, media_url=media_url, sub_ids=sub_ids, sleep=sleep)
+
+  Sys.sleep(sleep)
+
+  if(is.null(new_export_details)){
+    print("export creation was not successful")
+    return(NULL)
+  } else{
+    dff<-export_downloader(new_export_details[[1]], uname=uname, pwd=pwd, sleep=sleep, type="xls")
+    deleteact<-DELETE(url=paste0(url,"/api/v2/assets/",assetid,"/exports/",new_export_details[[2]],"/"),
+                      authenticate(user=uname, password = pwd), progress())
+    # DELETE() has already returned at this point, so its status is inspected once.
+    # The former while() loop never re-sent the request and therefore could not
+    # terminate (or failed outright on a NULL status code).
+    warn_for_status(deleteact,"delete export. Please delete manually.")
+    if(isTRUE(deleteact$status_code==204)) print("Export deleted from server")
+    return(dff)
+  }
+
+
+}
+
+
diff --git a/R/other.R b/R/other.R
index c99dd89..4e300aa 100644
--- a/R/other.R
+++ b/R/other.R
@@ -65,7 +65,7 @@ export_creator <- function(url="kobo.humanitarianresponse.info", uname="", pwd="
 fields=fields,
 flatten= flatten,
 xls_types_as_text=xls_typ_as_text,
- #include_media_url=media_url,
+ include_media_url=media_url,
 submission_ids=sub_ids,
 query=qry
 ),
@@ -121,13 +121,25 @@ export_creator <- function(url="kobo.humanitarianresponse.info", uname="", pwd="
 }
 }
-export_downloader<-function(exp.url, fsep, uname, pwd, sleep){
+# Downloads the export at `exp.url` to a temporary file and reads it back:
+# `type = "csv"` is read with read.csv() using separator `fsep`, `type = "xls"`
+# with readxl::read_excel(). `uname` and `pwd` are only handed to
+# httr::authenticate(); they must never be printed or logged.
+export_downloader<-function(exp.url, fsep, uname, pwd, sleep, type="csv"){
 tmp_file <- tempfile()
+  on.exit(unlink(tmp_file), add = TRUE)
 df<-httr::GET(exp.url, httr::authenticate(user=uname, password = pwd),progress())
 Sys.sleep(sleep)
 dff<-httr::content(df, type="raw",encoding = "UTF-8")
 Sys.sleep(sleep)
 writeBin(dff, tmp_file)
- dff<-read.csv(tmp_file, sep=fsep)
+  if(type=="csv"){
+    dff<-read.csv(tmp_file, sep=fsep)
+  }
+
+  if(type=="xls"){
+    dff<-readxl::read_excel(tmp_file)
+  }
+
 return(dff)
 }
diff --git a/man/kobo_df_download.Rd b/man/kobo_df_download.Rd
index 6977453..ca00c3e 100644
--- a/man/kobo_df_download.Rd
+++ b/man/kobo_df_download.Rd
@@ -33,7 +33,7 @@ kobo_df_download(
 \item{all}{takes logical value in string format. Used to specify whether fields from all form versions will be included in the export.Acceptable values are "true" or "false". Default value is "false".}
-\item{lang}{takes the language. For e.g. "English (en)".}
+\item{lang}{takes the language. For e.g. "English (en)". For "XML Values as headers", use '_xml'.}
 \item{hierarchy}{takes logical value in string format. Used to specify whether the group hierarchy will be displayed in labels. Acceptable values are "true" or "false".
Default value is "false".} diff --git a/man/kobo_export_create.Rd b/man/kobo_export_create.Rd index 76ddb9b..8995bc4 100644 --- a/man/kobo_export_create.Rd +++ b/man/kobo_export_create.Rd @@ -37,7 +37,7 @@ kobo_export_create( \item{all}{takes logical value in string format. Used to specify whether fields from all form versions will be included in the export.Acceptable values are "true" or "false". Default value is "false".} -\item{lang}{takes the language. For e.g. "English (en)".} +\item{lang}{takes the language. For e.g. "English (en)". For "XML Values as headers", use '_xml'.} \item{hierarchy}{takes logical value in string format. Used to specify whether the group hierarchy will be displayed in labels. Acceptable values are "true" or "false". Default value is "false".} diff --git a/man/kobo_media_downloader.Rd b/man/kobo_media_downloader.Rd new file mode 100644 index 0000000..d2dad34 --- /dev/null +++ b/man/kobo_media_downloader.Rd @@ -0,0 +1,47 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/Koboconnect.R +\name{kobo_media_downloader} +\alias{kobo_media_downloader} +\title{Downloads media data from Kobotoolbox} +\usage{ +kobo_media_downloader( + url = "kobo.humanitarianresponse.info", + uname, + pwd, + assetid, + fsep = ";", + sleep = 2, + identifier = "URL", + timeoutval = 300, + destfolder = "media" +) +} +\arguments{ +\item{url}{The `[url]` of kobotoolbox Default is "kobo.humanitarianresponse.info".} + +\item{uname}{is username of your kobotoolbox account} + +\item{pwd}{is the password of the account} + +\item{assetid}{is the id of the asset for which the export is to be created} + +\item{fsep}{is the separator of the downloaded csv file. In most of the cases, it is ";", which is the default. However,} + +\item{sleep}{is the sleep time between API actions. For example, it takes time to download an export. But R does not wait for the download to finish before going to next step. 
Hence the need to provide a break between consecutive API actions. Default value is 2 (seconds).} + +\item{identifier}{is the key using with the columns with URL is identified. Default value is "URL" because in most of the cases, the columns +containing the URL values end with "URL". Please note that any other column name with similar value may cause error.} + +\item{timeoutval}{is the timeout value in seconds to download the media files. Default is 300 seconds.} + +\item{destfolder}{is the folder where the media is to be stored.} +} +\value{ +The function returns a data frame of data downloaded from 'Kobotoolbox'. +} +\description{ +`kobo_media_downloader` downloads media from data downloaded using `kobo_df_download`. Loops through media columns and downloads files individually.` +} +\details{ +The function creates an export of survey data in 'csv'. If successful, it attempts to download the data and and return a data frame. +} diff --git a/man/kobo_xls_dl.Rd b/man/kobo_xls_dl.Rd new file mode 100644 index 0000000..8f4beb1 --- /dev/null +++ b/man/kobo_xls_dl.Rd @@ -0,0 +1,64 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/Koboconnect.R +\name{kobo_xls_dl} +\alias{kobo_xls_dl} +\title{Downloads data (xls type) from Kobotoolbox} +\usage{ +kobo_xls_dl( + url = "kobo.humanitarianresponse.info", + uname = "", + pwd = "", + assetid = "", + all = "false", + lang = "_default", + hierarchy = "false", + include_grp = "true", + grp_sep = "/", + multi_sel = "both", + media_url = "true", + fields = NULL, + sub_ids = NULL, + sleep = 2 +) +} +\arguments{ +\item{url}{The `[url]` of kobotoolbox Default is "kobo.humanitarianresponse.info".} + +\item{uname}{is username of your kobotoolbox account} + +\item{pwd}{is the password of the account} + +\item{assetid}{is the id of the asset for which the export is to be created} + +\item{all}{takes logical value in string format. 
Used to specify whether fields from all form versions will be included in the export.Acceptable values are "true" or "false". Default value is "false".} + +\item{lang}{takes the language. For e.g. "English (en)". For "XML Values as headers", use '_xml'.} + +\item{hierarchy}{takes logical value in string format. Used to specify whether the group hierarchy will be displayed in labels. Acceptable values are "true" or "false". Default value is "false".} + +\item{include_grp}{defines whether or not to include groups. Default value is "true".} + +\item{grp_sep}{is the group separator. Default value is "/".} + +\item{multi_sel}{is used to specify the display of multiple_select-type responses. Valid inputs include "both", "summary" or "details". Default is "both".} + +\item{media_url}{This will include an additional column for media-type questions ("question_name_URL") with the URL link to the hosted file. Valid inputs are "true" or "false". Default value is true.} + +\item{fields}{is an array of column names to be included in the export (including their group hierarchy). Valid inputs include: +An array containing any string value that matches the XML column name, +An empty array which will result in all columns being included, +If "fields" is not included in the "export_settings", all columns will be included in the export} + +\item{sub_ids}{is an array of submission ids that will filter exported submissions to only the specified array of ids. Valid inputs include an array containing integer values or an empty array.} + +\item{sleep}{is the sleep time between API actions. For example, it takes time to download an export. But R does not wait for the download to finish before going to next step. Hence the need to provide a break between consecutive API actions. Default value is 2 (seconds).} +} +\value{ +The function returns a data frame of data downloaded from 'Kobotoolbox'. 
+} +\description{ +`kobo_xls_dl` is a wrapper for kobotoolbox API `https://[url]/exports/..` +} +\details{ +The function creates an export of survey data in 'xls'. If successful, it attempts to download the data and return a data frame (reading using `readxl::read_excel`). +} diff --git a/vignettes/Usage.Rmd b/vignettes/Usage.Rmd index 77a9f4c..ff9d1ac 100644 --- a/vignettes/Usage.Rmd +++ b/vignettes/Usage.Rmd @@ -124,7 +124,26 @@ kobo_df_download(uname = "username",pwd="password", assetid = "asset", lang = "English (en)") ``` +### Downloading xls file directly +Works on the same principle as that in `kobo_df_download`, except the exported file is in `excel` format. + +```{r eval=FALSE, include=TRUE} +kobo_xls_dl(uname = "username",pwd="password", assetid = "asset", + lang = "English (en)") +``` + + + +### Downloading Media Files + +This function downloads the data using `kobo_df_download`, then finds the columns with media links and downloads the files in a loop. This process is time consuming and you may face errors due to timeouts and server load issues. + +```{r eval=FALSE, include=TRUE} +kobo_media_downloader(uname = "uid",pwd="password", assetid = "assetid") +``` + +In case you do not want to use this function, use the data downloaded using `kobo_df_download` and find the columns where media links exist. Then loop over the values in the column (download using the function `download.file()`). ## Issues and Suggestions