ver 1.2.0

asitav-sen · Oct 16, 2022 · 335269f · 335269f
1 parent 943a627
commit 335269f
Show file tree

Hide file tree

Showing 12 changed files with 283 additions and 9 deletions.
diff --git a/.Rproj.user/shared/notebooks/paths b/.Rproj.user/shared/notebooks/paths
@@ -18,6 +18,8 @@
 /Users/scarecrow/Datascience/Personal Projects/Packages/KoboconnectR/tests/testthat/test-kobo_df_download.R="C92CC245"
 /Users/scarecrow/Datascience/Personal Projects/Packages/KoboconnectR/tests/testthat/test-kobo_export_create.R="DF94B895"
 /Users/scarecrow/Datascience/Personal Projects/Packages/KoboconnectR/tests/testthat/test-kobo_exports.R="70210422"
+/Users/scarecrow/Datascience/Personal Projects/Packages/KoboconnectR/tests/testthat/test-kobo_media_downloader.R="C7CD36C8"
+/Users/scarecrow/Datascience/Personal Projects/Packages/KoboconnectR/tests/testthat/test-kobo_xls_dl.R="E10A4CA3"
 /Users/scarecrow/Datascience/Personal Projects/Packages/KoboconnectR/tests/testthat/test-kobotools_api.R="32336AF9"
 /Users/scarecrow/Datascience/Personal Projects/Packages/KoboconnectR/tests/testthat/test-kobotools_kpi_data.R="E1CED08D"
 /Users/scarecrow/Datascience/Personal Projects/Packages/KoboconnectR/vignettes/.gitignore="EAC491AA"

diff --git a/.gitignore b/.gitignore
@@ -11,3 +11,4 @@ LICENSE
 README.Rmd
 tests/
 CRAN-SUBMISSION
+media/
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -3,7 +3,7 @@ Type: Package
 Title: Download Data from Kobotoolbox to R
 URL: https://github.com/asitav-sen/KoboconnectR
 BugReports: https://github.com/asitav-sen/koboconnectR/issues
-Version: 1.1.1
+Version: 1.2.0
 Authors@R: person("Asitav ", "Sen", email = "[email protected]",
                   role = c("aut", "cre","cph"))
 Description: Wrapper for 'Kobotoolbox' APIs ver 2 mentioned at <https://support.kobotoolbox.org/api.html>, to download data from 'Kobotoolbox' to R. Small and simple package that adds immense convenience for the data professionals using 'Kobotoolbox'.
@@ -19,7 +19,8 @@ Imports:
     openssl,
     R6,
     methods,
-    dplyr
+    dplyr,
+    readxl
 Suggests: 
     knitr,
     rmarkdown,

diff --git a/NAMESPACE b/NAMESPACE
@@ -4,6 +4,8 @@ export(get_kobo_token)
 export(kobo_df_download)
 export(kobo_export_create)
 export(kobo_exports)
+export(kobo_media_downloader)
+export(kobo_xls_dl)
 export(kobotools_api)
 export(kobotools_kpi_data)
 import(R6)
@@ -24,4 +26,6 @@ importFrom(httr,stop_for_status)
 importFrom(httr,timeout)
 importFrom(httr,warn_for_status)
 importFrom(jsonlite,fromJSON)
+importFrom(readxl,read_excel)
+importFrom(utils,download.file)
 importFrom(utils,read.csv)
diff --git a/NEWS.md b/NEWS.md
@@ -1,3 +1,8 @@
+
+# KoboconnectR 1.2.0
+
+Added new function `kobo_media_downloader` that downloads media from the kobo assets. Added new function `kobo_xls_dl` that downloads the data as an 'xls' export.
+
 # KoboconnectR 1.1.1
 
 Resolved issues of API not reacting to parameters. For example, setting `include_media_url` to "true" did not actually include the medial url. 

diff --git a/R/Koboconnect.R b/R/Koboconnect.R
@@ -216,7 +216,7 @@ kobo_exports <- function(url="kobo.humanitarianresponse.info", uname="", pwd="",
 #' @param pwd is the password of the account
 #' @param assetid is the id of the asset for which the export is to be created
 #' @param all takes logical value in string format. Used to specify whether fields from all form versions will be included in the export.Acceptable values are "true" or "false". Default value is "false".
-#' @param lang takes the language. For e.g. "English (en)".
+#' @param lang takes the language. For e.g. "English (en)". For "XML Values as headers", use '_xml'.
 #' @param hierarchy takes logical value in string format. Used to specify whether the group hierarchy will be displayed in labels. Acceptable values are "true" or "false". Default value is "false".
 #' @param grp_sep is the group separator. Default value is "/".
 #' @param include_grp defines whether or not to include groups. Default value is "true".
@@ -279,7 +279,7 @@ kobo_export_create <- function(url="kobo.humanitarianresponse.info", uname="", p
 #' @param pwd is the password of the account
 #' @param assetid is the id of the asset for which the export is to be created
 #' @param all takes logical value in string format. Used to specify whether fields from all form versions will be included in the export.Acceptable values are "true" or "false". Default value is "false".
-#' @param lang takes the language. For e.g. "English (en)".
+#' @param lang takes the language. For e.g. "English (en)". For "XML Values as headers", use '_xml'.
 #' @param hierarchy takes logical value in string format. Used to specify whether the group hierarchy will be displayed in labels. Acceptable values are "true" or "false". Default value is "false".
 #' @param grp_sep is the group separator. Default value is "/".
 #' @param include_grp defines whether or not to include groups. Default value is "true".
@@ -336,3 +336,125 @@ kobo_df_download <- function(url="kobo.humanitarianresponse.info", uname="", pwd
 
 }
 
+
+#'  Downloads media data from Kobotoolbox
+#'
+#'@description
+#' `kobo_media_downloader` downloads media from data downloaded using `kobo_df_download`. Loops through media columns and downloads the files of each column.
+#'
+#'@details
+#' The function first downloads the survey data using `kobo_df_download`. It then selects the columns whose names match `identifier`
+#' and downloads every non-empty URL found in those columns into `destfolder`. Each file is named `<column name>_<row number>`.
+#'
+#' @param url The `[url]` of kobotoolbox. Default is "kobo.humanitarianresponse.info".
+#' @param uname is username of your kobotoolbox account
+#' @param pwd is the password of the account
+#' @param assetid is the id of the asset for which the export is to be created
+#' @param fsep is the separator of the downloaded csv file. In most of the cases, it is ";", which is the default. However,
+#' if your export uses a different separator, provide it here.
+#' @param sleep is the sleep time between API actions. For example, it takes time to download an export. But R does not wait for the download to finish before going to next step. Hence the need to provide a break between consecutive API actions. Default value is 2 (seconds).
+#' @param identifier is the key with which the columns containing URLs are identified. It is matched against the column names as a
+#' regular expression. Default value is "URL" because in most of the cases, the columns
+#' containing the URL values end with "URL". Please note that any other column name with similar value may cause error.
+#' @param timeoutval is the timeout value in seconds to download the media files. Default is 300 seconds. The `timeout` option is
+#' raised only while the function runs and is restored afterwards.
+#' @param destfolder is the folder where the media is to be stored. It is created if it does not exist.
+#'
+#' @return The function returns `TRUE` once the download loop has finished. The media files are written to `destfolder`.
+#'
+#'
+#' @importFrom httr POST content authenticate progress DELETE GET
+#' @importFrom jsonlite fromJSON
+#' @importFrom utils read.csv download.file
+#'
+#' @export
+
+kobo_media_downloader <- function(url="kobo.humanitarianresponse.info",uname,pwd, assetid, fsep=";", sleep=2, identifier="URL", timeoutval=300, destfolder="media"){
+  dat<-kobo_df_download(url=url,uname = uname,
+                   pwd=pwd, assetid = assetid,
+                   lang = "_default", sleep=sleep, fsep=fsep)
+
+  # Without a data frame there is nothing to loop over; fail with a clear message
+  # instead of an obscure subsetting error further down.
+  if (!is.data.frame(dat)) {
+    stop("Survey data could not be downloaded, so no media files were fetched.", call. = FALSE)
+  }
+
+  print("Please note that this function loops over the URLs and downloads the individual files. This process can be slow and
+        some files may fail to download due to timeout issues. Downloading a zipped file is not supported using API, yet in Kobotoolbox.")
+
+  cnamesdat<-colnames(dat)
+
+  # `identifier` is matched as a regular expression anywhere in the column name
+  # (the previous leading "*" was a glob-ism with no effect on the match).
+  urlcols<-cnamesdat[grepl(identifier,cnamesdat)]
+
+  # Raise the download timeout for this call only and restore the user's setting on exit.
+  oldopts<-options(timeout = max(timeoutval, getOption("timeout")))
+  on.exit(options(oldopts), add = TRUE)
+
+  if (!dir.exists(destfolder)){
+    dir.create(destfolder, recursive = TRUE)
+  }
+
+  # Iterating over the names themselves is a no-op when no column matched
+  # (1:length(x) would have run with an invalid index).
+  for(urlcol in urlcols){
+    urls<-as.character(dat[[urlcol]])
+    # file.path() keeps absolute destination folders intact; the row number in the
+    # file name is computed before filtering so it always refers to the data row.
+    fname<-file.path(destfolder, paste0(urlcol,"_",seq_along(urls)))
+    # Rows without an attachment carry an empty or missing URL; skip them.
+    hasurl<-!is.na(urls) & nzchar(urls)
+    if (any(hasurl)){
+      download.file(urls[hasurl],fname[hasurl], method="libcurl")
+    }
+  }
+
+  return(TRUE)
+
+}
+
+#'  Downloads data (xls type) from Kobotoolbox
+#'
+#'@description
+#' `kobo_xls_dl` is a wrapper for kobotoolbox API `https://[url]/exports/..`
+#'
+#'@details
+#' The function creates an export of survey data in 'xls'. If successful, it attempts to download the data and return a data frame (reading using `readxl::read_excel`).
+#' After the download, the function attempts to delete the export from the server and warns if the deletion does not succeed.
+#'
+#' @param url The `[url]` of kobotoolbox. Default is "kobo.humanitarianresponse.info".
+#' @param uname is username of your kobotoolbox account
+#' @param pwd is the password of the account
+#' @param assetid is the id of the asset for which the export is to be created
+#' @param all takes logical value in string format. Used to specify whether fields from all form versions will be included in the export.Acceptable values are "true" or "false". Default value is "false".
+#' @param lang takes the language. For e.g. "English (en)". For "XML Values as headers", use '_xml'.
+#' @param hierarchy takes logical value in string format. Used to specify whether the group hierarchy will be displayed in labels. Acceptable values are "true" or "false". Default value is "false".
+#' @param grp_sep is the group separator. Default value is "/".
+#' @param include_grp defines whether or not to include groups. Default value is "true".
+#' @param multi_sel is used to specify the display of multiple_select-type responses. Valid inputs include "both", "summary" or "details". Default is "both".
+#' @param fields is an array of column names to be included in the export (including their group hierarchy). Valid inputs include:
+#' An array containing any string value that matches the XML column name,
+#' An empty array which will result in all columns being included,
+#' If "fields" is not included in the "export_settings", all columns will be included in the export
+#'
+#' @param media_url This will include an additional column for media-type questions ("question_name_URL") with the URL link to the hosted file. Valid inputs are "true" or "false". Default value is true.
+#' @param sub_ids is an array of submission ids that will filter exported submissions to only the specified array of ids. Valid inputs include an array containing integer values or an empty array.
+#' @param sleep is the sleep time between API actions. For example, it takes time to download an export. But R does not wait for the download to finish before going to next step. Hence the need to provide a break between consecutive API actions. Default value is 2 (seconds).
+#'
+#' @return The function returns a data frame of data downloaded from 'Kobotoolbox', or `NULL` if the export could not be created.
+#'
+#'
+#' @importFrom httr POST content authenticate progress DELETE GET
+#' @importFrom jsonlite fromJSON
+#' @importFrom utils read.csv
+#' @importFrom readxl read_excel
+#'
+#' @export
+kobo_xls_dl<- function(url="kobo.humanitarianresponse.info", uname="", pwd="",
+                       assetid="", all="false", lang="_default",
+                       hierarchy="false", include_grp="true",grp_sep="/",
+                       multi_sel="both", media_url="true", fields=NULL, sub_ids=NULL, sleep=2){
+
+  new_export_details<-export_creator(url=url, uname=uname, pwd=pwd,
+                                     assetid=assetid, type= "xls", all=all, lang=lang,
+                                     hierarchy=hierarchy, include_grp=include_grp,grp_sep=grp_sep,
+                                     multi_sel=multi_sel, fields=fields, media_url=media_url, sub_ids=sub_ids, sleep=sleep)
+
+  # Nothing to wait for or download when the export could not be created.
+  if(is.null(new_export_details)){
+    print("export creation was not successful")
+    return(NULL)
+  }
+
+  # Pause between the export creation and the download request.
+  Sys.sleep(sleep)
+
+  dff<-export_downloader(new_export_details[[1]], uname=uname, pwd=pwd, sleep=sleep, type="xls")
+  deleteact<-DELETE(url=paste0(url,"/api/v2/assets/",assetid,"/exports/",new_export_details[[2]],"/"),
+                    authenticate(user=uname, password = pwd), progress())
+
+  # The request is issued exactly once, so its status is checked exactly once.
+  # (A polling loop here could never terminate, because nothing inside it
+  # would refresh the response.)
+  delstatus<-deleteact$status_code
+  if(is.null(delstatus) || is.na(delstatus)){
+    warning("Could not determine whether the export was deleted. Please delete manually.", call. = FALSE)
+  } else {
+    warn_for_status(deleteact,"delete export. Please delete manually.")
+    if(delstatus==204) print("Export deleted from server")
+  }
+
+  return(dff)
+
+}
+
+
diff --git a/R/other.R b/R/other.R
@@ -65,7 +65,7 @@ export_creator <- function(url="kobo.humanitarianresponse.info", uname="", pwd="
              fields=fields,
              flatten= flatten,
              xls_types_as_text=xls_typ_as_text,
-             #include_media_url=media_url,
+             include_media_url=media_url,
              submission_ids=sub_ids,
              query=qry
            ),
@@ -121,13 +121,22 @@ export_creator <- function(url="kobo.humanitarianresponse.info", uname="", pwd="
 
 }
 
-export_downloader<-function(exp.url, fsep, uname, pwd, sleep){
+# Downloads the export located at `exp.url` with basic authentication, stores it
+# in a temporary file and reads it into a data frame.
+#   exp.url : URL of the export to download
+#   fsep    : field separator, used only when `type` is "csv"
+#   uname   : account user name
+#   pwd     : account password (never printed or logged)
+#   sleep   : pause in seconds after the request and after extracting the content
+#   type    : "csv" (read with read.csv) or "xls" (read with readxl::read_excel)
+# Returns the parsed data; stops on any other `type`.
+export_downloader<-function(exp.url, fsep, uname, pwd, sleep, type="csv"){
   tmp_file <- tempfile()
+  # The temporary copy is only needed until it has been parsed.
+  on.exit(unlink(tmp_file), add = TRUE)
   df<-httr::GET(exp.url, httr::authenticate(user=uname, password = pwd),progress())
   Sys.sleep(sleep)
   dff<-httr::content(df, type="raw",encoding = "UTF-8")
   Sys.sleep(sleep)
   writeBin(dff, tmp_file)
-  dff<-read.csv(tmp_file, sep=fsep)
+  if(type=="csv"){
+    dff<-read.csv(tmp_file, sep=fsep)
+  } else if(type=="xls"){
+    dff<-readxl::read_excel(tmp_file)
+  } else {
+    # Returning the raw bytes for an unknown type would only fail later and less clearly.
+    stop("Unsupported export type: ", type, call. = FALSE)
+  }
+
   return(dff)
 }
diff --git a/man/kobo_df_download.Rd b/man/kobo_df_download.Rd
diff --git a/man/kobo_export_create.Rd b/man/kobo_export_create.Rd
diff --git a/man/kobo_media_downloader.Rd b/man/kobo_media_downloader.Rd
diff --git a/man/kobo_xls_dl.Rd b/man/kobo_xls_dl.Rd
diff --git a/vignettes/Usage.Rmd b/vignettes/Usage.Rmd
@@ -124,7 +124,26 @@ kobo_df_download(uname = "username",pwd="password", assetid = "asset",
                                           lang = "English (en)")
 ```
 
+### Downloading xls file directly
 
+Works on the same principle as that in `kobo_df_download`, except the exported file is in `excel` format.
+
+```{r eval=FALSE, include=TRUE}
+kobo_xls_dl(uname = "username",pwd="password", assetid = "asset",
+                                          lang = "English (en)")
+```
+
+
+
+### Downloading Media Files
+
+This function downloads the data using `kobo_df_download`, finds the columns with media links and then downloads the files in a loop. This process is time-consuming and you may face errors due to timeouts and server load issues.
+
+```{r eval=FALSE, include=TRUE}
+kobo_media_downloader(uname = "uid",pwd="password", assetid = "assetid")
+```
+
+In case you do not want to use this function, use the data downloaded using `kobo_df_download` and find the columns where media links exist. Then loop over the values in the column (download using the function `download.file()`).
 
 ## Issues and Suggestions
-Original file line number
+Diff line change
@@ Expand Up / @@ -11,3 +11,4 @@ LICENSE @@
     README.Rmd
     tests/
     CRAN-SUBMISSION
+    media/