From b2df4aa6262b2156ee28b3edfd8eb4f792f6e473 Mon Sep 17 00:00:00 2001
From: Robrecht Cannoodt
Date: Wed, 13 Nov 2024 22:56:02 +0100
Subject: [PATCH] experiment with new dummy anndata (#193)

* refactor obsvar test
* make sure dummy-anndata is installed
* add nolint
* use fix
* avoid reticulate conversion
* also test a couple of things in R
* split up tests
* capture the result of the var assert_frame_equal call before checking it
---
 .github/workflows/R-CMD-check.yaml     |   2 +-
 tests/testthat/test-roundtrip-obsvar.R | 164 ++++++++++++-------------
 2 files changed, 81 insertions(+), 85 deletions(-)

diff --git a/.github/workflows/R-CMD-check.yaml b/.github/workflows/R-CMD-check.yaml
index 565cf475..cb01cbd3 100644
--- a/.github/workflows/R-CMD-check.yaml
+++ b/.github/workflows/R-CMD-check.yaml
@@ -61,7 +61,7 @@ jobs:
           if (!reticulate:::miniconda_exists()) {
             reticulate::install_miniconda()
           }
-          reticulate::py_install(c("anndata", "scanpy"), pip = TRUE)
+          reticulate::py_install(c("anndata", "scanpy", "dummy-anndata"), pip = TRUE)
         shell: Rscript {0}
 
       - uses: r-lib/actions/check-r-package@v2
diff --git a/tests/testthat/test-roundtrip-obsvar.R b/tests/testthat/test-roundtrip-obsvar.R
index 1b7f9ed5..a57051cd 100644
--- a/tests/testthat/test-roundtrip-obsvar.R
+++ b/tests/testthat/test-roundtrip-obsvar.R
@@ -1,104 +1,100 @@
 skip_if_no_anndata()
-skip_if_not_installed("hdf5r")
+skip_if_not_installed("reticulate")
 
-data <- generate_dataset(10L, 20L)
+library(reticulate)
+testthat::skip_if_not(
+  reticulate::py_module_available("dummy_anndata"),
+  message = "Python dummy_anndata module not available for testing"
+)
 
-test_names <- names(data$obs)
+ad <- reticulate::import("anndata", convert = FALSE)
+da <- reticulate::import("dummy_anndata", convert = FALSE)
+pd <- reticulate::import("pandas", convert = FALSE)
+bi <- reticulate::import_builtins()
 
-# TODO: re-enable tests
-test_names <- test_names[!grepl("_with_nas", test_names)]
-
-# TODO: re-enable tests
-# NOTE: I think this doesn't work in cran anndata
-test_names <- test_names[test_names != "logical_with_nas"]
+test_names <- names(da$vector_generators)
 
 for (name in test_names) {
-  test_that(paste0("roundtrip with obs and var '", name, "'"), {
-    # create anndata
-    ad <- AnnData(
-      X = data$X,
-      obs = data$obs[, name, drop = FALSE],
-      var = data$var[, name, drop = FALSE]
-    )
-
-    # write to file
-    filename <- withr::local_file(tempfile(fileext = ".h5ad"))
-    write_h5ad(ad, filename)
-
-    # read from file
-    ad_new <- read_h5ad(filename, to = "HDF5AnnData")
-
-    # expect slots are unchanged
+  # first generate a python h5ad
+  adata_py <- da$generate_dataset(
+    x_type = "generate_float_matrix",
+    obs_types = list(name),
+    var_types = list(name),
+    layer_types = list(),
+    obsm_types = list(),
+    varm_types = list(),
+    obsp_types = list(),
+    varp_types = list(),
+    uns_types = list()
+  )
+  # remove uns - workaround for https://github.com/data-intuitive/dummy-anndata/issues/2
+  adata_py$uns <- bi$dict()
+  # TODO: remove X
+
+  # create a couple of paths
+  file_py <- withr::local_file(tempfile(paste0("anndata_py_", name), fileext = ".h5ad"))
+  file_r <- withr::local_file(tempfile(paste0("anndata_r_", name), fileext = ".h5ad"))
+
+  # write to file
+  adata_py$write_h5ad(file_py)
+
+  test_that(paste0("reading an AnnData with obs and var '", name, "' works"), {
+    adata_r <- read_h5ad(file_py, to = "HDF5AnnData")
     expect_equal(
-      ad_new$obs[[name]],
-      data$obs[[name]],
-      ignore_attr = TRUE,
-      tolerance = 1e-6
+      adata_r$shape(),
+      unlist(reticulate::py_to_r(adata_py$shape))
     )
     expect_equal(
-      ad_new$var[[name]],
-      data$var[[name]],
-      ignore_attr = TRUE,
-      tolerance = 1e-6
-    )
-  })
-}
-
-for (name in test_names) {
-  test_that(paste0("reticulate->hdf5 with obs and var '", name, "'"), {
-    ad <- anndata::AnnData(
-      obs = data$obs[, name, drop = FALSE],
-      var = data$var[, name, drop = FALSE]
-    )
-
-    # write to file
-    filename <- withr::local_file(tempfile(fileext = ".h5ad"))
-    ad$write_h5ad(filename)
-
-    # read from file
-    ad_new <- HDF5AnnData$new(filename)
-
-    # expect slots are unchanged
-    expect_equal(
-      ad_new$obs[[name]],
-      data$obs[[name]],
-      tolerance = 1e-6
+      adata_r$obs_keys(),
+      py_to_r(adata_py$obs_keys())
     )
     expect_equal(
-      ad_new$var[[name]],
-      data$var[[name]],
-      tolerance = 1e-6
+      adata_r$var_keys(),
+      py_to_r(adata_py$var_keys())
     )
-  })
-}
 
-for (name in test_names) {
-  test_that(paste0("hdf5->reticulate with obs and var '", name, "'"), {
-    # write to file
-    filename <- withr::local_file(tempfile(fileext = ".h5ad"))
+    # check that the print output is the same
+    str_r <- capture.output(print(adata_r))
+    str_py <- capture.output(print(adata_py))
+    expect_equal(str_r, str_py)
+
+    # if we would test the objects at this stage,
+    # we're also testing reticulate's conversion
+    # nolint start
+    # expect_equal(
+    #   adata_r$obs[[name]],
+    #   py_to_r(adata_py$obs[[name]]),
+    #   tolerance = 1e-6
+    # )
+    # expect_equal(
+    #   adata_r$var[[name]],
+    #   py_to_r(adata_py$var[[name]]),
+    #   tolerance = 1e-6
+    # )
+    # nolint end
+  })
 
-    # create anndata
-    ad <- AnnData(
-      obs = data$obs[, name, drop = FALSE],
-      var = data$var[, name, drop = FALSE]
-    )
-    write_h5ad(ad, filename)
+  test_that(paste0("Writing an AnnData with obs and var '", name, "' works"), {
+    adata_r <- read_h5ad(file_py, to = "InMemoryAnnData")
+    write_h5ad(adata_r, file_r)
 
     # read from file
-    ad_new <- anndata::read_h5ad(filename)
-
-    # expect slots are unchanged
-    expect_equal(
-      ad_new$obs[[name]],
-      data$obs[[name]],
-      ignore_attr = TRUE,
-      tolerance = 1e-6
+    adata_py2 <- ad$read_h5ad(file_r)
+
+    # expect that the objects are the same
+    zz <- pd$testing$assert_frame_equal(
+      adata_py2$obs,
+      adata_py$obs,
+      check_dtype = FALSE,
+      check_exact = FALSE
     )
-    expect_equal(
-      ad_new$var[[name]],
-      data$var[[name]],
-      ignore_attr = TRUE,
-      tolerance = 1e-6
+    expect_null(reticulate::py_to_r(zz))
+    zz <- pd$testing$assert_frame_equal(
+      adata_py2$var,
+      adata_py$var,
+      check_dtype = FALSE,
+      check_exact = FALSE
     )
+    expect_null(reticulate::py_to_r(zz))
   })
 }