From 8a7a81c966cebe8a832e805ca93547d403493c96 Mon Sep 17 00:00:00 2001 From: Zhian Kamvar Date: Sun, 4 Feb 2018 22:34:39 -0600 Subject: [PATCH 01/19] attempt to implement euclidean dist [skip ci] This does not work at the moment with bitwise.ia. For some reason, it keeps crashing. This has something to do with passing SEXP objects to C code internally, but I'm not sure what is wrong here. --- R/bitwise.r | 52 ++++++++++++++++++++++++++++++++++++++---- man/bitwise.dist.Rd | 9 +++++++- src/bitwise_distance.c | 34 +++++++++++++++++++-------- src/init.c | 4 ++-- 4 files changed, 81 insertions(+), 18 deletions(-) diff --git a/R/bitwise.r b/R/bitwise.r index bacec91c..813196d0 100644 --- a/R/bitwise.r +++ b/R/bitwise.r @@ -64,6 +64,16 @@ #' with anything. \code{FALSE} forces missing data to not match with any other #' information, \strong{including other missing data}. #' +#' @param scale_missing A logical. If \code{TRUE}, comparisons with missing +#' data is scaled up proportionally to the number of columns used by +#' multiplying the value by \code{m / (m - x)} where m is the number of +#' loci and x is the number of missing sites. This option matches the behavior +#' of base R's \code{\link[package=base]{dist}} function. +#' Defaults to \code{FALSE}. +#' +#' @param euclidean \code{logical}. if \code{TRUE}, the Euclidean distance will +#' be calculated. +#' #' @param differences_only \code{logical}. When \code{differences_only = TRUE}, #' the output will reflect the number of different loci. The default setting, #' \code{differences_only = FALSE}, reflects the number of different alleles. @@ -107,6 +117,7 @@ #' xdt #==============================================================================# bitwise.dist <- function(x, percent = TRUE, mat = FALSE, missing_match = TRUE, + scale_missing = FALSE, euclidean = FALSE, differences_only = FALSE, threads = 0){ stopifnot(inherits(x, c("genlight", "genclone", "genind", "snpclone"))) # Stop if the ploidy of the genlight object is not consistent @@ -153,14 +164,20 @@ bitwise.dist <- function(x, percent = TRUE, mat = FALSE, missing_match = TRUE, } else { - pairwise_dist <- .Call("bitwise_distance_diploid", x, missing_match, differences_only, threads) + pairwise_dist <- .Call("bitwise_distance_diploid", x, missing_match, euclidean, differences_only, threads) } dist.mat <- pairwise_dist dim(dist.mat) <- c(inds,inds) colnames(dist.mat) <- ind.names rownames(dist.mat) <- ind.names - if (percent){ - if(differences_only) + nas <- NA.posi(x) + if (scale_missing && sum(lengths(nas)) > 0) { + dist.mat <- dist.mat * missing_correction(nas, nLoc(x), mat = TRUE) + } + if (euclidean){ + dist.mat <- sqrt(dist.mat) + } else if (percent) { + if (differences_only) { dist.mat <- dist.mat/(numPairs) } @@ -169,7 +186,7 @@ bitwise.dist <- function(x, percent = TRUE, mat = FALSE, missing_match = TRUE, dist.mat <- dist.mat/(numPairs*ploid) } } - if (mat == FALSE){ + if (mat == FALSE) { dist.mat <- as.dist(dist.mat) } return(dist.mat) @@ -197,7 +214,32 @@ poppr_has_parallel <- function(){ } - +#' Calculate correction for genetic distances +#' +#' @param nas a list of missing positions per sample +#' @param nloc the number of loci +#' @param mat a logical specifying whether or not a matrix should be returned +#' (default: TRUE) +#' +#' @return an n x n matrix or a choose(n, 2) length vector of values that scale +#' from 1 to the number of loci. +#' @noRd +missing_correction <- function(nas, nloc, mat = TRUE){ + n <- length(nas) + correction <- matrix(0, nrow = n, ncol = n) + for (i in seq(n - 1)) { + for (j in seq(from = i + 1, to = n)) { + res <- length(unique(unlist(nas[c(i, j)], use.names = FALSE))) + correction[j, i] <- res -> correction[i, j] + } + } + res <- nloc/(nloc - correction) + if (mat) { + return(res) + } else { + return(res[lower.tri(res)]) + } +} #==============================================================================# #' Calculate the index of association between samples in a genlight object. diff --git a/man/bitwise.dist.Rd b/man/bitwise.dist.Rd index 460f48e9..868ec8fb 100644 --- a/man/bitwise.dist.Rd +++ b/man/bitwise.dist.Rd @@ -5,7 +5,7 @@ \title{Calculate a dissimilarity distance matrix for SNP data.} \usage{ bitwise.dist(x, percent = TRUE, mat = FALSE, missing_match = TRUE, - differences_only = FALSE, threads = 0) + scale_missing = FALSE, differences_only = FALSE, threads = 0) } \arguments{ \item{x}{a \code{\link{genlight}}, \code{\link{genind}}, @@ -24,6 +24,13 @@ location. Default set to \code{TRUE}, which forces missing data to match with anything. \code{FALSE} forces missing data to not match with any other information, \strong{including other missing data}.} +\item{scale_missing}{A logical. If \code{TRUE}, comparisons with missing +data is scaled up proportionally to the number of columns used by +multiplying the value by \code{m / (m - x)} where m is the number of +loci and x is the number of missing sites. This option matches the behavior +of base R's \code{\link[package=base]{dist}} function. +Defaults to \code{FALSE}.} + \item{differences_only}{\code{logical}. When \code{differences_only = TRUE}, the output will reflect the number of different loci. The default setting, \code{differences_only = FALSE}, reflects the number of different alleles. diff --git a/src/bitwise_distance.c b/src/bitwise_distance.c index fad3d964..15506f29 100644 --- a/src/bitwise_distance.c +++ b/src/bitwise_distance.c @@ -96,7 +96,7 @@ struct locus SEXP bitwise_distance_haploid(SEXP genlight, SEXP missing, SEXP requested_threads); -SEXP bitwise_distance_diploid(SEXP genlight, SEXP missing, SEXP differences_only, SEXP requested_threads); +SEXP bitwise_distance_diploid(SEXP genlight, SEXP missing, SEXP euclid, SEXP differences_only, SEXP requested_threads); SEXP association_index_haploid(SEXP genlight, SEXP missing, SEXP requested_threads); SEXP association_index_diploid(SEXP genlight, SEXP missing, SEXP differences_only, SEXP requested_threads); SEXP get_pgen_matrix_genind(SEXP genind, SEXP freqs, SEXP pops, SEXP npop); @@ -108,7 +108,7 @@ char get_similarity_set(struct zygosity *ind1, struct zygosity *ind2); int get_zeros(char sim_set); int get_difference(struct zygosity *z1, struct zygosity *z2); int get_distance(struct zygosity *z1, struct zygosity *z2); -int get_distance_custom(char sim_set, struct zygosity *z1, struct zygosity *z2); +int get_distance_custom(char sim_set, struct zygosity *z1, struct zygosity *z2, int euclid); /*~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Calculates the pairwise differences between samples in a genlight object. The @@ -442,13 +442,14 @@ and 2 for the distance between differing homozygotes). Input: A genlight object containing samples of diploids. A boolean representing whether missing data should match (TRUE) or not. + A boolean indicating if euclidian distance should be calculated (TRUE) or not. A boolean representing whether distance (FALSE) or differences (TRUE) should be returned. An integer representing the number of threads that should be used. Output: A distance matrix representing the distance between each sample in the genlight object. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~*/ -SEXP bitwise_distance_diploid(SEXP genlight, SEXP missing, SEXP differences_only, SEXP requested_threads) +SEXP bitwise_distance_diploid(SEXP genlight, SEXP missing, SEXP euclid, SEXP differences_only, SEXP requested_threads) { // This function calculates the raw genetic distance between samples in // a genlight object. The general flow of this function is as follows: @@ -488,6 +489,7 @@ SEXP bitwise_distance_diploid(SEXP genlight, SEXP missing, SEXP differences_only int next_missing_i; int next_missing_j; int missing_match; + int is_euclid; int only_differences; int num_threads; char mask; @@ -551,6 +553,7 @@ SEXP bitwise_distance_diploid(SEXP genlight, SEXP missing, SEXP differences_only chr_length = 0; missing_match = asLogical(missing); only_differences = asLogical(differences_only); + is_euclid = asLogical(euclid); // Loop through every genotype for(i = 0; i < num_gens - 1; i++) @@ -718,7 +721,7 @@ SEXP bitwise_distance_diploid(SEXP genlight, SEXP missing, SEXP differences_only { // If we want the actual distance, we need a more complicated analysis of // the sets. See get_distance_custom for details. - cur_distance += get_distance_custom(tmp_sim_set,&set_1,&set_2); + cur_distance += get_distance_custom(tmp_sim_set, &set_1, &set_2, is_euclid); // Rprintf("-> Current Distance: %d\n", cur_distance); } } @@ -1127,6 +1130,7 @@ Input: A genlight object containing samples of diploids. A boolean representing whether or not missing values should match. A boolean representing whether distances or differences should be counted. An integer representing the number of threads to be used. + A kludge to allow bitwise_distance_diploid to work Output: The index of association for this genlight object over the specified loci ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~*/ SEXP association_index_diploid(SEXP genlight, SEXP missing, SEXP differences_only, SEXP requested_threads) @@ -1304,6 +1308,13 @@ SEXP association_index_diploid(SEXP genlight, SEXP missing, SEXP differences_onl chr_length = 0; missing_match = asLogical(missing); only_differences = asLogical(differences_only); + + + // Get the distance matrix from bitwise_distance + PROTECT(R_dists = allocVector(INTSXP, num_gens*num_gens)); + SEXP euclid; + PROTECT(euclid = 0); + R_dists = bitwise_distance_diploid(genlight, missing, euclid, differences_only, requested_threads); // Loop through all SNP chunks #ifdef _OPENMP @@ -1446,9 +1457,6 @@ SEXP association_index_diploid(SEXP genlight, SEXP missing, SEXP differences_onl } } - // Get the distance matrix from bitwise_distance - PROTECT(R_dists = allocVector(INTSXP, num_gens*num_gens)); - R_dists = bitwise_distance_diploid(genlight, missing, differences_only, requested_threads); // Calculate the sum and squared sum of distances between samples D = 0; @@ -1516,7 +1524,7 @@ SEXP association_index_diploid(SEXP genlight, SEXP missing, SEXP differences_onl R_Free(vars); R_Free(M); R_Free(M2); - UNPROTECT(6); + UNPROTECT(7); return R_out; } @@ -2127,19 +2135,25 @@ Input: A char representing the similarity set between two zygosity structs Output: The total distance between two samples, such that DD/rr are a distance of 2, and Dr/rr are a distance of 1 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~*/ -int get_distance_custom(char sim_set, struct zygosity *z1, struct zygosity *z2) +int get_distance_custom(char sim_set, struct zygosity *z1, struct zygosity *z2, int euclid) { int dist = 0; + int multiplier; char Hor; char S; char ch_dist; S = sim_set; Hor = z1->ch | z2->ch; + // The diploids are calculated in two phases. The first phase simply asks for + // the differences. The second asks if there are differences on both strands. + // To get euclidian values, we can multiply this by 3 so that the result is 4, + // which is 2^2. We will take the square root in R. + multiplier = (euclid) ? 3 : 1; ch_dist = Hor | S; // Force ones everywhere they are the same dist = get_zeros(S); // Add one distance for every non-shared zygosity - dist += get_zeros(ch_dist); // Add another one for every difference that has no heterozygotes + dist += get_zeros(ch_dist) * multiplier; // Add another one for every difference that has no heterozygotes return dist; } diff --git a/src/init.c b/src/init.c index 7dce8fb0..4fb3ba0b 100644 --- a/src/init.c +++ b/src/init.c @@ -10,7 +10,7 @@ /* .Call calls */ extern SEXP association_index_diploid(SEXP, SEXP, SEXP, SEXP); extern SEXP association_index_haploid(SEXP, SEXP, SEXP); -extern SEXP bitwise_distance_diploid(SEXP, SEXP, SEXP, SEXP); +extern SEXP bitwise_distance_diploid(SEXP, SEXP, SEXP, SEXP, SEXP); extern SEXP bitwise_distance_haploid(SEXP, SEXP, SEXP); extern SEXP bruvo_distance(SEXP, SEXP, SEXP, SEXP, SEXP, SEXP); extern SEXP expand_indices(SEXP, SEXP); @@ -28,7 +28,7 @@ extern SEXP permuto(SEXP); static const R_CallMethodDef CallEntries[] = { {"association_index_diploid", (DL_FUNC) &association_index_diploid, 4}, {"association_index_haploid", (DL_FUNC) &association_index_haploid, 3}, - {"bitwise_distance_diploid", (DL_FUNC) &bitwise_distance_diploid, 4}, + {"bitwise_distance_diploid", (DL_FUNC) &bitwise_distance_diploid, 5}, {"bitwise_distance_haploid", (DL_FUNC) &bitwise_distance_haploid, 3}, {"bruvo_distance", (DL_FUNC) &bruvo_distance, 6}, {"expand_indices", (DL_FUNC) &expand_indices, 2}, From a4cf3a747f4de40524c0466248364a77707ace72 Mon Sep 17 00:00:00 2001 From: Zhian Kamvar Date: Tue, 20 Feb 2018 21:36:13 -0600 Subject: [PATCH 02/19] [skip ci] more attempt to pass euclid parameter This is not seeming to work. Every time I run it, I get either a crash or a C stack is too close to limit warning. This happens regardless if I pass the Euclid parameter to bitwise diploid or if I attempt to create it within association_index_diploid. --- R/bitwise.r | 9 +++++++-- src/bitwise_distance.c | 7 +++---- src/init.c | 4 ++-- 3 files changed, 12 insertions(+), 8 deletions(-) diff --git a/R/bitwise.r b/R/bitwise.r index 813196d0..e2a4fb49 100644 --- a/R/bitwise.r +++ b/R/bitwise.r @@ -297,8 +297,13 @@ bitwise.ia <- function(x, missing_match=TRUE, differences_only=FALSE, threads=0) # Ensure that every SNPbin object has data for all chromosomes if (ploid == 2){ x <- fix_uneven_diploid(x) - IA <- .Call("association_index_diploid", x, missing_match, differences_only, - threads, PACKAGE = "poppr") + IA <- .Call("association_index_diploid", + genlight = x, + missing_match = missing_match, + differences_only = differences_only, + requested_threads = threads, + euclid = FALSE, + PACKAGE = "poppr") } else if(ploid == 1) { diff --git a/src/bitwise_distance.c b/src/bitwise_distance.c index 15506f29..b9f9570e 100644 --- a/src/bitwise_distance.c +++ b/src/bitwise_distance.c @@ -98,7 +98,7 @@ struct locus SEXP bitwise_distance_haploid(SEXP genlight, SEXP missing, SEXP requested_threads); SEXP bitwise_distance_diploid(SEXP genlight, SEXP missing, SEXP euclid, SEXP differences_only, SEXP requested_threads); SEXP association_index_haploid(SEXP genlight, SEXP missing, SEXP requested_threads); -SEXP association_index_diploid(SEXP genlight, SEXP missing, SEXP differences_only, SEXP requested_threads); +SEXP association_index_diploid(SEXP genlight, SEXP missing, SEXP differences_only, SEXP requested_threads, SEXP euclid); SEXP get_pgen_matrix_genind(SEXP genind, SEXP freqs, SEXP pops, SEXP npop); // SEXP get_pgen_matrix_genlight(SEXP genlight, SEXP window); // void fill_Pgen(double *pgen, struct locus *loci, int interval, SEXP genlight); @@ -1133,7 +1133,7 @@ Input: A genlight object containing samples of diploids. A kludge to allow bitwise_distance_diploid to work Output: The index of association for this genlight object over the specified loci ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~*/ -SEXP association_index_diploid(SEXP genlight, SEXP missing, SEXP differences_only, SEXP requested_threads) +SEXP association_index_diploid(SEXP genlight, SEXP missing, SEXP differences_only, SEXP requested_threads, SEXP euclid) { // This function calculates the index of association for samples in // a genlight object. The general flow of this function is as follows: @@ -1312,8 +1312,7 @@ SEXP association_index_diploid(SEXP genlight, SEXP missing, SEXP differences_onl // Get the distance matrix from bitwise_distance PROTECT(R_dists = allocVector(INTSXP, num_gens*num_gens)); - SEXP euclid; - PROTECT(euclid = 0); + // PROTECT(euclid = 0); R_dists = bitwise_distance_diploid(genlight, missing, euclid, differences_only, requested_threads); // Loop through all SNP chunks diff --git a/src/init.c b/src/init.c index 4fb3ba0b..d4db0072 100644 --- a/src/init.c +++ b/src/init.c @@ -8,7 +8,7 @@ */ /* .Call calls */ -extern SEXP association_index_diploid(SEXP, SEXP, SEXP, SEXP); +extern SEXP association_index_diploid(SEXP, SEXP, SEXP, SEXP, SEXP); extern SEXP association_index_haploid(SEXP, SEXP, SEXP); extern SEXP bitwise_distance_diploid(SEXP, SEXP, SEXP, SEXP, SEXP); extern SEXP bitwise_distance_haploid(SEXP, SEXP, SEXP); @@ -26,7 +26,7 @@ extern SEXP permute_shuff(SEXP, SEXP, SEXP); extern SEXP permuto(SEXP); static const R_CallMethodDef CallEntries[] = { - {"association_index_diploid", (DL_FUNC) &association_index_diploid, 4}, + {"association_index_diploid", (DL_FUNC) &association_index_diploid, 5}, {"association_index_haploid", (DL_FUNC) &association_index_haploid, 3}, {"bitwise_distance_diploid", (DL_FUNC) &bitwise_distance_diploid, 5}, {"bitwise_distance_haploid", (DL_FUNC) &bitwise_distance_haploid, 3}, From 4b07e116e7c92c12be4f861129b8789c9a432318 Mon Sep 17 00:00:00 2001 From: Zhian Kamvar Date: Thu, 22 Mar 2018 04:43:45 +0000 Subject: [PATCH 03/19] protect all as[A-Z] declarations [skip ci] This is documented at the very end of http://adv-r.had.co.nz/C-interface.html#c-vectors These all create R-level objects, so they need to be PROTECT()ed. --- R/bitwise.r | 1 - man/bitwise.dist.Rd | 6 +++++- src/bitwise_distance.c | 29 +++++++++++++++-------------- src/init.c | 4 ++-- 4 files changed, 22 insertions(+), 18 deletions(-) diff --git a/R/bitwise.r b/R/bitwise.r index e2a4fb49..eb45f0fa 100644 --- a/R/bitwise.r +++ b/R/bitwise.r @@ -302,7 +302,6 @@ bitwise.ia <- function(x, missing_match=TRUE, differences_only=FALSE, threads=0) missing_match = missing_match, differences_only = differences_only, requested_threads = threads, - euclid = FALSE, PACKAGE = "poppr") } else if(ploid == 1) diff --git a/man/bitwise.dist.Rd b/man/bitwise.dist.Rd index 868ec8fb..c508e052 100644 --- a/man/bitwise.dist.Rd +++ b/man/bitwise.dist.Rd @@ -5,7 +5,8 @@ \title{Calculate a dissimilarity distance matrix for SNP data.} \usage{ bitwise.dist(x, percent = TRUE, mat = FALSE, missing_match = TRUE, - scale_missing = FALSE, differences_only = FALSE, threads = 0) + scale_missing = FALSE, euclidean = FALSE, differences_only = FALSE, + threads = 0) } \arguments{ \item{x}{a \code{\link{genlight}}, \code{\link{genind}}, @@ -31,6 +32,9 @@ loci and x is the number of missing sites. This option matches the behavior of base R's \code{\link[package=base]{dist}} function. Defaults to \code{FALSE}.} +\item{euclidean}{\code{logical}. if \code{TRUE}, the Euclidean distance will +be calculated.} + \item{differences_only}{\code{logical}. When \code{differences_only = TRUE}, the output will reflect the number of different loci. The default setting, \code{differences_only = FALSE}, reflects the number of different alleles. diff --git a/src/bitwise_distance.c b/src/bitwise_distance.c index b9f9570e..0e72545f 100644 --- a/src/bitwise_distance.c +++ b/src/bitwise_distance.c @@ -98,7 +98,7 @@ struct locus SEXP bitwise_distance_haploid(SEXP genlight, SEXP missing, SEXP requested_threads); SEXP bitwise_distance_diploid(SEXP genlight, SEXP missing, SEXP euclid, SEXP differences_only, SEXP requested_threads); SEXP association_index_haploid(SEXP genlight, SEXP missing, SEXP requested_threads); -SEXP association_index_diploid(SEXP genlight, SEXP missing, SEXP differences_only, SEXP requested_threads, SEXP euclid); +SEXP association_index_diploid(SEXP genlight, SEXP missing, SEXP differences_only, SEXP requested_threads); SEXP get_pgen_matrix_genind(SEXP genind, SEXP freqs, SEXP pops, SEXP npop); // SEXP get_pgen_matrix_genlight(SEXP genlight, SEXP window); // void fill_Pgen(double *pgen, struct locus *loci, int interval, SEXP genlight); @@ -223,7 +223,7 @@ SEXP bitwise_distance_haploid(SEXP genlight, SEXP missing, SEXP requested_thread nap1_length = 0; nap2_length = 0; chr_length = 0; - missing_match = asLogical(missing); + PROTECT(missing_match = asLogical(missing)); // Loop through every genotype for(i = 0; i < num_gens; i++) @@ -429,7 +429,7 @@ SEXP bitwise_distance_haploid(SEXP genlight, SEXP missing, SEXP requested_thread R_Free(distance_matrix[i]); } R_Free(distance_matrix); - UNPROTECT(4); + UNPROTECT(5); return R_out; } @@ -551,9 +551,9 @@ SEXP bitwise_distance_diploid(SEXP genlight, SEXP missing, SEXP euclid, SEXP dif nap1_length = 0; nap2_length = 0; chr_length = 0; - missing_match = asLogical(missing); - only_differences = asLogical(differences_only); - is_euclid = asLogical(euclid); + PROTECT(missing_match = asLogical(missing)); + PROTECT(only_differences = asLogical(differences_only)); + PROTECT(is_euclid = asLogical(euclid)); // Loop through every genotype for(i = 0; i < num_gens - 1; i++) @@ -748,7 +748,7 @@ SEXP bitwise_distance_diploid(SEXP genlight, SEXP missing, SEXP euclid, SEXP dif R_Free(distance_matrix[i]); } R_Free(distance_matrix); - UNPROTECT(4); + UNPROTECT(7); return R_out; } @@ -925,7 +925,7 @@ SEXP association_index_haploid(SEXP genlight, SEXP missing, SEXP requested_threa nap1_length = 0; nap2_length = 0; chr_length = 0; - missing_match = asLogical(missing); + PROTECT(missing_match = asLogical(missing)); // Loop through all SNP chunks #ifdef _OPENMP @@ -1118,7 +1118,7 @@ SEXP association_index_haploid(SEXP genlight, SEXP missing, SEXP requested_threa R_Free(vars); R_Free(M); R_Free(M2); - UNPROTECT(6); + UNPROTECT(7); return R_out; } @@ -1133,7 +1133,7 @@ Input: A genlight object containing samples of diploids. A kludge to allow bitwise_distance_diploid to work Output: The index of association for this genlight object over the specified loci ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~*/ -SEXP association_index_diploid(SEXP genlight, SEXP missing, SEXP differences_only, SEXP requested_threads, SEXP euclid) +SEXP association_index_diploid(SEXP genlight, SEXP missing, SEXP differences_only, SEXP requested_threads) { // This function calculates the index of association for samples in // a genlight object. The general flow of this function is as follows: @@ -1185,6 +1185,7 @@ SEXP association_index_diploid(SEXP genlight, SEXP missing, SEXP differences_onl SEXP R_nap2; SEXP R_dists; SEXP R_nloc; + SEXP euclid; int nap1_length; int nap2_length; int chr_length; @@ -1306,13 +1307,13 @@ SEXP association_index_diploid(SEXP genlight, SEXP missing, SEXP differences_onl nap1_length = 0; nap2_length = 0; chr_length = 0; - missing_match = asLogical(missing); - only_differences = asLogical(differences_only); + PROTECT(missing_match = asLogical(missing)); + PROTECT(only_differences = asLogical(differences_only)); // Get the distance matrix from bitwise_distance PROTECT(R_dists = allocVector(INTSXP, num_gens*num_gens)); - // PROTECT(euclid = 0); + PROTECT(euclid = ScalarLogical(0)); R_dists = bitwise_distance_diploid(genlight, missing, euclid, differences_only, requested_threads); // Loop through all SNP chunks @@ -1523,7 +1524,7 @@ SEXP association_index_diploid(SEXP genlight, SEXP missing, SEXP differences_onl R_Free(vars); R_Free(M); R_Free(M2); - UNPROTECT(7); + UNPROTECT(9); return R_out; } diff --git a/src/init.c b/src/init.c index d4db0072..4fb3ba0b 100644 --- a/src/init.c +++ b/src/init.c @@ -8,7 +8,7 @@ */ /* .Call calls */ -extern SEXP association_index_diploid(SEXP, SEXP, SEXP, SEXP, SEXP); +extern SEXP association_index_diploid(SEXP, SEXP, SEXP, SEXP); extern SEXP association_index_haploid(SEXP, SEXP, SEXP); extern SEXP bitwise_distance_diploid(SEXP, SEXP, SEXP, SEXP, SEXP); extern SEXP bitwise_distance_haploid(SEXP, SEXP, SEXP); @@ -26,7 +26,7 @@ extern SEXP permute_shuff(SEXP, SEXP, SEXP); extern SEXP permuto(SEXP); static const R_CallMethodDef CallEntries[] = { - {"association_index_diploid", (DL_FUNC) &association_index_diploid, 5}, + {"association_index_diploid", (DL_FUNC) &association_index_diploid, 4}, {"association_index_haploid", (DL_FUNC) &association_index_haploid, 3}, {"bitwise_distance_diploid", (DL_FUNC) &bitwise_distance_diploid, 5}, {"bitwise_distance_haploid", (DL_FUNC) &bitwise_distance_haploid, 3}, From 1ccb327d0331d2ef6fa70daa60221825ccdd313b Mon Sep 17 00:00:00 2001 From: Zhian Kamvar Date: Thu, 22 Mar 2018 17:19:42 +0000 Subject: [PATCH 04/19] Fix euclid segfault issue. --- src/bitwise_distance.c | 23 ++++++++++------------- 1 file changed, 10 insertions(+), 13 deletions(-) diff --git a/src/bitwise_distance.c b/src/bitwise_distance.c index 0e72545f..9e54b39a 100644 --- a/src/bitwise_distance.c +++ b/src/bitwise_distance.c @@ -489,7 +489,7 @@ SEXP bitwise_distance_diploid(SEXP genlight, SEXP missing, SEXP euclid, SEXP dif int next_missing_i; int next_missing_j; int missing_match; - int is_euclid; + // int is_euclid; int only_differences; int num_threads; char mask; @@ -551,9 +551,8 @@ SEXP bitwise_distance_diploid(SEXP genlight, SEXP missing, SEXP euclid, SEXP dif nap1_length = 0; nap2_length = 0; chr_length = 0; - PROTECT(missing_match = asLogical(missing)); - PROTECT(only_differences = asLogical(differences_only)); - PROTECT(is_euclid = asLogical(euclid)); + missing_match = PROTECT(asLogical(missing)); + only_differences = PROTECT(asLogical(differences_only)); // Loop through every genotype for(i = 0; i < num_gens - 1; i++) @@ -721,7 +720,7 @@ SEXP bitwise_distance_diploid(SEXP genlight, SEXP missing, SEXP euclid, SEXP dif { // If we want the actual distance, we need a more complicated analysis of // the sets. See get_distance_custom for details. - cur_distance += get_distance_custom(tmp_sim_set, &set_1, &set_2, is_euclid); + cur_distance += get_distance_custom(tmp_sim_set, &set_1, &set_2, (int)INTEGER(euclid)[0]); // Rprintf("-> Current Distance: %d\n", cur_distance); } } @@ -748,7 +747,7 @@ SEXP bitwise_distance_diploid(SEXP genlight, SEXP missing, SEXP euclid, SEXP dif R_Free(distance_matrix[i]); } R_Free(distance_matrix); - UNPROTECT(7); + UNPROTECT(6); return R_out; } @@ -1047,8 +1046,7 @@ SEXP association_index_haploid(SEXP genlight, SEXP missing, SEXP requested_threa } // Get the distance matrix from bitwise_distance - PROTECT(R_dists = allocVector(INTSXP, num_gens*num_gens)); - R_dists = bitwise_distance_haploid(genlight, missing, requested_threads); + R_dists = PROTECT(bitwise_distance_haploid(genlight, missing, requested_threads)); // Calculate the sum and squared sum of distances between samples D = 0; @@ -1307,14 +1305,13 @@ SEXP association_index_diploid(SEXP genlight, SEXP missing, SEXP differences_onl nap1_length = 0; nap2_length = 0; chr_length = 0; - PROTECT(missing_match = asLogical(missing)); - PROTECT(only_differences = asLogical(differences_only)); + missing_match = PROTECT(asLogical(missing)); + only_differences = PROTECT(asLogical(differences_only)); // Get the distance matrix from bitwise_distance - PROTECT(R_dists = allocVector(INTSXP, num_gens*num_gens)); - PROTECT(euclid = ScalarLogical(0)); - R_dists = bitwise_distance_diploid(genlight, missing, euclid, differences_only, requested_threads); + euclid = PROTECT(ScalarInteger(0)); + R_dists = PROTECT(bitwise_distance_diploid(genlight, missing, euclid, differences_only, requested_threads)); // Loop through all SNP chunks #ifdef _OPENMP From 2aab36daea54052231775d9f761387f9ee62a0a2 Mon Sep 17 00:00:00 2001 From: Zhian Kamvar Date: Thu, 22 Mar 2018 17:20:19 +0000 Subject: [PATCH 05/19] Standardize PROTECTS This no longer works :( --- src/bitwise_distance.c | 76 +++++++++++++++++++++--------------------- 1 file changed, 38 insertions(+), 38 deletions(-) diff --git a/src/bitwise_distance.c b/src/bitwise_distance.c index 9e54b39a..046257a4 100644 --- a/src/bitwise_distance.c +++ b/src/bitwise_distance.c @@ -172,10 +172,10 @@ SEXP bitwise_distance_haploid(SEXP genlight, SEXP missing, SEXP requested_thread // genlight object. ie, R_gen_symbol is being set up as an equivalent to the // @gen accessor for genlights. - PROTECT(R_gen_symbol = install("gen")); // Used for accessing the named + R_gen_symbol = PROTECT(install("gen")); // Used for accessing the named // elements of the genlight object - PROTECT(R_chr_symbol = install("snp")); - PROTECT(R_nap_symbol = install("NA.posi")); + R_chr_symbol = PROTECT(install("snp")); + R_nap_symbol = PROTECT(install("NA.posi")); // This will be a LIST of type LIST:RAW // Set R_gen to genlight@gen, a vector of genotypes in the genlight object @@ -188,7 +188,7 @@ SEXP bitwise_distance_haploid(SEXP genlight, SEXP missing, SEXP requested_thread // Set up and initialize the matrix for storing total distance between each // pair of genotypes - PROTECT(R_out = allocVector(INTSXP, num_gens*num_gens)); + R_out = PROTECT(allocVector(INTSXP, num_gens*num_gens)); distance_matrix = R_Calloc(num_gens,int*); for(i = 0; i < num_gens; i++) { @@ -223,7 +223,7 @@ SEXP bitwise_distance_haploid(SEXP genlight, SEXP missing, SEXP requested_thread nap1_length = 0; nap2_length = 0; chr_length = 0; - PROTECT(missing_match = asLogical(missing)); + missing_match = PROTECT(asLogical(missing)); // Loop through every genotype for(i = 0; i < num_gens; i++) @@ -505,9 +505,9 @@ SEXP bitwise_distance_diploid(SEXP genlight, SEXP missing, SEXP euclid, SEXP dif // These variables and function calls are used to access elements of the genlight object. // ie, R_gen_symbol is being set up as an equivalent to the @gen accessor for genlights. - PROTECT(R_gen_symbol = install("gen")); // Used for accessing the named elements of the genlight object - PROTECT(R_chr_symbol = install("snp")); - PROTECT(R_nap_symbol = install("NA.posi")); + R_gen_symbol = PROTECT(install("gen")); // Used for accessing the named elements of the genlight object + R_chr_symbol = PROTECT(install("snp")); + R_nap_symbol = PROTECT(install("NA.posi")); // This will be a LIST of type LIST:RAW // Set R_gen to genlight@gen, a vector of genotypes in the genlight object stored as SNPbin objects. @@ -516,7 +516,7 @@ SEXP bitwise_distance_diploid(SEXP genlight, SEXP missing, SEXP euclid, SEXP dif num_gens = XLENGTH(R_gen); // Set up and initialize the matrix for storing total distance between each pair of genotypes - PROTECT(R_out = allocVector(INTSXP, num_gens*num_gens)); + R_out = PROTECT(allocVector(INTSXP, num_gens*num_gens)); distance_matrix = R_Calloc(num_gens,int*); for(i = 0; i < num_gens; i++) { @@ -848,10 +848,10 @@ SEXP association_index_haploid(SEXP genlight, SEXP missing, SEXP requested_threa // These variables and function calls are used to access elements of the genlight object. // ie, R_gen_symbol is being set up as an equivalent to the @gen accessor for genlights. - PROTECT(R_gen_symbol = install("gen")); - PROTECT(R_chr_symbol = install("snp")); - PROTECT(R_nap_symbol = install("NA.posi")); - PROTECT(R_nloc_symbol = install("n.loc")); + R_gen_symbol = PROTECT(install("gen")); + R_chr_symbol = PROTECT(install("snp")); + R_nap_symbol = PROTECT(install("NA.posi")); + R_nloc_symbol = PROTECT(install("n.loc")); // This will be a LIST of type LIST:RAW // Set R_gen to genlight@gen, a vector of genotypes in the genlight object stored as SNPbin objects. @@ -871,7 +871,7 @@ SEXP association_index_haploid(SEXP genlight, SEXP missing, SEXP requested_threa num_loci = INTEGER(R_nloc)[0]; // Prepare and allocate the output matrix - PROTECT(R_out = allocVector(REALSXP, 1)); + R_out = PROTECT(allocVector(REALSXP, 1)); // Prepare and allocate a matrix to store the SNPbin data from all samples // so that we don't need to fetch them over and over. chunk_matrix = R_Calloc(num_gens,char*); @@ -924,7 +924,7 @@ SEXP association_index_haploid(SEXP genlight, SEXP missing, SEXP requested_threa nap1_length = 0; nap2_length = 0; chr_length = 0; - PROTECT(missing_match = asLogical(missing)); + missing_match = PROTECT(asLogical(missing)); // Loop through all SNP chunks #ifdef _OPENMP @@ -1224,10 +1224,10 @@ SEXP association_index_diploid(SEXP genlight, SEXP missing, SEXP differences_onl // These variables and function calls are used to access elements of the genlight object. // ie, R_gen_symbol is being set up as an equivalent to the @gen accessor for genlights. - PROTECT(R_gen_symbol = install("gen")); // Used for accessing the named elements of the genlight object - PROTECT(R_chr_symbol = install("snp")); - PROTECT(R_nap_symbol = install("NA.posi")); - PROTECT(R_nloc_symbol = install("n.loc")); + R_gen_symbol = PROTECT(install("gen")); // Used for accessing the named elements of the genlight object + R_chr_symbol = PROTECT(install("snp")); + R_nap_symbol = PROTECT(install("NA.posi")); + R_nloc_symbol = PROTECT(install("n.loc")); // This will be a LIST of type LIST:RAW // Set R_gen to genlight@gen, a vector of genotypes in the genlight object stored as SNPbin objects. @@ -1247,7 +1247,7 @@ SEXP association_index_diploid(SEXP genlight, SEXP missing, SEXP differences_onl num_loci = INTEGER(R_nloc)[0]; // Prepare and allocate the output matrix - PROTECT(R_out = allocVector(REALSXP, 1)); + R_out = PROTECT(allocVector(REALSXP, 1)); // Prepare and allocate a matrix to store the SNPbin data from all samples // so that we don't need to fetch them over and over. chunk_matrix = R_Calloc(num_gens*2,char*); @@ -1549,9 +1549,9 @@ SEXP get_pgen_matrix_genind(SEXP genind, SEXP freqs, SEXP pops, SEXP npop) // *ploidy // Syntax for accessing the named elements of the genind object - PROTECT(R_tab_symbol = install("tab")); - PROTECT(R_loc_symbol = install("loc.n.all")); - PROTECT(R_ploidy_symbol = install("ploidy")); + R_tab_symbol = PROTECT(install("tab")); + R_loc_symbol = PROTECT(install("loc.n.all")); + R_ploidy_symbol = PROTECT(install("ploidy")); // Data in and data out ------------------------------ int* ploidy; // Ploidy per sample @@ -1585,7 +1585,7 @@ SEXP get_pgen_matrix_genind(SEXP genind, SEXP freqs, SEXP pops, SEXP npop) num_loci = XLENGTH(R_nall); alleles_per_locus = INTEGER(R_nall); num_pops = INTEGER(npop)[0]; - PROTECT(R_out = allocMatrix(REALSXP, num_gens, num_loci)); + R_out = PROTECT(allocMatrix(REALSXP, num_gens, num_loci)); pgens = REAL(R_out); // Tue Sep 1 11:19:55 2015 ------------------------------ @@ -1671,9 +1671,9 @@ SEXP get_pgen_matrix_genlight(SEXP genlight, SEXP window) SEXP R_gen_symbol; SEXP R_loc_symbol; SEXP R_pop_symbol; - PROTECT(R_gen_symbol = install("gen")); // Used for accessing the named elements of the genlight object - PROTECT(R_loc_symbol = install("n.loc")); - PROTECT(R_pop_symbol = install("pop")); + R_gen_symbol = PROTECT(install("gen")); // Used for accessing the named elements of the genlight object + R_loc_symbol = PROTECT(install("n.loc")); + R_pop_symbol = PROTECT(install("pop")); struct locus* loci; double *pgens; int num_gens; @@ -1690,7 +1690,7 @@ SEXP get_pgen_matrix_genlight(SEXP genlight, SEXP window) num_sets = ceil((double)num_loci/(double)interval); // Number of sets of loci for which pgen values should be computed size = num_gens*num_sets; pgens = R_Calloc(size, double); - PROTECT(R_out = allocVector(REALSXP,size)); + R_out = PROTECT(allocVector(REALSXP,size)); // Find the number of populations by taking the max over all genotypes num_pops = 0; @@ -1763,11 +1763,11 @@ void fill_Pgen(double *pgen, struct locus *loci, int interval, SEXP genlight) int next_missing_index; int next_missing; - PROTECT(R_gen_symbol = install("gen")); // Used for accessing the named elements of the genlight object - PROTECT(R_loc_symbol = install("n.loc")); - PROTECT(R_chr_symbol = install("snp")); - PROTECT(R_nap_symbol = install("NA.posi")); - PROTECT(R_pop_symbol = install("pop")); + R_gen_symbol = PROTECT(install("gen")); // Used for accessing the named elements of the genlight object + R_loc_symbol = PROTECT(install("n.loc")); + R_chr_symbol = PROTECT(install("snp")); + R_nap_symbol = PROTECT(install("NA.posi")); + R_pop_symbol = PROTECT(install("pop")); R_gen = getAttrib(genlight, R_gen_symbol); num_gens = XLENGTH(R_gen); @@ -1925,11 +1925,11 @@ void fill_loci(struct locus *loc, SEXP genlight) int byte; int pop; - PROTECT(R_gen_symbol = install("gen")); // Used for accessing the named elements of the genlight object - PROTECT(R_loc_symbol = install("n.loc")); - PROTECT(R_chr_symbol = install("snp")); - PROTECT(R_nap_symbol = install("NA.posi")); - PROTECT(R_pop_symbol = install("pop")); + R_gen_symbol = PROTECT(install("gen")); // Used for accessing the named elements of the genlight object + R_loc_symbol = PROTECT(install("n.loc")); + R_chr_symbol = PROTECT(install("snp")); + R_nap_symbol = PROTECT(install("NA.posi")); + R_pop_symbol = PROTECT(install("pop")); // This will be a LIST of type LIST:RAW R_gen = getAttrib(genlight, R_gen_symbol); From 5fe11b07ea39ddb99e617f62b7f6b3ac2868d18d Mon Sep 17 00:00:00 2001 From: Zhian Kamvar Date: Sun, 25 Mar 2018 11:05:44 -0500 Subject: [PATCH 06/19] change in protection --- src/bitwise_distance.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/bitwise_distance.c b/src/bitwise_distance.c index 046257a4..d743cef5 100644 --- a/src/bitwise_distance.c +++ b/src/bitwise_distance.c @@ -1305,12 +1305,12 @@ SEXP association_index_diploid(SEXP genlight, SEXP missing, SEXP differences_onl nap1_length = 0; nap2_length = 0; chr_length = 0; - missing_match = PROTECT(asLogical(missing)); - only_differences = PROTECT(asLogical(differences_only)); + missing_match = asLogical(missing); + only_differences = asLogical(differences_only); // Get the distance matrix from bitwise_distance - euclid = PROTECT(ScalarInteger(0)); + euclid = PROTECT(ScalarLogical(0)); R_dists = PROTECT(bitwise_distance_diploid(genlight, missing, euclid, differences_only, requested_threads)); // Loop through all SNP chunks @@ -1521,7 +1521,7 @@ SEXP association_index_diploid(SEXP genlight, SEXP missing, SEXP differences_onl R_Free(vars); R_Free(M); R_Free(M2); - UNPROTECT(9); + UNPROTECT(7); return R_out; } From 9996a80b747b8a4b47b2b5cd26bf3f9287d2e91d Mon Sep 17 00:00:00 2001 From: Zhian Kamvar Date: Sun, 25 Mar 2018 12:35:47 -0500 Subject: [PATCH 07/19] turn off WAE --- .travis.yml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.travis.yml b/.travis.yml index 9336b706..770542e5 100644 --- a/.travis.yml +++ b/.travis.yml @@ -25,6 +25,8 @@ notifications: on_success: change on_failure: always +warings_are_errors: false + env: global: - NOT_CRAN: true From cbfc34b10ae6c14af33824d7417d3c7179c71afb Mon Sep 17 00:00:00 2001 From: Zhian Kamvar Date: Sun, 25 Mar 2018 12:58:55 -0500 Subject: [PATCH 08/19] :fire: spelling :fire: --- .travis.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.travis.yml b/.travis.yml index 770542e5..153accfa 100644 --- a/.travis.yml +++ b/.travis.yml @@ -5,6 +5,7 @@ r: sudo: false cran: https://cran.r-project.org +warnings_are_errors: false cache: packages @@ -25,7 +26,6 @@ notifications: on_success: change on_failure: always -warings_are_errors: false env: global: From 7cb469a6b73eac90e1605ca2078e4d22c4b84f46 Mon Sep 17 00:00:00 2001 From: Zhian Kamvar Date: Sun, 25 Mar 2018 13:09:36 -0500 Subject: [PATCH 09/19] fix warnings --- src/bitwise_distance.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/src/bitwise_distance.c b/src/bitwise_distance.c index d743cef5..6cf2a278 100644 --- a/src/bitwise_distance.c +++ b/src/bitwise_distance.c @@ -223,7 +223,7 @@ SEXP bitwise_distance_haploid(SEXP genlight, SEXP missing, SEXP requested_thread nap1_length = 0; nap2_length = 0; chr_length = 0; - missing_match = PROTECT(asLogical(missing)); + missing_match = asLogical(missing); // Loop through every genotype for(i = 0; i < num_gens; i++) @@ -429,7 +429,7 @@ SEXP bitwise_distance_haploid(SEXP genlight, SEXP missing, SEXP requested_thread R_Free(distance_matrix[i]); } R_Free(distance_matrix); - UNPROTECT(5); + UNPROTECT(4); return R_out; } @@ -551,8 +551,8 @@ SEXP bitwise_distance_diploid(SEXP genlight, SEXP missing, SEXP euclid, SEXP dif nap1_length = 0; nap2_length = 0; chr_length = 0; - missing_match = PROTECT(asLogical(missing)); - only_differences = PROTECT(asLogical(differences_only)); + missing_match = asLogical(missing); + only_differences = asLogical(differences_only); // Loop through every genotype for(i = 0; i < num_gens - 1; i++) @@ -747,7 +747,7 @@ SEXP bitwise_distance_diploid(SEXP genlight, SEXP missing, SEXP euclid, SEXP dif R_Free(distance_matrix[i]); } R_Free(distance_matrix); - UNPROTECT(6); + UNPROTECT(4); return R_out; } @@ -924,7 +924,7 @@ SEXP association_index_haploid(SEXP genlight, SEXP missing, SEXP requested_threa nap1_length = 0; nap2_length = 0; chr_length = 0; - missing_match = PROTECT(asLogical(missing)); + missing_match = asLogical(missing); // Loop through all SNP chunks #ifdef _OPENMP @@ -1116,7 +1116,7 @@ SEXP association_index_haploid(SEXP genlight, SEXP missing, SEXP requested_threa R_Free(vars); R_Free(M); R_Free(M2); - UNPROTECT(7); + UNPROTECT(6); return R_out; } From f6c1d5e4cc5d8288ee35de6ac0af2130d84ba9de Mon Sep 17 00:00:00 2001 From: Zhian Kamvar Date: Tue, 27 Mar 2018 12:41:46 -0500 Subject: [PATCH 10/19] add kludge to force bitwise.ia to work Something about the shared parallelization was screwing up the code. Unfortunately, I can't for the life of me figure out why since the same process happens with haploids. --- src/bitwise_distance.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/src/bitwise_distance.c b/src/bitwise_distance.c index 6cf2a278..f34bf4c4 100644 --- a/src/bitwise_distance.c +++ b/src/bitwise_distance.c @@ -1308,10 +1308,11 @@ SEXP association_index_diploid(SEXP genlight, SEXP missing, SEXP differences_onl missing_match = asLogical(missing); only_differences = asLogical(differences_only); - // Get the distance matrix from bitwise_distance euclid = PROTECT(ScalarLogical(0)); - R_dists = PROTECT(bitwise_distance_diploid(genlight, missing, euclid, differences_only, requested_threads)); + SEXP one_thread = PROTECT(ScalarInteger(1)); + R_dists = PROTECT(bitwise_distance_diploid(genlight, missing, euclid, differences_only, one_thread)); + // Loop through all SNP chunks #ifdef _OPENMP @@ -1521,7 +1522,7 @@ SEXP association_index_diploid(SEXP genlight, SEXP missing, SEXP differences_onl R_Free(vars); R_Free(M); R_Free(M2); - UNPROTECT(7); + UNPROTECT(8); return R_out; } From dc37eb7f531c421f0f0de530d9215e3414e6d114 Mon Sep 17 00:00:00 2001 From: Zhian Kamvar Date: Tue, 27 Mar 2018 12:49:22 -0500 Subject: [PATCH 11/19] add test for euclidean --- tests/testthat/test-bitwise.R | 18 ++++++++++++------ tests/testthat/test-values.R | 2 +- 2 files changed, 13 insertions(+), 7 deletions(-) diff --git a/tests/testthat/test-bitwise.R b/tests/testthat/test-bitwise.R index f00a6f1d..ef33a58e 100644 --- a/tests/testthat/test-bitwise.R +++ b/tests/testthat/test-bitwise.R @@ -1,23 +1,29 @@ -context("Bitwise distance and pgen calculations") +context("Bitwise distance calculations") set.seed(991) dat <- sample(c(0, 1, NA), 50, replace = TRUE, prob = c(0.49, 0.5, 0.01)) mat <- matrix(dat, nrow = 5, ncol = 10) +mat2 <- mat +mat2[-1, ] <- mat2[-1, ] * 2 test_that("bitwise.dist works for haploids", { mat.gl <- new("genlight", mat, parallel = FALSE) expected <- rowSums(apply(mat, 2, dist), na.rm = TRUE)/10 - expect_equivalent(as.vector(bitwise.dist(mat.gl)), expected) + expect_equivalent(as.vector(bitwise.dist(mat.gl, threads = 1L)), expected) expect_true(all(ploidy(mat.gl) == 1)) }) test_that("bitwise.dist works for diploids with only one type of homozygote", { - mat2 <- mat - mat2[-1, ] <- mat2[-1, ] * 2 mat2.gl <- new("genlight", mat2, parallel = FALSE) - expect_error(bitwise.dist(mat2.gl), "ploidy") + expect_error(bitwise.dist(mat2.gl, threads = 1L), "ploidy") ploidy(mat2.gl) <- rep(2, 5) expected <- rowSums(apply(mat2, 2, dist), na.rm = TRUE)/20 - expect_equivalent(as.vector(bitwise.dist(mat2.gl)), expected) + expect_equivalent(as.vector(bitwise.dist(mat2.gl, threads = 1L)), expected) +}) + +test_that("bitwise.dist can do euclidean", { + mat2.gl <- new("genlight", mat2, parallel = FALSE) + ploidy(mat2.gl) <- rep(2, 5) + expect_equivalent(bitwise.dist(mat2.gl, scale_missing = TRUE, euclid = TRUE, threads = 1L), dist(mat2.gl)) }) diff --git a/tests/testthat/test-values.R b/tests/testthat/test-values.R index acb98b15..5a0b84c0 100644 --- a/tests/testthat/test-values.R +++ b/tests/testthat/test-values.R @@ -394,4 +394,4 @@ test_that("samp.ia works",{ test_that("poppr_has_parallel returns something logical", { expect_is(poppr_has_parallel(), "logical") -}) \ No newline at end of file +}) From daac5f88f62900119539d45c6ee264dcedb8aaf8 Mon Sep 17 00:00:00 2001 From: Zhian Kamvar Date: Tue, 27 Mar 2018 13:11:54 -0500 Subject: [PATCH 12/19] fix R check complaint --- DESCRIPTION | 4 ++-- R/bitwise.r | 2 +- README.md | 6 +++--- man/bitwise.dist.Rd | 2 +- 4 files changed, 7 insertions(+), 7 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index 5fe111ef..48067807 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,8 +1,8 @@ Package: poppr Type: Package Title: Genetic Analysis of Populations with Mixed Reproduction -Version: 2.7.1 -Date: 2018-03-16 +Version: 2.7.1.99-14 +Date: 2018-03-27 Authors@R: c(person(c("Zhian", "N."), "Kamvar", role = c("cre", "aut"), email = "zkamvar@gmail.com", comment = c(ORCID = "0000-0003-1458-7108")), person(c("Javier", "F."), "Tabima", role = "aut", diff --git a/R/bitwise.r b/R/bitwise.r index eb45f0fa..f07198c3 100644 --- a/R/bitwise.r +++ b/R/bitwise.r @@ -68,7 +68,7 @@ #' data is scaled up proportionally to the number of columns used by #' multiplying the value by \code{m / (m - x)} where m is the number of #' loci and x is the number of missing sites. This option matches the behavior -#' of base R's \code{\link[package=base]{dist}} function. +#' of base R's \code{\link{dist}} function. #' Defaults to \code{FALSE}. #' #' @param euclidean \code{logical}. if \code{TRUE}, the Euclidean distance will diff --git a/README.md b/README.md index 11e6beac..386fe522 100644 --- a/README.md +++ b/README.md @@ -1,8 +1,8 @@ # Poppr version 2 -[![Build Status](https://travis-ci.org/grunwaldlab/poppr.svg?branch=update-amova)](https://travis-ci.org/grunwaldlab/poppr) -[![AppVeyor build status](https://ci.appveyor.com/api/projects/status/github/grunwaldlab/poppr?branch=master&svg=true)](https://ci.appveyor.com/project/grunwaldlab/poppr) -[![Coverage Status](https://coveralls.io/repos/grunwaldlab/poppr/badge.svg?branch=update-amova)](https://coveralls.io/r/grunwaldlab/poppr?branch=update-amova) +[![Build Status](https://travis-ci.org/grunwaldlab/poppr.svg?branch=euclidean-bitwise)](https://travis-ci.org/grunwaldlab/poppr) +[![AppVeyor build status](https://ci.appveyor.com/api/projects/status/github/grunwaldlab/poppr?branch=euclidean-bitwise)](https://ci.appveyor.com/project/grunwaldlab/poppr) +[![Coverage Status](https://coveralls.io/repos/grunwaldlab/poppr/badge.svg?branch=euclidean-bitwise)](https://coveralls.io/r/grunwaldlab/poppr?branch=euclidean-bitwise) [![CRAN version](http://www.r-pkg.org/badges/version/poppr)](https://cran.r-project.org/package=poppr)