Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add euclidean option and missing correction to bitwise.dist #176

Merged
merged 20 commits into from
Apr 3, 2018
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .Rbuildignore
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
^\.travis\.yml$
^vignettes/figure
^tests/testthat/Rplots.pdf
^CONDUCT\.md$
^CODE_OF_CONDUCT\.md$
^NEWS\.md$
^cran-comments\.md$
^docs$
Expand Down
2 changes: 2 additions & 0 deletions .travis.yml
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ r:

sudo: false
cran: https://cran.r-project.org
warnings_are_errors: true

cache: packages

Expand All @@ -25,6 +26,7 @@ notifications:
on_success: change
on_failure: always


env:
global:
- NOT_CRAN: true
Expand Down
File renamed without changes.
4 changes: 2 additions & 2 deletions DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
Package: poppr
Type: Package
Title: Genetic Analysis of Populations with Mixed Reproduction
Version: 2.7.1
Date: 2018-03-16
Version: 2.7.1.99-14
Date: 2018-03-27
Authors@R: c(person(c("Zhian", "N."), "Kamvar", role = c("cre", "aut"),
email = "zkamvar@gmail.com", comment = c(ORCID = "0000-0003-1458-7108")),
person(c("Javier", "F."), "Tabima", role = "aut",
Expand Down
55 changes: 47 additions & 8 deletions R/bitwise.r
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,16 @@
#' with anything. \code{FALSE} forces missing data to not match with any other
#' information, \strong{including other missing data}.
#'
#' @param scale_missing A logical. If \code{TRUE}, comparisons with missing
#' data is scaled up proportionally to the number of columns used by
#' multiplying the value by \code{m / (m - x)} where m is the number of
#' loci and x is the number of missing sites. This option matches the behavior
#' of base R's \code{\link{dist}} function.
#' Defaults to \code{FALSE}.
#'
#' @param euclidean \code{logical}. if \code{TRUE}, the Euclidean distance will
#' be calculated.
#'
#' @param differences_only \code{logical}. When \code{differences_only = TRUE},
#' the output will reflect the number of different loci. The default setting,
#' \code{differences_only = FALSE}, reflects the number of different alleles.
Expand Down Expand Up @@ -107,6 +117,7 @@
#' xdt
#==============================================================================#
bitwise.dist <- function(x, percent = TRUE, mat = FALSE, missing_match = TRUE,
scale_missing = FALSE, euclidean = FALSE,
differences_only = FALSE, threads = 0){
stopifnot(inherits(x, c("genlight", "genclone", "genind", "snpclone")))
# Stop if the ploidy of the genlight object is not consistent
Expand Down Expand Up @@ -153,14 +164,21 @@ bitwise.dist <- function(x, percent = TRUE, mat = FALSE, missing_match = TRUE,
}
else
{
pairwise_dist <- .Call("bitwise_distance_diploid", x, missing_match, differences_only, threads)
pairwise_dist <- .Call("bitwise_distance_diploid", x, missing_match, euclidean, differences_only, threads)
}
dist.mat <- pairwise_dist
dim(dist.mat) <- c(inds,inds)
colnames(dist.mat) <- ind.names
rownames(dist.mat) <- ind.names
if (percent){
if(differences_only)
nas <- NA.posi(x)
if (scale_missing && sum(lengths(nas)) > 0) {
adj <- missing_correction(nas, nLoc(x))
dist.mat <- dist.mat * adj
}
if (euclidean) {
dist.mat <- sqrt(dist.mat)
} else if (percent) {
if (differences_only)
{
dist.mat <- dist.mat/(numPairs)
}
Expand All @@ -169,7 +187,7 @@ bitwise.dist <- function(x, percent = TRUE, mat = FALSE, missing_match = TRUE,
dist.mat <- dist.mat/(numPairs*ploid)
}
}
if (mat == FALSE){
if (mat == FALSE) {
dist.mat <- as.dist(dist.mat)
}
return(dist.mat)
Expand All @@ -189,15 +207,32 @@ poppr_has_parallel <- function(){

supported <- .Call("omp_test", PACKAGE = "poppr")

if(supported == 0) {
if (supported == 0) {
return(FALSE)
} else {
return(TRUE)
}

}


#' Calculate correction for genetic distances
#'
#' @param nas a list of missing positions per sample
#' @param nloc the number of loci
#' @param mat a logical specifying whether or not a matrix should be returned
#' (default: TRUE)
#'
#' @return an n x n matrix or a choose(n, 2) length vector of values that scale
#' from 1 to the number of loci.
#' @noRd
missing_correction <- function(nas, nloc, mat = TRUE){
res <- .Call("adjust_missing", nas, nloc, PACKAGE = "poppr")
if (mat) {
return(res)
} else {
return(res[lower.tri(res)])
}
}

#==============================================================================#
#' Calculate the index of association between samples in a genlight object.
Expand Down Expand Up @@ -255,8 +290,12 @@ bitwise.ia <- function(x, missing_match=TRUE, differences_only=FALSE, threads=0)
# Ensure that every SNPbin object has data for all chromosomes
if (ploid == 2){
x <- fix_uneven_diploid(x)
IA <- .Call("association_index_diploid", x, missing_match, differences_only,
threads, PACKAGE = "poppr")
IA <- .Call("association_index_diploid",
genlight = x,
missing_match = missing_match,
differences_only = differences_only,
requested_threads = threads,
PACKAGE = "poppr")
}
else if(ploid == 1)
{
Expand Down
6 changes: 3 additions & 3 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
# Poppr version 2 <img src="vignettes/small_logo.png" align="right"/>

[![Build Status](https://travis-ci.org/grunwaldlab/poppr.svg?branch=update-amova)](https://travis-ci.org/grunwaldlab/poppr)
[![AppVeyor build status](https://ci.appveyor.com/api/projects/status/github/grunwaldlab/poppr?branch=master&svg=true)](https://ci.appveyor.com/project/grunwaldlab/poppr)
[![Coverage Status](https://coveralls.io/repos/grunwaldlab/poppr/badge.svg?branch=update-amova)](https://coveralls.io/r/grunwaldlab/poppr?branch=update-amova)
[![Build Status](https://travis-ci.org/grunwaldlab/poppr.svg?branch=euclidean-bitwise)](https://travis-ci.org/grunwaldlab/poppr)
[![AppVeyor build status](https://ci.appveyor.com/api/projects/status/github/grunwaldlab/poppr?branch=euclidean-bitwise)](https://ci.appveyor.com/project/grunwaldlab/poppr)
[![Coverage Status](https://coveralls.io/repos/grunwaldlab/poppr/badge.svg?branch=euclidean-bitwise)](https://coveralls.io/r/grunwaldlab/poppr?branch=euclidean-bitwise)
[![CRAN version](http://www.r-pkg.org/badges/version/poppr)](https://cran.r-project.org/package=poppr)
<!--
[![Downloads from Rstudio mirror per month](https://cranlogs.r-pkg.org/badges/poppr)](http://www.r-pkg.org/pkg/poppr)
Expand Down
13 changes: 12 additions & 1 deletion man/bitwise.dist.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

96 changes: 96 additions & 0 deletions src/adjust_missing.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,96 @@
#include <Rinternals.h>
#include <Rdefines.h>
#include <R.h>

/*
#' Calculate correction for genetic distances
#'
#' @param nas a list of missing positions per sample
#' @param nloc the number of loci
#' @param mat a logical specifying whether or not a matrix should be returned
#' (default: TRUE)
#'
#' @return an n x n matrix or a choose(n, 2) length vector of values that scale
#' from 1 to the number of loci.
#' @noRd
missing_correction <- function(nas, nloc, mat = TRUE){
n <- length(nas)
correction <- matrix(0, nrow = n, ncol = n)
for (i in seq(n - 1)) {
for (j in seq(from = i + 1, to = n)) {
res <- length(unique(unlist(nas[c(i, j)], use.names = FALSE)))
correction[j, i] <- res -> correction[i, j]
}
}
res <- nloc/(nloc - correction)
if (mat) {
return(res)
} else {
return(res[lower.tri(res)])
}
}

*/

int count_unique(SEXP arr1, SEXP arr2);
SEXP adjust_missing(SEXP nas, SEXP nloc);

int count_unique(SEXP arr1, SEXP arr2)
{
int i = 0;
int j = 0;
int duplicates = 0;
int len1 = length(arr1);
int len2 = length(arr2);
while (i < len1 && j < len2)
{
if (INTEGER(arr1)[i] < INTEGER(arr2)[j])
{
i++;
}
else if (INTEGER(arr1)[i] > INTEGER(arr2)[j])
{
j++;
}
else
{
i++;
j++;
duplicates++;
}
}
return len1 + len2 - duplicates;
}

SEXP adjust_missing(SEXP nas, SEXP nloc)
{
int i;
int j;
int NLOC = asInteger(nloc);
SEXP nai;
SEXP naj;
double u;
int n = length(nas);
SEXP out = PROTECT(allocMatrix(REALSXP, n, n));
for (i = 0; i < n - 1; i++)
{
// set diag to one
REAL(out)[i + i*n] = 1.0;
// GET NA list for i
nai = VECTOR_ELT(nas, i);
for (j = i + 1; j < n; j++)
{
// Get NA list for j
naj = VECTOR_ELT(nas, j);
// Scale by N/(N - M)
u = (double)NLOC/(double)(NLOC - count_unique(nai, naj));

REAL(out)[i + j*n] = u;
REAL(out)[i*n + j] = u;
}
}
// fencepost for identity
REAL(out)[(n*n) - 1] = 1.0;
UNPROTECT(1);
return(out);
}
Loading