Skip to content

Commit

Permalink
Merge branch 'devel'
Browse files Browse the repository at this point in the history
  • Loading branch information
lutsik committed Dec 16, 2016
2 parents 8e63c53 + 499d727 commit cf2bf53
Show file tree
Hide file tree
Showing 29 changed files with 525 additions and 221 deletions.
14 changes: 10 additions & 4 deletions DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -5,24 +5,30 @@ Maintainer: Pavlo Lutsik <p.lutsik@mx.uni-saarland.de>
Authors@R: c(
person ( "Pavlo", "Lutsik", email="p.lutsik@mx.uni-saarland.de", role = c("cre")),
person ( "Martin", "Slawski", email="ms@cs.uni-saarland.de", role = c("aut")),
person ( "Nik", "Vedeneev", email="nikitavedeneev@gmail.com", role = c("aut")),
person ( "Gilles", "Gasparoni", email="gillesgasparoni@gmail.com", role = c("aut")),
person ( "Matthias", "Hein", email="hein@cs.uni-saarland.de", role = c("aut")),
person ( "Joern", "Walter", email="j.walter@mx.uni-saarland.de", role = c("aut"))
)
Date: 2016-06-30
Version: 0.11
Date: 2016-11-02
Version: 0.2
Depends:
R (>= 3.2.0),
Rcpp,
pracma,
gtools,
gplots,
parallel,
parallel
Suggests:
BiocStyle,
knitr,
rmarkdown
Imports:
Rcpp (>= 0.12),
RcppEigen (>= 0.3)
LinkingTo:
Rcpp,
RcppEigen
VignetteBuilder: knitr
License: GPL-3
LinkingTo: Rcpp
RoxygenNote: 5.0.1
11 changes: 11 additions & 0 deletions NEWS
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
MeDeCom 0.2
=============

* Added improved optimization routines (cppTAfact by Nik Vedeneev) which are now used by default
* Improved R-side parallelization in the single machine mode
* Multiple minor fixes and improvements

MeDeCom 0.1
=============

* Initial release
10 changes: 5 additions & 5 deletions R/RcppExports.R
Original file line number Diff line number Diff line change
@@ -1,12 +1,12 @@
# This file was generated by Rcpp::compileAttributes
# Generated by using Rcpp::compileAttributes() -> do not edit by hand
# Generator token: 10BE3573-1514-4C36-9D1C-5A225CD40393

RHLasso <- function(Ginp, Winp, Ainp, l) {
.Call('MeDeCom_RHLasso', PACKAGE = 'MeDeCom', Ginp, Winp, Ainp, l)
cppTAfact <- function(mDtSEXP, mTtinitSEXP, mAinitSEXP, lambda = 0.0, itersMax = 1000L, tol = 1e-8, tolA = 1e-7, tolT = 1e-7) {
.Call('MeDeCom_cppTAfact', PACKAGE = 'MeDeCom', mDtSEXP, mTtinitSEXP, mAinitSEXP, lambda, itersMax, tol, tolA, tolT)
}

matMult <- function(Ainp, Binp) {
.Call('MeDeCom_matMult', PACKAGE = 'MeDeCom', Ainp, Binp)
RHLasso <- function(Ginp, Winp, Ainp, l) {
.Call('MeDeCom_RHLasso', PACKAGE = 'MeDeCom', Ginp, Winp, Ainp, l)
}

RQuadHC <- function(Ginp, Winp, Ainp, otol, lconstr, uconstr) {
Expand Down
15 changes: 11 additions & 4 deletions R/allGlobals.R
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@

#######################################################################################################################
# GLOBALS
#######################################################################################################################
Expand All @@ -7,7 +8,9 @@ ALGORITHMS<-c(
"regression",
"houseman2012",
"houseman2016",
"MeDeCom",
"MeDeCom",
"MeDeCom.quadPen",
"MeDeCom.cppTAfact",
"HLasso",
"IntFac",
"IntEmpirical",
Expand All @@ -19,7 +22,9 @@ T_METHODS<-c(
NA,
NA,
NA,
"quadPen",
"quadPen",
"quadPen",
"cppTAfact",
"Hlasso",
"integer",
"empirical",
Expand All @@ -33,14 +38,16 @@ ALGORITHM.COLS=c(
"regression"="blue",
"houseman2012"="deepskyblue",
"houseman2016"="skyblue",
"MeDeCom"="red",
"MeDeCom"="red",
"MeDeCom.quadPen"="red",
"MeDeCom.cppTAfact"="tomato",
"HLasso"="orange",
"IntFac"="green",
"IntEmpirical"="plum",
"Resample"="magenta",
"VertexSearch"="brown")

ALGORITHM.PCH=c(15,0,0,0,2,3,4,5,6,1)
ALGORITHM.PCH=c(15,0,0,0,2,2,2,3,4,5,6,1)

names(ALGORITHM.PCH)<-ALGORITHMS

Expand Down
133 changes: 122 additions & 11 deletions R/factorizations.R
Original file line number Diff line number Diff line change
Expand Up @@ -180,6 +180,7 @@ updateT_gini<-function(G, W, Tk, lambdaT, tol, f0, lower=0, upper=1){
return(list(Tk1, fk1, iter))
}


#######################################################################################################################

updateT_empirical<-function(G, W, Poss, lambda){
Expand Down Expand Up @@ -239,7 +240,25 @@ updateT_empirical<-function(G, W, Poss, lambda){
# return(Tnew)
#
#}
#######################################################################################################################

updateT_multicore<-function(method="gini", G, W, Tk, lambdaT, tol, f0, lower=0, upper=1, ncores=2){

if(method=="gini"){
update.func<-updateT_gini
}

row.partition<-lapply(0:(ncores-1), function(chunck) (1:(ncol(W)%/%ncores))+chunck*(ncol(W)%/%ncores))

opt_res<-mclapply(row.partition, function(row.ind) update.func(G, W[,row.ind], Tk[,row.ind], lambdaT, tol, f0, lower=0, upper=1), mc.cores=ncores)
#opt_res<-lapply(row.partition, function(row.ind) update.func(G, W[,row.ind], Tk[,row.ind], lambdaT, tol, f0, lower=0, upper=1))

Tk<-do.call("cbind", lapply(opt_res, el, 1))
fk<-sum(sapply(opt_res, el, 2))
avg_iter<-mean(sapply(opt_res, el, 3))

return(list(Tk,fk,avg_iter))
}

#######################################################################################################################
# Wrappers for the T update methods
Expand Down Expand Up @@ -393,7 +412,8 @@ onerun.alternate<-function(
eps=1e-8,
blocks=NULL,
na.values=FALSE,
verbose=TRUE){
verbose=TRUE,
ncores=1){

k<-ncol(T0)
if(!is.null(Tfix)){
Expand Down Expand Up @@ -586,7 +606,11 @@ onerun.alternate<-function(
G <- A %*% t(A); W <- A %*% Dt4T
##%%%mexHCLasso(G,W,T',lambda);

res <- updateT_gini(G, W, Tstart, lambda, eps, f - norm_val, lower=qp.rangeT[1], upper=qp.rangeT[2]);
if(ncores>1){
res <- updateT_multicore("gini", G, W, Tstart, lambda, eps, f - norm_val, lower=qp.rangeT[1], upper=qp.rangeT[2], ncores=ncores);
}else{
res <- updateT_gini(G, W, Tstart, lambda, eps, f - norm_val, lower=qp.rangeT[1], upper=qp.rangeT[2]);
}
Trecov <- t(res[[1]]); ftemp<-res[[2]]

if(!is.null(Tfix)){
Expand Down Expand Up @@ -750,13 +774,86 @@ onerun.alternate<-function(
}

rmse<-sqrt(sum((D-TT%*%A)^2)/ncol(D)/nrow(D))
result<-list("T" = TT, "A" = A, "Fval" = Fval, "Conv" = Conv, "rmse"=rmse)
result<-list("T" = TT, "A" = A, "Fval" = Fval, "Conv" = Conv, "rmse" = rmse)
if(!is.null(Tfix)){
result$Afix<-Afix
}
return(result)
}



#######################################################################################################################
# Implemenation of the alternating optimization scheme
#######################################################################################################################
#
# a wrapper for cppTAfact
#
# cppTAfact - alternating optimization framework to solve the following
# problem:
# find T, A such that 0.5 * (||D - TA||_F)^2 + lambda ,
# where D is a mxn matrix with entries between 0 and 1,
# T is a mxr matrix with entries between 0 and 1,
# A is a rxn matrix with nonnegative values and columns summing up
# to 1.
#
# author: Nikita Vedeneev
#
# R port by Pavlo Lutsik
#

onerun.cppTAfact<-function(
D,
T0,
A0,
Tfix=NULL,
Tpartial=NULL,
Tpartial.rows=NULL,
Apartial=NULL,
Apartial.cols=NULL,
t.method="quadPen",
lambda = 0,
t.Poss = NULL,
normD = NULL,
itermax=100,
qp.rangeT=c(0,1),
#qp.Alower=rep(0,ncol(T0)),
#qp.Aupper=rep(1,ncol(T0)),
qp.Alower=NULL,
qp.Aupper=NULL,
emp.dim=500,
emp.resample=TRUE,
emp.vsf=1,
emp.borders=c(0,1),
trace=FALSE,
eps=1e-8,
blocks=NULL,
na.values=FALSE,
verbose=TRUE,
ncores=1){

res<-cppTAfact(
t(D), #- a transposed D matrix,
t(T0), #-Ttinit - a transposed init for T matrix,
A0, # - an initial value for A matrix,
lambda,# - regularizer parameter (0.0 by default),
itermax, #itersMax, - a max number of alternations (1000 by default),
eps, #tol - tolerance for alternations (1e-8 by default),
10*eps, #tolA - tolerance for opt wrt A (1e-7 by default),
10*eps #tolT - tolerance for opt wrt T (1e-7 by default)
)
### TODO: modify cppTAfact to output the list is identical to the output of onerun.alternate
#
#cppTAfact returns a named list where:
#res$Tt - a transposed estimated of T matrix,
#res$A - an estimate of A matrix,
#res$niter - a total number of alternations
#res$objF - objective value at res$Tt and res$A
#
result<-list("T" = t(res$Tt), "A" = res$A, "Fval" = res$objF, "Conv" = res$niter, "rmse"= res$rmse)
return(result)
}

#######################################################################################################################
#'
#' factorize.alternate
Expand Down Expand Up @@ -823,7 +920,8 @@ onerun.alternate<-function(
#' @export
#'
factorize.alternate<-function(D,
k,
k,
method="MeDeCom.quadPen",
t.method="quadPen",
Tfix=NULL,
Tpartial=NULL,
Expand All @@ -849,9 +947,10 @@ factorize.alternate<-function(D,
ncores=1,
pheno=NULL,
na.values=FALSE,
seed=NULL,
verbosity=0L){

if(!t.method %in% c("integer", "empirical", "resample", "Hlasso", "optim", "quadPen")){
if(!t.method %in% c("integer", "empirical", "resample", "Hlasso", "optim", "quadPen", "cppTAfact")){
stop("supplied optimization method for T is not implemented")
}

Expand Down Expand Up @@ -944,9 +1043,11 @@ factorize.alternate<-function(D,
}
}


if(init=="random"){
numruns <- opt
if(!is.null(seed)){
set.seed(seed)
}
}else if(init=="fixed"){
numruns <- 1
}else{
Expand Down Expand Up @@ -1008,8 +1109,15 @@ factorize.alternate<-function(D,
}
}
}

# solve the topic model
onerun <- onerun.alternate(
if(method == "MeDeCom.cppTAfact"){
onerun.function<-onerun.cppTAfact
}else{
onerun.function<-onerun.alternate
}

onerun <- onerun.function(
D, T0, A0,
Tfix = Tfix,
Tpartial=NULL,
Expand All @@ -1032,7 +1140,8 @@ factorize.alternate<-function(D,
eps=eps,
blocks=blocks,
na.values=na.values,
verbose=verbosity>1L);
verbose=verbosity>1L,
ncores=ncores);

# Ts[[run]]<<-onerun[[1]]
# As[[run]]<<-onerun[[2]]
Expand All @@ -1050,7 +1159,8 @@ factorize.alternate<-function(D,
# pcoordinates <- foreach(target = targets) %dopar%
# tryCatch(RnBeads::rnb.execute.dreduction(rnb.set, target = target), error = function(e) { e$message } )

if(ncores>1){
#if(ncores>1){
if(FALSE){
#require(doMC)
#registerDoMC(N_CORES)
#cl<-makeCluster(N_CORES)
Expand Down Expand Up @@ -1084,7 +1194,8 @@ factorize.alternate<-function(D,
Conv <- Convs[[idx]]

if(!na.values){
rmse<-sqrt(sum((D-TT%*%A)^2)/ncol(D)/nrow(D))
#rmse<-sqrt(sum((D-TT%*%A)^2)/ncol(D)/nrow(D))
rmse <- Fval - lambda*sum(TT-TT^2)
}else{
diff_mat<-D-TT%*%A
rmse<-sqrt(sum(diff_mat[!is.na(diff_mat)]^2)/ncol(D)/nrow(D))
Expand Down Expand Up @@ -1614,4 +1725,4 @@ factorize.exact<-function(D, r, V=c(0,1), opt=NULL, zeroprob = (0 %in% V)){
return(list(T=T, A=A, status=status))

}
#######################################################################################################################
#######################################################################################################################
Loading

0 comments on commit cf2bf53

Please sign in to comment.