Skip to content

Moonerss/diffexpR

Folders and files

NameName
Last commit message
Last commit date

Latest commit

 

History

14 Commits
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 

Repository files navigation

diffexpR

The goal of diffexpR is to easy do differential expression analysis.

Installation

You can install the released version of diffexpR from Github with:

install.packages('remotes')
remotes::install_github('Moonerss/diffexpR')

Usage

We can do differential expression analysis underline the methods apply by limma, DESeq2, and edgeR.

library(diffexpR)

## add data
data("OSCC")
data("eset")

## do differential expression analysis in array data
array_res <- diff_limma_array(array_mat = eset$array, label_list = eset$group, groups = 'brain-liver')
head(array_res)
#>                logFC   AveExpr         t      P.Value    adj.P.Val        B
#> 36781_at   -6.328881 11.701151 -66.58976 7.546173e-13 9.527798e-09 18.80203
#> 38839_at    4.690396  9.537184  53.75160 4.636910e-12 2.927281e-08 17.65523
#> 36984_f_at -5.920635 11.063179 -48.29727 1.147711e-11 3.451344e-08 17.00528
#> 35822_at   -4.535625  9.861791 -48.09034 1.190205e-11 3.451344e-08 16.97819
#> 33990_at   -5.881176  9.124178 -45.41456 1.932413e-11 3.451344e-08 16.60973
#> 33377_at   -6.686242 11.047807 -44.95467 2.106257e-11 3.451344e-08 16.54285

## do differential expression analysis in count data
limma_count_res <- diff_limma_count(count_mat = OSCC$count, label_list = OSCC$group, groups = 'tumor-normal', methods = 'voom')
head(limma_count_res)
#>           logFC  AveExpr         t      P.Value  adj.P.Val        B
#> 23328 -3.359522 6.020708 -9.962093 6.607450e-06 0.03906681 4.286401
#> 487   -4.482566 4.691778 -9.407894 1.026908e-05 0.03906681 3.590750
#> 5744   3.997323 5.182639  9.017082 1.420378e-05 0.03906681 3.541065
#> 4008  -2.461583 6.656455 -8.557298 2.112511e-05 0.03906681 3.304213
#> 7049  -3.428197 4.580525 -8.486091 2.249960e-05 0.03906681 3.109346
#> 23428  2.705798 6.845441  8.221803 2.853915e-05 0.04247441 3.017427

deseq2_count_res <- diff_deseq2_count(count_mat = OSCC$count, label_list = OSCC$group, groups = c('condition', 'tumor', 'normal'))
#> converting counts to integer mode
#> estimating size factors
#> estimating dispersions
#> gene-wise dispersion estimates
#> mean-dispersion relationship
#> final dispersion estimates
#> fitting model and testing
head(deseq2_count_res)
#>        baseMean log2FoldChange     lfcSE       stat      pvalue       padj
#> 378938 442800.2    -0.15972504 0.3942520 -0.4051344 0.685378739 0.88153819
#> 7273   299965.3    -3.57681760 1.1699933 -3.0571265 0.002234699 0.02918056
#> 9301   217031.0    -1.04421957 0.7789888 -1.3404808 0.180089085 0.49395863
#> 9302   220624.2    -0.38784864 0.6390947 -0.6068719 0.543935915 0.80897043
#> 3860   200614.6    -3.41785404 1.1456725 -2.9832732 0.002851833 0.03532593
#> 283131 146658.3    -0.08613695 0.5716606 -0.1506785 0.880229351 0.96181284

edger_count_res <- diff_edger_count(count_mat = OSCC$count, label_list = OSCC$group, groups = 'tumor-normal')
head(edger_count_res)
#>              logFC   logCPM          LR     PValue       FDR
#> 378938  0.09985532 15.20968 0.023583963 0.87794823 0.9611649
#> 7273   -3.34074199 14.64168 6.012740136 0.01420296 0.1334233
#> 9301   -0.61495824 14.00558 0.595193802 0.44041771 0.7452122
#> 9302   -0.03235385 14.11700 0.002089412 0.96354132 0.9897320
#> 3860   -3.27177747 13.98894 6.424946563 0.01125283 0.1156844
#> 283131  0.06461631 13.67219 0.007508207 0.93094979 0.9795611

## do differential expression analysis in normalized rna-seq data
limma_normalize_res <- diff_limma_normalize(expr_mat = OSCC$rpkm, label_list = OSCC$group, groups = 'tumor-normal')
#> The data didn't log2 tranform, do it ...
head(limma_normalize_res)
#>           logFC  AveExpr         t      P.Value   adj.P.Val        B
#> 487   -2.162600 1.389077 -9.495564 1.568959e-06 0.009874003 5.409303
#> 5837  -2.973479 1.836746 -9.265067 1.986457e-06 0.009874003 5.212258
#> 5744   2.882265 2.099697  8.701345 3.609777e-06 0.009874003 4.705221
#> 844   -2.367279 1.392456 -8.656301 3.791132e-06 0.009874003 4.663107
#> 54209  1.568041 1.023950  8.220780 6.153340e-06 0.012821099 4.243103
#> 7148  -3.149256 2.565681 -7.802885 9.976082e-06 0.013621104 3.817375

Compare the result of three different methods:

library(dplyr)
#> 
#> Attaching package: 'dplyr'
#> The following objects are masked from 'package:stats':
#> 
#>     filter, lag
#> The following objects are masked from 'package:base':
#> 
#>     intersect, setdiff, setequal, union
library(ggVennDiagram)

edger_res <- rownames(filter(edger_count_res, FDR < 0.05, abs(logFC) > 1))
deseq2_res <- rownames(filter(deseq2_count_res, padj < 0.05, abs(log2FoldChange) > 1))
limma_res <- rownames(filter(limma_count_res, adj.P.Val < 0.05, abs(logFC) > 1))

## veen 
x <- list(limma_res = limma_res, edger_res = edger_res, deseq2_res = deseq2_res)
ggVennDiagram(x)

About

No description, website, or topics provided.

Resources

License

Stars

Watchers

Forks

Releases

No releases published

Packages

No packages published

Languages