From 02a23af9671a7967b2b5fae327c1125bd809b808 Mon Sep 17 00:00:00 2001
From: "Gregory W. Schwartz"
Date: Mon, 17 Jul 2017 13:13:31 -0400
Subject: [PATCH 1/4] Added standardize normalization technique.

---
NOTE(review): 'standardize' divides every element by 'stdDev xs'. For a sample
whose standard deviation is zero (for example, all elements equal) that is a
division by zero in Double arithmetic. The whole module is removed again by the
revert in patch 3/4, so the + lines below are left exactly as submitted.

 Statistics/Sample/Normalize.hs | 27 +++++++++++++++++++++++++++
 statistics.cabal | 1 +
 2 files changed, 28 insertions(+)
 create mode 100644 Statistics/Sample/Normalize.hs

diff --git a/Statistics/Sample/Normalize.hs b/Statistics/Sample/Normalize.hs
new file mode 100644
index 00000000..cd0274a0
--- /dev/null
+++ b/Statistics/Sample/Normalize.hs
@@ -0,0 +1,27 @@
+{-# LANGUAGE FlexibleContexts #-}
+
+-- |
+-- Module : Statistics.Sample.Normalize
+-- Copyright : (c) 2017 Gregory W. Schwartz
+-- License : BSD3
+--
+-- Maintainer : gsch@mail.med.upenn.edu
+-- Stability : experimental
+-- Portability : portable
+--
+-- Functions for normalizing samples.
+
+module Statistics.Sample.Normalize
+    (
+      standardize
+    ) where
+
+import Statistics.Sample
+import qualified Data.Vector.Generic as G
+
+-- | /O(n)/ Normalize a sample using standard scores.
+standardize :: (G.Vector v Double) => v Double -> v Double
+standardize xs = G.map (\x -> (x - mu) / sigma) xs
+  where
+    mu = mean xs
+    sigma = stdDev xs
diff --git a/statistics.cabal b/statistics.cabal
index ef52b841..6ffb0ac3 100644
--- a/statistics.cabal
+++ b/statistics.cabal
@@ -83,6 +83,7 @@ library
     Statistics.Sample.Histogram
     Statistics.Sample.KernelDensity
     Statistics.Sample.KernelDensity.Simple
+    Statistics.Sample.Normalize
     Statistics.Sample.Powers
     Statistics.Test.ChiSquared
     Statistics.Test.KolmogorovSmirnov
From d04a271587030ef73238601dfed7b9c6c65262b7 Mon Sep 17 00:00:00 2001
From: "Gregory W. Schwartz"
Date: Mon, 17 Jul 2017 13:39:52 -0400
Subject: [PATCH 2/4] Added p-value to the Spearman correlation.
---
NOTE(review): 'stat' below is rho * sqrt (n - 2) / (1 - rho^2). The usual t
statistic for a correlation coefficient is rho * sqrt ((n - 2) / (1 - rho^2)),
i.e. the denominator belongs under the square root. The four where-bindings
added here are deleted again by patch 4/4, so they are left exactly as
submitted to keep that patch applicable.

 Statistics/Correlation.hs | 14 ++++++++++----
 1 file changed, 10 insertions(+), 4 deletions(-)

diff --git a/Statistics/Correlation.hs b/Statistics/Correlation.hs
index 15ced2b4..e1f7a761 100644
--- a/Statistics/Correlation.hs
+++ b/Statistics/Correlation.hs
@@ -14,6 +14,8 @@ module Statistics.Correlation
 
 import qualified Data.Vector.Generic as G
 import qualified Data.Vector.Unboxed as U
+import Statistics.Distribution
+import Statistics.Distribution.StudentT
 import Statistics.Matrix
 import Statistics.Sample
 import Statistics.Test.Internal (rankUnsorted)
@@ -43,7 +45,8 @@ pearsonMatByRow m
 -- Spearman
 ----------------------------------------------------------------
 
--- | compute spearman correlation between two samples
+-- | Spearman rank correlation between two samples, paired with a two-sided
+-- p-value from Student's /t/ distribution with /n - 2/ degrees of freedom.
 spearman :: ( Ord a
             , Ord b
             , G.Vector v a
@@ -56,12 +59,15 @@ spearman :: ( Ord a
             , G.Vector v (Int, b)
             )
          => v (a, b)
-         -> Double
+         -> (Double, Double)
 spearman xy
-  = pearson
-  $ G.zip (rankUnsorted x) (rankUnsorted y)
+  = (rho, p)
   where
     (x, y) = G.unzip xy
+    rho = pearson $ G.zip (rankUnsorted x) (rankUnsorted y)
+    n = fromIntegral . G.length $ xy
+    stat = rho * ((sqrt (n - 2)) / (1 - (rho ^ 2)))
+    p = 2 * (complCumulative (studentT (n - 2)) . abs $ stat)
 {-# INLINE spearman #-}
 
 -- | compute pairwise spearman correlation between rows of a matrix
From 74ba3c83212a8512a21952faccdd7ea5549e16c8 Mon Sep 17 00:00:00 2001
From: "Gregory W. Schwartz"
Date: Mon, 17 Jul 2017 13:44:33 -0400
Subject: [PATCH 3/4] Revert "Added standardize normalization technique."

This reverts commit 02a23af9671a7967b2b5fae327c1125bd809b808.
---
NOTE(review): straight revert of patch 1/4 (commit 02a23af9671a). Every - line
below has to stay identical to the corresponding + line of that patch, so
nothing inside the hunks is touched. Patches 1/4 and 3/4 together leave the
tree unchanged.

 Statistics/Sample/Normalize.hs | 27 ---------------------------
 statistics.cabal | 1 -
 2 files changed, 28 deletions(-)
 delete mode 100644 Statistics/Sample/Normalize.hs

diff --git a/Statistics/Sample/Normalize.hs b/Statistics/Sample/Normalize.hs
deleted file mode 100644
index cd0274a0..00000000
--- a/Statistics/Sample/Normalize.hs
+++ /dev/null
@@ -1,27 +0,0 @@
-{-# LANGUAGE FlexibleContexts #-}
-
--- |
--- Module : Statistics.Sample.Normalize
--- Copyright : (c) 2017 Gregory W. Schwartz
--- License : BSD3
---
--- Maintainer : gsch@mail.med.upenn.edu
--- Stability : experimental
--- Portability : portable
---
--- Functions for normalizing samples.
-
-module Statistics.Sample.Normalize
-    (
-      standardize
-    ) where
-
-import Statistics.Sample
-import qualified Data.Vector.Generic as G
-
--- | /O(n)/ Normalize a sample using standard scores.
-standardize :: (G.Vector v Double) => v Double -> v Double
-standardize xs = G.map (\x -> (x - mu) / sigma) xs
-  where
-    mu = mean xs
-    sigma = stdDev xs
diff --git a/statistics.cabal b/statistics.cabal
index 6ffb0ac3..ef52b841 100644
--- a/statistics.cabal
+++ b/statistics.cabal
@@ -83,7 +83,6 @@ library
     Statistics.Sample.Histogram
     Statistics.Sample.KernelDensity
     Statistics.Sample.KernelDensity.Simple
-    Statistics.Sample.Normalize
     Statistics.Sample.Powers
     Statistics.Test.ChiSquared
     Statistics.Test.KolmogorovSmirnov
From 612efa8e04d12e9ac5cbd16e42809cc0bb344d03 Mon Sep 17 00:00:00 2001
From: "Gregory W. Schwartz"
Date: Fri, 11 Aug 2017 11:22:18 -0400
Subject: [PATCH 4/4] Moved p-value to pearson.
---
NOTE(review): two changes relative to the patch as submitted.
(1) The t statistic is now coeff * sqrt ((n - 2) / (1 - coeff^2)). The
    submitted form divided by (1 - coeff^2) without the square root, which
    overstates |t| and therefore understates the p-value. 'max 0' clamps the
    denominator so that rounding error pushing |coeff| past 1 cannot turn the
    square root into NaN.
(2) The haddock of 'pearson' no longer claims to be "Exactly same as
    'Statistics.Sample.correlation'", since the result is now a pair.
Hunk headers and the diffstat are adjusted for the added lines; the abbreviated
blob id on the "index" line was not recomputed. Samples with fewer than three
pairs make n - 2 non-positive -- confirm how 'studentT' treats that.

 Statistics/Correlation.hs | 28 +++++++++++++++++-----------
 1 file changed, 17 insertions(+), 11 deletions(-)

diff --git a/Statistics/Correlation.hs b/Statistics/Correlation.hs
index e1f7a761..81cf6ae0 100644
--- a/Statistics/Correlation.hs
+++ b/Statistics/Correlation.hs
@@ -19,6 +19,7 @@ import Statistics.Distribution.StudentT
 import Statistics.Matrix
 import Statistics.Sample
 import Statistics.Test.Internal (rankUnsorted)
+import Statistics.Types (mkPValue, PValue)
 
 
 ----------------------------------------------------------------
@@ -28,15 +29,23 @@ import Statistics.Test.Internal (rankUnsorted)
--- | Pearson correlation for sample of pairs. Exactly same as
--- 'Statistics.Sample.correlation'
+-- | Pearson correlation for a sample of pairs, paired with its two-sided
+-- p-value. The coefficient is 'Statistics.Sample.correlation'; the p-value
+-- uses Student's /t/ distribution with /n - 2/ degrees of freedom.
 pearson :: (G.Vector v (Double, Double), G.Vector v Double)
-        => v (Double, Double) -> Double
-pearson = correlation
+        => v (Double, Double) -> (Double, PValue Double)
+pearson xy = (coeff, p)
+  where
+    coeff = correlation xy
+    n = fromIntegral . G.length $ xy
+    -- t = r * sqrt ((n - 2) / (1 - r^2)); 'max 0' keeps rounding error that
+    -- pushes |r| past 1 from producing a NaN statistic.
+    stat = coeff * sqrt ((n - 2) / max 0 (1 - coeff * coeff))
+    p = mkPValue $ 2 * (complCumulative (studentT (n - 2)) . abs $ stat)
 {-# INLINE pearson #-}
 
 -- | Compute pairwise pearson correlation between rows of a matrix
 pearsonMatByRow :: Matrix -> Matrix
 pearsonMatByRow m
   = generateSym (rows m)
-      (\i j -> pearson $ row m i `U.zip` row m j)
+      (\i j -> fst . pearson $ row m i `U.zip` row m j)
 {-# INLINE pearsonMatByRow #-}
 
 
@@ -59,15 +68,12 @@ spearman :: ( Ord a
             , G.Vector v (Int, b)
             )
          => v (a, b)
-         -> (Double, Double)
+         -> (Double, PValue Double)
 spearman xy
-  = (rho, p)
+  = pearson
+  $ G.zip (rankUnsorted x) (rankUnsorted y)
   where
     (x, y) = G.unzip xy
-    rho = pearson $ G.zip (rankUnsorted x) (rankUnsorted y)
-    n = fromIntegral . G.length $ xy
-    stat = rho * ((sqrt (n - 2)) / (1 - (rho ^ 2)))
-    p = 2 * (complCumulative (studentT (n - 2)) . abs $ stat)
 {-# INLINE spearman #-}
 
 -- | compute pairwise spearman correlation between rows of a matrix