Skip to content

Commit

Permalink
Merge pull request #64 from supernifty/compare_analyses
Browse files Browse the repository at this point in the history
Compare analyses CPIPE-13
  • Loading branch information
supernifty committed Nov 12, 2015
2 parents e0645d8 + 0db5ac9 commit 0337830
Showing 1 changed file with 21 additions and 11 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,12 @@
# along with Cpipe. If not, see <http://www.gnu.org/licenses/>.
#
###########################################################################
# Compare two analyses.
# This looks at the annovarx.csv output of each analysis and shows the differences between the two. Example:
# python ~/compare_analyses.py --dir1 ./prod/batches/b1 --dir2 ./stage/batches/b2 --sample1 123456789 --sample2 9877654321
#
# In the future we may look at previous stages to determine why a variant was filtered
# In the meantime, look at evaluate_variant.py
###########################################################################

import argparse
Expand All @@ -32,30 +37,34 @@ def find_variants( fh ):
result = set()
for line in csvfh:
if not indexes:
indexes = [ line.index(x) for x in ('Gene','Chr','Start') ]
indexes = [ line.index(x) for x in ('Gene','Chr','Start', 'Func') ]
else:
key = '\t'.join( [ line[i] for i in indexes ] )
result.add( key )
return result

def compare( d1, d2, s1, s2, out ):
def compare( d1, d2, s1, s2, out, common=False ):
# compare the annovars
a1fn = glob.glob( '{0}/analysis/results/*{1}.annovarx.csv'.format( d1, s1 ) )[0]
a2fn = glob.glob( '{0}/analysis/results/*{1}.annovarx.csv'.format( d2, s2 ) )[0]
a1 = find_variants( open( a1fn, 'r' ) )
a2 = find_variants( open( a2fn, 'r' ) )
out.write( '{0} total variants in {1} {2}\n'.format( len(a1), d1, s1 ) )
out.write( '{0} total variants in {1} {2}\n'.format( len(a2), d2, s2 ) )
# common
both = a1.intersection(a2)
out.write( '----- {0} variants in common -----\n'.format( both ) )
for x in sorted( list( both ) ):
out.write( '{0}\n'.format( x ) )
if common:
both = a1.intersection(a2)
out.write( '----- {0} variants in common -----\n'.format( len(both) ) )
for x in sorted( list( both ) ):
out.write( '{0}\n'.format( x ) )
# only s1
s1only = s1.difference( s2 )
out.write( '----- {0} variants only in {1} -----\n'.format( s1only, s1 ) )
s1only = a1.difference( a2 )
out.write( '----- {0} variants only in {1} {2} -----\n'.format( len(s1only), d1, s1 ) )
for x in sorted( list( s1only ) ):
out.write( '{0}\n'.format( x ) )
# only s2
s2only = s2.difference( s1 )
out.write( '----- {0} variants only in {1} -----\n'.format( s2only, s2 ) )
s2only = a2.difference( a1 )
out.write( '----- {0} variants only in {1} {2} -----\n'.format( len(s2only), d2, s2 ) )
for x in sorted( list( s2only ) ):
out.write( '{0}\n'.format( x ) )

Expand All @@ -65,5 +74,6 @@ def compare( d1, d2, s1, s2, out ):
parser.add_argument('--dir2', required=True, help='batch 2 directory')
parser.add_argument('--sample1', required=True, help='sample 1 name')
parser.add_argument('--sample2', required=True, help='sample 2 name')
parser.add_argument('--common', action='store_true', required=False, default=False, help='show variants in common' )
args = parser.parse_args()
compare( args.dir1, args.dir2, args.sample1, args.sample2, sys.stdout )
compare( args.dir1, args.dir2, args.sample1, args.sample2, sys.stdout, common=args.common )

0 comments on commit 0337830

Please sign in to comment.