From 00893a7ab867d3f92a8439fdd40cca276f8105dd Mon Sep 17 00:00:00 2001 From: Jacques Dainat Date: Tue, 17 Sep 2024 10:04:32 +0200 Subject: [PATCH 1/4] update doc --- bin/agat_sp_compare_two_annotations.pl | 6 +++--- docs/tools/agat_sp_compare_two_annotations.md | 20 +++++++++---------- 2 files changed, 13 insertions(+), 13 deletions(-) diff --git a/bin/agat_sp_compare_two_annotations.pl b/bin/agat_sp_compare_two_annotations.pl index 7c7a5ff8..e2a94723 100755 --- a/bin/agat_sp_compare_two_annotations.pl +++ b/bin/agat_sp_compare_two_annotations.pl @@ -729,18 +729,18 @@ =head1 DESCRIPTION =head1 SYNOPSIS - agat_sp_compare_two_annotations.pl -gff1 infile.gff [ -o outfile ] + agat_sp_compare_two_annotations.pl -gff1 infile1.gff -gff2 infile2.gff [ -o outfile ] agat_sp_compare_two_annotations.pl --help =head1 OPTIONS =over 8 -=item B<-gff1> +=item B<--gff1> Input GTF/GFF file1. -=item B<-gff2> +=item B<--gff2> Input GTF/GFF file2. diff --git a/docs/tools/agat_sp_compare_two_annotations.md b/docs/tools/agat_sp_compare_two_annotations.md index c990af48..82234bc6 100644 --- a/docs/tools/agat_sp_compare_two_annotations.md +++ b/docs/tools/agat_sp_compare_two_annotations.md @@ -3,15 +3,15 @@ ## DESCRIPTION The script aims to compare two annotation of the same assembly. It provided -information about split/fusion of genes between the two annotations. -The most common case are: -1 => 0 ( gene uniq to file1) -0 => 1 ( gene uniq to file2) -1 => 1 ( 1 gene from file 1 overlaps only 1 gene from file2) -1 => <many> ( 1 gene from file 1 overlaps <many> genes from file2) => split case (with file 1 as reference) +information about split/fusion of genes between the two annotations. +The most common case are: +1 => 0 ( gene uniq to file1) +0 => 1 ( gene uniq to file2) +1 => 1 ( 1 gene from file 1 overlaps only 1 gene from file2) +1 => <many> ( 1 gene from file 1 overlaps <many> genes from file2) => split case (with file 1 as reference) <many> => 1 ( <many> genes from file 1 overlap only 1 gene from file2) => fusion case (with file 1 as reference) -Then you can get more complex cases: +Then you can get more complex cases: <many> => <many> (<many> genes from file 1 overlap <many> genes from file2) The script output a folder containing a report of number of different cases as well as a file @@ -20,17 +20,17 @@ per case type listing per line the gene feature's ID involved in each case. ## SYNOPSIS ``` -agat_sp_compare_two_annotations.pl -gff1 infile.gff [ -o outfile ] +agat_sp_compare_two_annotations.pl --gff1 infile1.gff -gff2 infile2.gff [ -o outfile ] agat_sp_compare_two_annotations.pl --help ``` ## OPTIONS -- **-gff1** +- **--gff1** Input GTF/GFF file1. -- **-gff2** +- **--gff2** Input GTF/GFF file2. From 6b132680ae4fc5f2eac7227a73080f6ce469a512 Mon Sep 17 00:00:00 2001 From: Jacques Dainat Date: Tue, 17 Sep 2024 10:23:40 +0200 Subject: [PATCH 2/4] remive transcript ID in Leve1 feature when using L2 as template by clean_clone --- lib/AGAT/OmniscientTool.pm | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/lib/AGAT/OmniscientTool.pm b/lib/AGAT/OmniscientTool.pm index 9e383cb5..097920c3 100644 --- a/lib/AGAT/OmniscientTool.pm +++ b/lib/AGAT/OmniscientTool.pm @@ -1314,7 +1314,8 @@ sub clean_clone{ } } - # remove Parent attribute if level1 + # remove Parent and transcript_id attributes if level1. + # We check is Level1 using the primary_tag of the feature if ($omniscient){ if ($cloned_feature->has_tag("Parent")){ my $hash_level = $omniscient->{'other'}{'level'}; @@ -1322,6 +1323,12 @@ sub clean_clone{ $cloned_feature->remove_tag("Parent"); } } + if ($cloned_feature->has_tag("transcript_id")){ + my $hash_level = $omniscient->{'other'}{'level'}; + if( exists_keys($hash_level,'level1',lc($cloned_feature->primary_tag)) ){ + $cloned_feature->remove_tag("transcript_id"); + } + } } # new id create_or_replace_tag($cloned_feature,'ID',$new_id) if $new_id; From 3cb2ae97b69a5832e48d080f4e4c26bb059b2c23 Mon Sep 17 00:00:00 2001 From: Jacques Dainat Date: Tue, 17 Sep 2024 11:23:39 +0200 Subject: [PATCH 3/4] fix test to reflect that transcript_id not part of level1 --- t/gff_other/out/issue441.gtf | 2 +- t/gff_syntax/out/30_correct_output.gff | 2 +- t/gff_syntax/out/36_correct_output.gff | 6 +++--- t/gff_syntax/out/37_correct_output.gff | 4 ++-- .../out/stop_split_over_two_exons_correct_output.gff | 2 +- t/gff_syntax/out/stop_start_an_exon_correct_output.gff | 2 +- t/level_missing/out/testA_output.gff | 6 +++--- t/level_missing/out/testA_output2.gff | 4 ++-- t/level_missing/out/testA_output3.gff | 4 ++-- t/level_missing/out/testA_output4.gff | 6 +++--- t/scripts_output/out/agat_sp_kraken_assess_liftover_1.gff | 2 +- 11 files changed, 20 insertions(+), 20 deletions(-) diff --git a/t/gff_other/out/issue441.gtf b/t/gff_other/out/issue441.gtf index 0abfef70..dd74b7df 100644 --- a/t/gff_other/out/issue441.gtf +++ b/t/gff_other/out/issue441.gtf @@ -1,6 +1,6 @@ ##gtf-version X # GFF-like GTF i.e. not checked against any GTF specification. Conversion based on GFF input, standardised by AGAT. -Scaffold170 AGAT gene 2883887 2888594 . + . gene_id "GBI_15721"; transcript_id "GBI_15721-RE"; ID "GBI_15721"; gene_name "GBI_15721"; +Scaffold170 AGAT gene 2883887 2888594 . + . gene_id "GBI_15721"; ID "GBI_15721"; gene_name "GBI_15721"; Scaffold170 AGAT mRNA 2883887 2888594 . + . gene_id "GBI_15721"; transcript_id "GBI_15721-RE"; ID "GBI_15721-RE"; Parent "GBI_15721"; gene_name "GBI_15721"; Scaffold170 maker exon 2883887 2884591 . + . gene_id "GBI_15721"; transcript_id "GBI_15721-RE"; ID "agat-exon-1"; Parent "GBI_15721-RE"; gene_name "GBI_15721"; Scaffold170 maker exon 2885735 2885878 . + . gene_id "GBI_15721"; transcript_id "GBI_15721-RE"; ID "agat-exon-2"; Parent "GBI_15721-RE"; gene_name "GBI_15721"; diff --git a/t/gff_syntax/out/30_correct_output.gff b/t/gff_syntax/out/30_correct_output.gff index aeed1fa7..95eab3af 100644 --- a/t/gff_syntax/out/30_correct_output.gff +++ b/t/gff_syntax/out/30_correct_output.gff @@ -1,5 +1,5 @@ ##gff-version 3 -000000F|arrow AGAT gene 898 1804 . + . ID=P12103_109_S2_L002.1;gene_id=P12103_109_S2_L002.1;transcript_id=P12103_109_S2_L002.1.1 +000000F|arrow AGAT gene 898 1804 . + . ID=P12103_109_S2_L002.1;gene_id=P12103_109_S2_L002.1 000000F|arrow StringTie transcript 898 1804 1000 + . ID=P12103_109_S2_L002.1.1;Parent=P12103_109_S2_L002.1;gene_id=P12103_109_S2_L002.1;transcript_id=P12103_109_S2_L002.1.1 000000F|arrow StringTie exon 898 1804 1000 + . ID=agat-exon-1;Parent=P12103_109_S2_L002.1.1;gene_id=P12103_109_S2_L002.1;transcript_id=P12103_109_S2_L002.1.1 000000F|arrow StringTie pseudogene 1147 3802 1000 - . ID=P12103_109_S2_L002.2;gene_id=P12103_109_S2_L002.2;transcript_id=P12103_109_S2_L002.2.1 diff --git a/t/gff_syntax/out/36_correct_output.gff b/t/gff_syntax/out/36_correct_output.gff index a4aa6d96..1837cbb8 100644 --- a/t/gff_syntax/out/36_correct_output.gff +++ b/t/gff_syntax/out/36_correct_output.gff @@ -1,5 +1,5 @@ ##gff-version 3 -001269F_0 AGAT gene 540236 558650 . + . ID=ENSG00000105497.3;ccdsid=CCDS12837.1;exon_id=ENSE00000842706.1;exon_number=2;gene_id=ENSG00000105497.3;gene_name=ZNF175;gene_status=KNOWN;gene_type=protein_coding;havana_gene=OTTHUMG00000167771.3;havana_transcript=OTTHUMT00000396205.1;level=2;protein_id=ENSP00000262259.2;tag=basic,appris_principal,CCDS;transcript_id=ENST00000262259.2;transcript_name=ZNF175-001;transcript_status=KNOWN;transcript_type=protein_coding +001269F_0 AGAT gene 540236 558650 . + . ID=ENSG00000105497.3;ccdsid=CCDS12837.1;exon_id=ENSE00000842706.1;exon_number=2;gene_id=ENSG00000105497.3;gene_name=ZNF175;gene_status=KNOWN;gene_type=protein_coding;havana_gene=OTTHUMG00000167771.3;havana_transcript=OTTHUMT00000396205.1;level=2;protein_id=ENSP00000262259.2;tag=basic,appris_principal,CCDS;transcript_name=ZNF175-001;transcript_status=KNOWN;transcript_type=protein_coding 001269F_0 AGAT mRNA 542087 558627 . + . ID=ENST00000262259.2;Parent=ENSG00000105497.3;ccdsid=CCDS12837.1;exon_id=ENSE00000842706.1;exon_number=2;gene_id=ENSG00000105497.3;gene_name=ZNF175;gene_status=KNOWN;gene_type=protein_coding;havana_gene=OTTHUMG00000167771.3;havana_transcript=OTTHUMT00000396205.1;level=2;protein_id=ENSP00000262259.2;tag=basic,appris_principal,CCDS;transcript_id=ENST00000262259.2;transcript_name=ZNF175-001;transcript_status=KNOWN;transcript_type=protein_coding 001269F_0 HAVANA exon 542087 542338 . + . ID=exon:ENST00000262259.2:2;Parent=ENST00000262259.2;ccdsid=CCDS12837.1;exon_id=ENSE00000842706.1;exon_number=2;gene_id=ENSG00000105497.3;gene_name=ZNF175;gene_status=KNOWN;gene_type=protein_coding;havana_gene=OTTHUMG00000167771.3;havana_transcript=OTTHUMT00000396205.1;level=2;protein_id=ENSP00000262259.2;tag=basic,appris_principal,CCDS;transcript_id=ENST00000262259.2;transcript_name=ZNF175-001;transcript_status=KNOWN;transcript_type=protein_coding 001269F_0 HAVANA exon 550309 550435 . + . ID=exon:ENST00000262259.2:3;Parent=ENST00000262259.2;ccdsid=CCDS12837.1;exon_id=ENSE00002436360.1;exon_number=3;gene_id=ENSG00000105497.3;gene_name=ZNF175;gene_status=KNOWN;gene_type=protein_coding;havana_gene=OTTHUMG00000167771.3;havana_transcript=OTTHUMT00000396205.1;level=2;protein_id=ENSP00000262259.2;tag=basic,appris_principal,CCDS;transcript_id=ENST00000262259.2;transcript_name=ZNF175-001;transcript_status=KNOWN;transcript_type=protein_coding @@ -36,7 +36,7 @@ 001269F_0 HAVANA CDS 558460 558503 . + 2 ID=CDS:ENST00000436511.2:4;Parent=ENST00000436511.2;exon_id=ENSE00002268856.1;exon_number=4;gene_id=ENSG00000105497.3;gene_name=ZNF175;gene_status=KNOWN;gene_type=protein_coding;havana_gene=OTTHUMG00000167771.3;havana_transcript=OTTHUMT00000396206.1;level=1;protein_id=ENSP00000440578.1;tag=basic,exp_conf;transcript_id=ENST00000436511.2;transcript_name=ZNF175-003;transcript_status=PUTATIVE;transcript_type=protein_coding 001269F_0 AGAT five_prime_UTR 542265 542266 . + . ID=agat-five_prime_utr-3;Parent=ENST00000436511.2;exon_id=ENSE00002298653.1;exon_number=1;gene_id=ENSG00000105497.3;gene_name=ZNF175;gene_status=KNOWN;gene_type=protein_coding;havana_gene=OTTHUMG00000167771.3;havana_transcript=OTTHUMT00000396206.1;level=1;protein_id=ENSP00000440578.1;tag=basic,exp_conf;transcript_id=ENST00000436511.2;transcript_name=ZNF175-003;transcript_status=PUTATIVE;transcript_type=protein_coding 001269F_0 AGAT three_prime_UTR 558504 558650 . + . ID=agat-three_prime_utr-5;Parent=ENST00000436511.2;exon_id=ENSE00002298653.1;exon_number=1;gene_id=ENSG00000105497.3;gene_name=ZNF175;gene_status=KNOWN;gene_type=protein_coding;havana_gene=OTTHUMG00000167771.3;havana_transcript=OTTHUMT00000396206.1;level=1;protein_id=ENSP00000440578.1;tag=basic,exp_conf;transcript_id=ENST00000436511.2;transcript_name=ZNF175-003;transcript_status=PUTATIVE;transcript_type=protein_coding -001269F_0 AGAT gene 560695 563290 . - . ID=ENSG00000167765.3;gene_id=ENSG00000167765.3;gene_name=AC018755.1;gene_status=KNOWN;gene_type=protein_coding;level=1;protein_id=ENSP00000301439.2;tag=basic,appris_principal,exp_conf;transcript_id=ENST00000301439.3;transcript_name=AC018755.1-201;transcript_status=KNOWN;transcript_type=protein_coding +001269F_0 AGAT gene 560695 563290 . - . ID=ENSG00000167765.3;gene_id=ENSG00000167765.3;gene_name=AC018755.1;gene_status=KNOWN;gene_type=protein_coding;level=1;protein_id=ENSP00000301439.2;tag=basic,appris_principal,exp_conf;transcript_name=AC018755.1-201;transcript_status=KNOWN;transcript_type=protein_coding 001269F_0 ENSEMBL transcript 560695 563290 . - . ID=ENST00000301439.3;Parent=ENSG00000167765.3;gene_id=ENSG00000167765.3;gene_name=AC018755.1;gene_status=KNOWN;gene_type=protein_coding;level=1;protein_id=ENSP00000301439.2;tag=basic,appris_principal,exp_conf;transcript_id=ENST00000301439.3;transcript_name=AC018755.1-201;transcript_status=KNOWN;transcript_type=protein_coding 001269F_0 ENSEMBL exon 560695 561963 . - . ID=exon:ENST00000301439.3:2;Parent=ENST00000301439.3;exon_id=ENSE00001116661.1;exon_number=2;gene_id=ENSG00000167765.3;gene_name=AC018755.1;gene_status=KNOWN;gene_type=protein_coding;level=1;protein_id=ENSP00000301439.2;tag=basic,appris_principal,exp_conf;transcript_id=ENST00000301439.3;transcript_name=AC018755.1-201;transcript_status=KNOWN;transcript_type=protein_coding 001269F_0 ENSEMBL exon 562861 563290 . - . ID=exon:ENST00000301439.3:1;Parent=ENST00000301439.3;exon_id=ENSE00001116660.3;exon_number=1;gene_id=ENSG00000167765.3;gene_name=AC018755.1;gene_status=KNOWN;gene_type=protein_coding;level=1;protein_id=ENSP00000301439.2;tag=basic,appris_principal,exp_conf;transcript_id=ENST00000301439.3;transcript_name=AC018755.1-201;transcript_status=KNOWN;transcript_type=protein_coding @@ -44,7 +44,7 @@ 001269F_0 ENSEMBL CDS 562861 563234 . - 0 ID=CDS:ENST00000301439.3:1;Parent=ENST00000301439.3;exon_id=ENSE00001116660.3;exon_number=1;gene_id=ENSG00000167765.3;gene_name=AC018755.1;gene_status=KNOWN;gene_type=protein_coding;level=1;protein_id=ENSP00000301439.2;tag=basic,appris_principal,exp_conf;transcript_id=ENST00000301439.3;transcript_name=AC018755.1-201;transcript_status=KNOWN;transcript_type=protein_coding 001269F_0 AGAT five_prime_UTR 563235 563290 . - . ID=agat-five_prime_utr-2;Parent=ENST00000301439.3;exon_id=ENSE00001116661.1;exon_number=2;gene_id=ENSG00000167765.3;gene_name=AC018755.1;gene_status=KNOWN;gene_type=protein_coding;level=1;protein_id=ENSP00000301439.2;tag=basic,appris_principal,exp_conf;transcript_id=ENST00000301439.3;transcript_name=AC018755.1-201;transcript_status=KNOWN;transcript_type=protein_coding 001269F_0 AGAT three_prime_UTR 560695 561547 . - . ID=agat-three_prime_utr-3;Parent=ENST00000301439.3;exon_id=ENSE00001116661.1;exon_number=2;gene_id=ENSG00000167765.3;gene_name=AC018755.1;gene_status=KNOWN;gene_type=protein_coding;level=1;protein_id=ENSP00000301439.2;tag=basic,appris_principal,exp_conf;transcript_id=ENST00000301439.3;transcript_name=AC018755.1-201;transcript_status=KNOWN;transcript_type=protein_coding -001269F_0 AGAT gene 580439 599250 . - . ID=ENSG00000105501.7;ccdsid=CCDS33088.1;gene_id=ENSG00000105501.7;gene_name=SIGLEC5;gene_status=KNOWN;gene_type=protein_coding;havana_gene=OTTHUMG00000165510.5;level=3;protein_id=ENSP00000455510.2;tag=basic,appris_principal,CCDS;transcript_id=ENST00000570106.2;transcript_name=SIGLEC5-202;transcript_status=KNOWN;transcript_type=protein_coding +001269F_0 AGAT gene 580439 599250 . - . ID=ENSG00000105501.7;ccdsid=CCDS33088.1;gene_id=ENSG00000105501.7;gene_name=SIGLEC5;gene_status=KNOWN;gene_type=protein_coding;havana_gene=OTTHUMG00000165510.5;level=3;protein_id=ENSP00000455510.2;tag=basic,appris_principal,CCDS;transcript_name=SIGLEC5-202;transcript_status=KNOWN;transcript_type=protein_coding 001269F_0 ENSEMBL transcript 580439 598551 . - . ID=ENST00000222107.4;Parent=ENSG00000105501.7 001269F_0 ENSEMBL exon 580439 581333 . - . ID=exon:ENST00000222107.4:9;Parent=ENST00000222107.4;exon_id=ENSE00000842709.5;exon_number=9;gene_id=ENSG00000105501.7;gene_name=SIGLEC5;gene_status=KNOWN;gene_type=protein_coding;havana_gene=OTTHUMG00000165510.5;level=3;protein_id=ENSP00000222107.4;tag=basic,appris_principal;transcript_id=ENST00000222107.4;transcript_name=SIGLEC5-201;transcript_status=KNOWN;transcript_type=protein_coding 001269F_0 ENSEMBL exon 594943 595024 . - . ID=exon:ENST00000222107.4:8;Parent=ENST00000222107.4;exon_id=ENSE00003665149.1;exon_number=8;gene_id=ENSG00000105501.7;gene_name=SIGLEC5;gene_status=KNOWN;gene_type=protein_coding;havana_gene=OTTHUMG00000165510.5;level=3;protein_id=ENSP00000222107.4;tag=basic,appris_principal;transcript_id=ENST00000222107.4;transcript_name=SIGLEC5-201;transcript_status=KNOWN;transcript_type=protein_coding diff --git a/t/gff_syntax/out/37_correct_output.gff b/t/gff_syntax/out/37_correct_output.gff index eec46277..a2ad3e69 100644 --- a/t/gff_syntax/out/37_correct_output.gff +++ b/t/gff_syntax/out/37_correct_output.gff @@ -1,5 +1,5 @@ ##gff-version 3 -xfSc0000000 AGAT gene 7096 13319 . - . ID=BL03703;exon_number=1;gene_id=BL03703;gene_name=CHRND;oldID=BlTCONS00149301;status=both;transcript_id=BL03703_cuf1 +xfSc0000000 AGAT gene 7096 13319 . - . ID=BL03703;exon_number=1;gene_id=BL03703;gene_name=CHRND;oldID=BlTCONS00149301;status=both xfSc0000000 AGAT mRNA 7096 10149 . - . ID=BL03703_cuf1;Parent=BL03703;exon_number=1;gene_id=BL03703;gene_name=CHRND;oldID=BlTCONS00149301;status=both;transcript_id=BL03703_cuf1 xfSc0000000 protein_coding exon 7096 8697 . - . ID=agat-exon-6;Parent=BL03703_cuf1;exon_number=2;gene_id=BL03703;gene_name=CHRND;oldID=BlTCONS00149301;status=both;transcript_id=BL03703_cuf1 xfSc0000000 protein_coding exon 9847 10149 . - . ID=agat-exon-5;Parent=BL03703_cuf1;exon_number=1;gene_id=BL03703;gene_name=CHRND;oldID=BlTCONS00149301;status=both;transcript_id=BL03703_cuf1 @@ -14,7 +14,7 @@ xfSc0000000 protein_coding CDS 12803 13024 . - 2 ID=agat-cds-3;Parent=BL03703_ev xfSc0000000 protein_coding CDS 13283 13319 . - 0 ID=agat-cds-2;Parent=BL03703_evm0;exon_number=1;gene_id=BL03703;gene_name=CHRND;oldID=Blg02918.0;status=both;transcript_id=BL03703_evm0 xfSc0000000 AGAT RNA 7096 9184 . - . ID=BL03703_cuf0;Parent=BL03703;exon_number=1;gene_id=BL03703;gene_name=CHRND;oldID=BlTCONS00149300;status=both;transcript_id=BL03703_cuf0 xfSc0000000 protein_coding exon 7096 9184 . - . ID=agat-exon-4;Parent=BL03703_cuf0;exon_number=1;gene_id=BL03703;gene_name=CHRND;oldID=BlTCONS00149300;status=both;transcript_id=BL03703_cuf0 -xfSc0000437 AGAT gene 3712 3937 . - . ID=BL03702;exon_number=2;gene_id=BL03702;gene_name=AHR;oldID=Blg03689.3;status=both;transcript_id=BL03702_evm3 +xfSc0000437 AGAT gene 3712 3937 . - . ID=BL03702;exon_number=2;gene_id=BL03702;gene_name=AHR;oldID=Blg03689.3;status=both xfSc0000437 AGAT mRNA 3712 3937 . - . ID=BL03702_evm3;Parent=BL03702;exon_number=2;gene_id=BL03702;gene_name=AHR;oldID=Blg03689.3;status=both;transcript_id=BL03702_evm3 xfSc0000437 protein_coding exon 3712 3937 . - . ID=agat-exon-1;Parent=BL03702_evm3;exon_number=3;gene_id=BL03702;gene_name=AHR;oldID=Blg03689.3;status=both;transcript_id=BL03702_evm3 xfSc0000437 protein_coding CDS 3712 3937 . - 1 ID=agat-cds-1;Parent=BL03702_evm3;exon_number=2;gene_id=BL03702;gene_name=AHR;oldID=Blg03689.3;status=both;transcript_id=BL03702_evm3 diff --git a/t/gff_syntax/out/stop_split_over_two_exons_correct_output.gff b/t/gff_syntax/out/stop_split_over_two_exons_correct_output.gff index 1eaaa270..f772e31d 100644 --- a/t/gff_syntax/out/stop_split_over_two_exons_correct_output.gff +++ b/t/gff_syntax/out/stop_split_over_two_exons_correct_output.gff @@ -1,5 +1,5 @@ ##gff-version 3 -chr6 AGAT gene 143060901 143061606 . + . ID=ENSG00000146416.19;gene_id=ENSG00000146416.19;gene_name=AIG1;gene_type=protein_coding;transcript_id=ENST00000367596.5 +chr6 AGAT gene 143060901 143061606 . + . ID=ENSG00000146416.19;gene_id=ENSG00000146416.19;gene_name=AIG1;gene_type=protein_coding chr6 HAVANA transcript 143060901 143061606 . + . ID=ENST00000367596.5;Parent=ENSG00000146416.19;gene_id=ENSG00000146416.19;gene_name=AIG1;gene_type=protein_coding;transcript_id=ENST00000367596.5 chr6 HAVANA exon 143060901 143061066 . + . ID=agat-exon-1;Parent=ENST00000367596.5;gene_id=ENSG00000146416.19;gene_name=AIG1;gene_type=protein_coding;transcript_id=ENST00000367596.5 chr6 HAVANA exon 143061214 143061606 . + . ID=agat-exon-2;Parent=ENST00000367596.5;gene_id=ENSG00000146416.19;gene_name=AIG1;gene_type=protein_coding;transcript_id=ENST00000367596.5 diff --git a/t/gff_syntax/out/stop_start_an_exon_correct_output.gff b/t/gff_syntax/out/stop_start_an_exon_correct_output.gff index 96f24bc5..55a88808 100644 --- a/t/gff_syntax/out/stop_start_an_exon_correct_output.gff +++ b/t/gff_syntax/out/stop_start_an_exon_correct_output.gff @@ -1,5 +1,5 @@ ##gff-version 3 -chr6 AGAT gene 143060901 143061606 . + . ID=ENSG00000146416.19;gene_id=ENSG00000146416.19;gene_name=AIG1;gene_type=protein_coding;transcript_id=ENST00000367596.5 +chr6 AGAT gene 143060901 143061606 . + . ID=ENSG00000146416.19;gene_id=ENSG00000146416.19;gene_name=AIG1;gene_type=protein_coding chr6 HAVANA transcript 143060901 143061606 . + . ID=ENST00000367596.5;Parent=ENSG00000146416.19;gene_id=ENSG00000146416.19;gene_name=AIG1;gene_type=protein_coding;transcript_id=ENST00000367596.5 chr6 HAVANA exon 143060901 143061066 . + . ID=agat-exon-1;Parent=ENST00000367596.5;gene_id=ENSG00000146416.19;gene_name=AIG1;gene_type=protein_coding;transcript_id=ENST00000367596.5 chr6 HAVANA exon 143061214 143061606 . + . ID=agat-exon-2;Parent=ENST00000367596.5;gene_id=ENSG00000146416.19;gene_name=AIG1;gene_type=protein_coding;transcript_id=ENST00000367596.5 diff --git a/t/level_missing/out/testA_output.gff b/t/level_missing/out/testA_output.gff index 24052cde..078a77ae 100644 --- a/t/level_missing/out/testA_output.gff +++ b/t/level_missing/out/testA_output.gff @@ -1,13 +1,13 @@ ##gff-version 3 -chr12 AGAT gene 100 500 . + . ID=agat-gene-1;common_tag="gene1";gene_info="gene1";transcript_id="transcript1" +chr12 AGAT gene 100 500 . + . ID=agat-gene-1;common_tag="gene1";gene_info="gene1" chr12 HAVANA transcript 100 500 . + . ID="bbb";Parent=agat-gene-1;common_tag="gene1";gene_info="gene1";transcript_id="transcript1" chr12 HAVANA exon 100 500 . + . ID="ccc";Parent="bbb";common_tag="gene1" chr12 HAVANA CDS 100 500 . + 0 ID="ddd";Parent="bbb";common_tag="gene1" -chr12 AGAT gene 100 600 . + . ID=agat-gene-2;common_tag="gene1";gene_info="gene1";transcript_id="transcript2" +chr12 AGAT gene 100 600 . + . ID=agat-gene-2;common_tag="gene1";gene_info="gene1" chr12 HAVANA transcript 100 600 . + . ID="bbb2";Parent=agat-gene-2;common_tag="gene1";gene_info="gene1";transcript_id="transcript2" chr12 HAVANA exon 100 600 . + . ID="ccc2";Parent="bbb2";common_tag="gene1" chr12 HAVANA CDS 100 600 . + 0 ID="ddd2";Parent="bbb2";common_tag="gene1" -chr12 AGAT gene 1000 5000 . + . ID=agat-gene-3;common_tag="gene2";gene_info="gene2";transcript_id="transcript3" +chr12 AGAT gene 1000 5000 . + . ID=agat-gene-3;common_tag="gene2";gene_info="gene2" chr12 HAVANA transcript 1000 5000 . + . ID="yyy";Parent=agat-gene-3;common_tag="gene2";gene_info="gene2";transcript_id="transcript3" chr12 HAVANA exon 1000 5000 . + . ID="zzz";Parent="yyy";common_tag="gene2" chr12 HAVANA CDS 1000 5000 . + 0 ID="www";Parent="yyy";common_tag="gene2" diff --git a/t/level_missing/out/testA_output2.gff b/t/level_missing/out/testA_output2.gff index 0e833056..7ca0e40d 100644 --- a/t/level_missing/out/testA_output2.gff +++ b/t/level_missing/out/testA_output2.gff @@ -1,12 +1,12 @@ ##gff-version 3 -chr12 AGAT gene 100 600 . + . ID="gene1";common_tag="gene1";gene_info="gene1";transcript_id="transcript1" +chr12 AGAT gene 100 600 . + . ID="gene1";common_tag="gene1";gene_info="gene1" chr12 HAVANA transcript 100 500 . + . ID="bbb";Parent="gene1";common_tag="gene1";gene_info="gene1";transcript_id="transcript1" chr12 HAVANA exon 100 500 . + . ID="ccc";Parent="bbb";common_tag="gene1" chr12 HAVANA CDS 100 500 . + 0 ID="ddd";Parent="bbb";common_tag="gene1" chr12 HAVANA transcript 100 600 . + . ID="bbb2";Parent="gene1";common_tag="gene1";gene_info="gene1";transcript_id="transcript2" chr12 HAVANA exon 100 600 . + . ID="ccc2";Parent="bbb2";common_tag="gene1" chr12 HAVANA CDS 100 600 . + 0 ID="ddd2";Parent="bbb2";common_tag="gene1" -chr12 AGAT gene 1000 5000 . + . ID="gene2";common_tag="gene2";gene_info="gene2";transcript_id="transcript3" +chr12 AGAT gene 1000 5000 . + . ID="gene2";common_tag="gene2";gene_info="gene2" chr12 HAVANA transcript 1000 5000 . + . ID="yyy";Parent="gene2";common_tag="gene2";gene_info="gene2";transcript_id="transcript3" chr12 HAVANA exon 1000 5000 . + . ID="zzz";Parent="yyy";common_tag="gene2" chr12 HAVANA CDS 1000 5000 . + 0 ID="www";Parent="yyy";common_tag="gene2" diff --git a/t/level_missing/out/testA_output3.gff b/t/level_missing/out/testA_output3.gff index 0e833056..7ca0e40d 100644 --- a/t/level_missing/out/testA_output3.gff +++ b/t/level_missing/out/testA_output3.gff @@ -1,12 +1,12 @@ ##gff-version 3 -chr12 AGAT gene 100 600 . + . ID="gene1";common_tag="gene1";gene_info="gene1";transcript_id="transcript1" +chr12 AGAT gene 100 600 . + . ID="gene1";common_tag="gene1";gene_info="gene1" chr12 HAVANA transcript 100 500 . + . ID="bbb";Parent="gene1";common_tag="gene1";gene_info="gene1";transcript_id="transcript1" chr12 HAVANA exon 100 500 . + . ID="ccc";Parent="bbb";common_tag="gene1" chr12 HAVANA CDS 100 500 . + 0 ID="ddd";Parent="bbb";common_tag="gene1" chr12 HAVANA transcript 100 600 . + . ID="bbb2";Parent="gene1";common_tag="gene1";gene_info="gene1";transcript_id="transcript2" chr12 HAVANA exon 100 600 . + . ID="ccc2";Parent="bbb2";common_tag="gene1" chr12 HAVANA CDS 100 600 . + 0 ID="ddd2";Parent="bbb2";common_tag="gene1" -chr12 AGAT gene 1000 5000 . + . ID="gene2";common_tag="gene2";gene_info="gene2";transcript_id="transcript3" +chr12 AGAT gene 1000 5000 . + . ID="gene2";common_tag="gene2";gene_info="gene2" chr12 HAVANA transcript 1000 5000 . + . ID="yyy";Parent="gene2";common_tag="gene2";gene_info="gene2";transcript_id="transcript3" chr12 HAVANA exon 1000 5000 . + . ID="zzz";Parent="yyy";common_tag="gene2" chr12 HAVANA CDS 1000 5000 . + 0 ID="www";Parent="yyy";common_tag="gene2" diff --git a/t/level_missing/out/testA_output4.gff b/t/level_missing/out/testA_output4.gff index 13129e2e..c05c7064 100644 --- a/t/level_missing/out/testA_output4.gff +++ b/t/level_missing/out/testA_output4.gff @@ -1,13 +1,13 @@ ##gff-version 3 -chr12 AGAT gene 100 500 . + . ID="transcript1";common_tag="gene1";gene_info="gene1";transcript_id="transcript1" +chr12 AGAT gene 100 500 . + . ID="transcript1";common_tag="gene1";gene_info="gene1" chr12 HAVANA transcript 100 500 . + . ID="bbb";Parent="transcript1";common_tag="gene1";gene_info="gene1";transcript_id="transcript1" chr12 HAVANA exon 100 500 . + . ID="ccc";Parent="bbb";common_tag="gene1" chr12 HAVANA CDS 100 500 . + 0 ID="ddd";Parent="bbb";common_tag="gene1" -chr12 AGAT gene 100 600 . + . ID="transcript2";common_tag="gene1";gene_info="gene1";transcript_id="transcript2" +chr12 AGAT gene 100 600 . + . ID="transcript2";common_tag="gene1";gene_info="gene1" chr12 HAVANA transcript 100 600 . + . ID="bbb2";Parent="transcript2";common_tag="gene1";gene_info="gene1";transcript_id="transcript2" chr12 HAVANA exon 100 600 . + . ID="ccc2";Parent="bbb2";common_tag="gene1" chr12 HAVANA CDS 100 600 . + 0 ID="ddd2";Parent="bbb2";common_tag="gene1" -chr12 AGAT gene 1000 5000 . + . ID="transcript3";common_tag="gene2";gene_info="gene2";transcript_id="transcript3" +chr12 AGAT gene 1000 5000 . + . ID="transcript3";common_tag="gene2";gene_info="gene2" chr12 HAVANA transcript 1000 5000 . + . ID="yyy";Parent="transcript3";common_tag="gene2";gene_info="gene2";transcript_id="transcript3" chr12 HAVANA exon 1000 5000 . + . ID="zzz";Parent="yyy";common_tag="gene2" chr12 HAVANA CDS 1000 5000 . + 0 ID="www";Parent="yyy";common_tag="gene2" diff --git a/t/scripts_output/out/agat_sp_kraken_assess_liftover_1.gff b/t/scripts_output/out/agat_sp_kraken_assess_liftover_1.gff index b5aef8d9..74ef5a0c 100644 --- a/t/scripts_output/out/agat_sp_kraken_assess_liftover_1.gff +++ b/t/scripts_output/out/agat_sp_kraken_assess_liftover_1.gff @@ -1,5 +1,5 @@ ##gff-version 3 -scaffold_823 AGAT gene 105596 142103 . + . ID=PP1S9_164V6;kraken_mapped=partial;gene_id=PP1S9_164V6;protein_id=PP1S9_164V6.1;transcript_id=PP1S9_164V6.1 +scaffold_823 AGAT gene 105596 142103 . + . ID=PP1S9_164V6;kraken_mapped=partial;gene_id=PP1S9_164V6;protein_id=PP1S9_164V6.1 scaffold_823 AGAT mRNA 105596 129298 . + . ID=transcript:PP1S9_164V6.2;Parent=PP1S9_164V6;kraken_mapped=100.00%25;description=Mapped at 100.00%25;gene_id=PP1S9_164V6;protein_id=PP1S9_164V6.2;transcript_id=PP1S9_164V6.2 scaffold_823 jgi exon 105596 105903 . + . ID=PP1S9_164V6.2.exon1;Parent=transcript:PP1S9_164V6.2;kraken_mapped=TRUE;Name=PP1S9_164V6.2.exon1;constitutive=0;ensembl_end_phase=2;ensembl_phase=0;exon_id=PP1S9_164V6.2.exon1;gene_id=PP1S9_164V6;rank=1;transcript_id=PP1S9_164V6.2 scaffold_823 jgi exon 106102 106323 . + . ID=PP1S9_164V6.2.exon2;Parent=transcript:PP1S9_164V6.2;kraken_mapped=TRUE;Name=PP1S9_164V6.2.exon2;constitutive=0;ensembl_end_phase=2;ensembl_phase=2;exon_id=PP1S9_164V6.2.exon2;gene_id=PP1S9_164V6;rank=2;transcript_id=PP1S9_164V6.2 From abca51cefc613bb94015e703757cff450f122b7b Mon Sep 17 00:00:00 2001 From: Jacques Dainat Date: Tue, 17 Sep 2024 14:59:07 +0200 Subject: [PATCH 4/4] fix test --- t/gff_syntax/out/30_correct_output.gff | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/t/gff_syntax/out/30_correct_output.gff b/t/gff_syntax/out/30_correct_output.gff index 95eab3af..d503d2a5 100644 --- a/t/gff_syntax/out/30_correct_output.gff +++ b/t/gff_syntax/out/30_correct_output.gff @@ -5,6 +5,6 @@ 000000F|arrow StringTie pseudogene 1147 3802 1000 - . ID=P12103_109_S2_L002.2;gene_id=P12103_109_S2_L002.2;transcript_id=P12103_109_S2_L002.2.1 000000F|arrow AGAT RNA 1147 3802 . - . ID=P12103_109_S2_L002.2.1;Parent=P12103_109_S2_L002.2;gene_id=P12103_109_S2_L002.2;transcript_id=P12103_109_S2_L002.2.1 000000F|arrow StringTie exon 1147 3802 1000 - . ID=agat-exon-2;Parent=P12103_109_S2_L002.2.1;gene_id=P12103_109_S2_L002.2;transcript_id=P12103_109_S2_L002.2.1 -000000F|arrow AGAT gene 6990 7449 . - . ID=P12103_109_S2_L002.4;gene_id=P12103_109_S2_L002.4;transcript_id=P12103_109_S2_L002.4.1 +000000F|arrow AGAT gene 6990 7449 . - . ID=P12103_109_S2_L002.4;gene_id=P12103_109_S2_L002.4 000000F|arrow StringTie transcript 6990 7449 1000 - . ID=P12103_109_S2_L002.4.1;Parent=P12103_109_S2_L002.4;gene_id=P12103_109_S2_L002.4;transcript_id=P12103_109_S2_L002.4.1 000000F|arrow StringTie exon 6990 7449 1000 - . ID=agat-exon-3;Parent=P12103_109_S2_L002.4.1;gene_id=P12103_109_S2_L002.4;transcript_id=P12103_109_S2_L002.4.1