#!/usr/local/bin/perl -w # ################################################################################ # Requirements # ################################################################################ # require 5.002; # # External References # use diagnostics; use strict; use Carp; use FileHandle; use File::Path; use File::Basename; use Getopt::Std; # use vars qw( $opt_f $opt_d $opt_h ); # ################################################################################ # Package revision string # ################################################################################ # my $prg = basename($0); my $rev = "1.00"; my $debug = 0; # ################################################################################ # Main program global variables # ################################################################################ # my @methylnam = (); my @methylseq = (); # my $FILE = FileHandle->new(); # my %trans = ( "aa"=>"a","at"=>"?","ag"=>"?","ac"=>"?","an"=>"n","a-"=>"-", "ta"=>"?","tt"=>"t","tg"=>"?","tc"=>"?","tn"=>"n","t-"=>"-", "ga"=>"?","gt"=>"?","gg"=>"g","gc"=>"?","gn"=>"n","g-"=>"-", "ca"=>"?","ct"=>"c","cg"=>"?","cc"=>"C","cn"=>"n","c-"=>"-", "na"=>"a","nt"=>"t","ng"=>"g","nc"=>"c","nn"=>"n","n-"=>"-", ); # my $index ; my $index2 ; my $pos ; my $seq ; my $name ; my $file ; my $base ; my $baseo ; my $basei ; my $pat ; my $pat1 ; my $pat2 ; my $cover ; my $meth_y ; my $meth_n ; my %freq ; my %stat ; # # MAN page # my $MAN = <file.log] OPTIONS -f string File name -d Switchs to debug mode -h Prints this message EXIT STATUS The following perl-like exit stati are returned: 0 Error 1 Successful completion AUTHOR(S) Ruben Schattevoy (schattev\@imb-jena.de) CHANGE LOG When Who What Jul 6th, 1988 RS Initial version MAN # # Command line parsing # getopts('f:dh') || croak($MAN); # # Check command line arguments # croak($MAN) if ( $opt_h || ! defined($opt_f) || $opt_f eq "" ); # # Read input file and split into fasta sequences on the fly # open($FILE,$opt_f) || croak(sprintf("Cannot open file \"%s\"",$opt_f)); while ( <$FILE> ) { chomp; tr /A-Z/a-z/; if ( /^>(\S+)/ ) { push(@methylnam,$1); push(@methylseq,""); } else { $methylseq[-1] .= $_; } } close($FILE) || croak(sprintf("Cannot close open file \"%s\"",$opt_f)); # # Error checking # if ( $#methylnam < 1 ) { croak(sprintf("Cannot find methylation data in input file")); } # # Reset frequency counter array # foreach $base ( qw(c C) ) { foreach $index2 ( 0..length($methylseq[0])-1 ) { $freq{$base}[$index2] = 0; } } # foreach $index ( 1..$#methylnam ) { # # Error checking # unless ( length($methylseq[0]) == length($methylseq[$index]) ) { croak(sprintf("Inconsistent length in entry #%d of methylation data", $index+1)); } # $name = $methylnam[$index]; $seq = ""; %stat = (); # printf("\nProcessing file \"%s\"\n",$name); # foreach $pos ( 0..length($methylseq[0])-1 ) { # $baseo = substr($methylseq[ 0],$pos,1); $basei = substr($methylseq[$index],$pos,1); # $base = $baseo . $basei; # $base = exists($trans{$base}) ? $trans{$base} : undef; # if ( defined($base) ) { unless ( $base eq "?" ) { $basei = $base; } else { printf("Unexpected nucleotide combination \"%s:%s\" in input ". "sequence at position \"%d\"\n",$baseo,$basei,$pos+1); } } else { printf("Unexpected nucleotide information \"%s:%s\" in input ". "sequence at position \"%d\"\n",$baseo,$basei,$pos+1); exit; } # $seq .= $basei; # $stat{$baseo,$basei}++; } # # Accumulate "c" and "C" frequency counters # foreach $index2 ( 0..length($methylseq[$index])-1 ) { $base = substr($seq,$index2,1); $freq{$base}[$index2]++ if ( exists($freq{$base}) ); } # # Write sequence file in "atgcC"-format # $file = $name . ".seq1"; open($FILE,">" . $file) || croak(sprintf("Cannot open file \"%s\"",$file)); printf $FILE (">%s\n",$file); printf $FILE ("%s\n",join("\n",grep($_,split(/(.{50})/,$seq)))); close($FILE) || croak(sprintf("Cannot close open file \"%s\"",$file)); # printf("\n"); printf("%-4s : %4s %4s (Length = %d)\n","Base","C","[Cc]",length($seq)); foreach $pat1 ( qw( a t g [Cc] n ) ) { printf("%-4s :",$pat1); foreach $pat2 ( qw( C [Cc] ) ) { $pat = $pat2 . $pat1; printf(" %4d",$seq =~ s/($pat)/$1/g); } printf("\n"); } printf("\n"); # # Write sequence file in "IO-? "-format # $seq =~ s/cg/Ig/g; $seq =~ s/Cg/Og/g; $seq =~ s/n/X/g; $seq =~ s/\-/ /g; $seq =~ s/[atgc]/-/g; # $file = $name . ".seq2"; open($FILE,">" . $file) || croak(sprintf("Cannot open file \"%s\"",$file)); printf $FILE (">%s\n",$file); printf $FILE ("%s\n",join("\n",grep($_,split(/(.{50})/,$seq)))); close($FILE) || croak(sprintf("Cannot close open file \"%s\"",$file)); # printf("%4s %4s %4s %4s %4s %4s %4s\n",qw( a t g c C n - )); # foreach $baseo ( qw( a t g c n ) ) { foreach $basei ( qw( a t g c C n - ) ) { printf("%4d ",exists($stat{$baseo,$basei}) ? $stat{$baseo,$basei} : 0); } printf("%4s\n",$baseo); } } # # Read input file and split into fasta sequences on the fly # $file = $opt_f . ".tab"; # open($FILE,">".$file) || croak(sprintf("Cannot open file \"%s\"",$file)); foreach $index2 ( 0..length($methylseq[0])-1 ) { $base = substr($methylseq[0],$index2,1); printf $FILE ("%4d %s",$index2+1,$base); if ( exists($freq{$base}) ) { $meth_y = $freq{$base}[$index2]; $base =~ tr/a-z/A-Z/; $meth_n = $freq{$base}[$index2]; # # counting number of analysed cytosines ("coverage") # $cover = $meth_y+$meth_n; if ( $meth_y || $meth_n ) { printf $FILE (" %3d",100.0*$meth_n/($meth_y+$meth_n)); printf $FILE (" %3d",$cover); } } printf $FILE ("\n"); } close($FILE) || croak(sprintf("Cannot close open file \"%s\"",$file));