#!/usr/local/public/bin/perl $VERSION = 0.1; ########################################################### # GGsnp_update.pl # # Read a singlet file and all contig files from a directory # Make a file containing all sequence in fasta format # # Laurence Amilhat le 21 Février 2003 # UMR INRA UBP-ASP # Clermont-ferrand France ########################################################### use strict; use vars qw($opt_h); use Getopt::Std; getopts('h'); ########################################################### # Recupere les noms de fichier et de repertoire ########################################################### print "Enter singlet file:\n"; my $FILE_S=; chomp $FILE_S; print "Enter contig directory:\n"; my $DIRECT=; chomp $DIRECT; open (OUT, "> GGupdate.tfa"); ########################################################### # Main ########################################################### # convert not ATGC caracters by N in the singlet file open (SINGLET, "$FILE_S") || die "cannot find $FILE_S\n"; my $id; my $sequence; my $seq_fasta; while () { chomp; if (/^>/) { $id=$_; print OUT "$id\n"; } else { $sequence=convert($_); print OUT "$sequence\n"; } } close (SINGLET); # read each file in the contig directory open (CONTIGDIR,"ls -R $DIRECT |")|| die "cannot find $DIRECT\n"; my $dir; while () { if (/^(\S+):$/) { $dir=$1; } elsif (/^(\S+)$/) { my $contig_file=$1; open (CONTIG, "$dir/$contig_file") || die "cannot find $contig_file\n"; undef $/; print " processing $dir/$contig_file \n"; my $contig=; $/="\n"; if ($contig =~ /(^>contig.*\n)(^(?!>).*\n)*/gm) { my $name=$1; my $seq=convert($2); $/="\n"; chomp ($name,$seq); my $seq_fasta= fasta(name=>$name,seq=>$seq); print OUT $seq_fasta; } close(CONTIG); } } close (OUT); ########################################################## # Subroutines ########################################################## # convert non ATGC letters to N sub convert { my $ligne=shift @_; $ligne =~ tr/[a-z]/[A-Z]/; $ligne =~ tr/[ATGCN\n]/N/c; return $ligne; } # print a sequence in fasta format sub fasta { my %param=( seq=>"", name=>"", @_); my $fasta=$param{name}."\n"; my $line; my $i=0; my $COL=60; while($line=substr($param{seq},$i,$COL)) { $i+=$COL; $fasta .= $line."\n"; } return $fasta; } # Help sub help { my $message = shift; print "\n$message\n"; print <