#!/usr/bin/perl -w

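#This program converts a sequence file in FASTA format into the
#GenBank EST submission file format.
#Run genbank.pl from the command line; you will be prompted for the FASTA
#filename, the library name, the citation name and the SEQ primer description.
#The output is written to a file named <filename>.genbank.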
#Author: Shiaoman Chao


use strict;
use warnings;

# Run the interactive conversion only when this file is executed as a script;
# when it is loaded from another file the subs below are merely defined.
main() unless caller;

# Prompt for the four inputs, read the FASTA file and write one GenBank EST
# record per read to "<fastafile>.genbank".
# Dies with a message if input ends early or a file cannot be opened/written.
sub main {
    my $fastafile = prompt('Enter filename');
    my $library   = prompt('Enter Library name');
    my $citation  = prompt('Enter Citation name');
    my $primer    = prompt('Enter SEQ primer description');

    # Read the input first so a bad filename fails before any output
    # file is created.
    my @reads = read_fasta_records($fastafile);

    my $estfile = $fastafile . '.genbank';
    open my $out, '>', $estfile
        or die "Cannot open '$estfile' for writing: $!\n";
    for my $read (@reads) {
        print {$out} format_est_record($read, $library, $citation, $primer)
            or die "Cannot write to '$estfile': $!\n";
    }
    # Buffered write errors only surface at close, so check it.
    close $out or die "Cannot close '$estfile': $!\n";
    return;
}

# Print "$label:" and return one chomped line read from STDIN.
# Always reads STDIN (never files named on the command line) and dies if
# input ends before an answer is given.
sub prompt {
    my ($label) = @_;
    print "$label:";
    my $answer = <STDIN>;
    die "Unexpected end of input at prompt '$label'\n"
        unless defined $answer;
    chomp $answer;
    return $answer;
}

# Slurp the file at $path and return the list of records that follow each
# '>' character.  Whatever precedes the first '>' is discarded.
# Dies if the file cannot be opened.
sub read_fasta_records {
    my ($path) = @_;
    open my $in, '<', $path
        or die "Cannot open '$path' for reading: $!\n";
    my $text = do { local $/; <$in> };
    close $in;
    $text = '' unless defined $text;

    my @records = split />/, $text;
    shift @records;    # text before the first '>' is not a record
    return @records;
}

# Build the EST# and CLONE names from a read identifier.
# The identifier is split on '_'; the first two fields are used whole.
# EST#  = 'WHE' field0 '_' field1 '_' first five characters of field2.
# CLONE = the same with the first four characters of field2, after which the
#         final character of the whole name is removed.
# Missing fields are treated as empty strings.
# Returns ($est_name, $clone_name).
sub est_names {
    my ($read_id) = @_;
    $read_id = '' unless defined $read_id;

    my @fields = split /_/, $read_id;
    my ($first, $second, $third) =
        map { defined $_ ? $_ : '' } @fields[0 .. 2];

    my $stem       = 'WHE' . $first . '_' . $second . '_';
    my $est_name   = $stem . substr($third, 0, 5);
    my $clone_name = $stem . substr($third, 0, 4);
    chop $clone_name;    # the clone name drops its last character
    return ($est_name, $clone_name);
}

# Return the complete GenBank EST record text for one FASTA read.
# $read is the record text that followed a '>' in the input.  It is split on
# the literal string 'ABI': the part before the first 'ABI' is the header
# (its first whitespace-delimited token is the read identifier) and the part
# after it, up to any further 'ABI', is emitted verbatim as the sequence.
# $library, $citation and $primer are inserted into the matching fields.
sub format_est_record {
    my ($read, $library, $citation, $primer) = @_;

    my ($header, $sequence) = split /ABI/, $read;
    $header   = '' unless defined $header;
    $sequence = '' unless defined $sequence;

    my $read_id = (split /\s+/, $header)[0];
    my ($est_name, $clone_name) = est_names($read_id);

    # Trailing spaces before each newline are part of the emitted format.
    return join '',
        "TYPE: EST \n",
        "STATUS: New \n",
        "CONT_NAME: Olin Anderson \n",
        "CITATION:\n$citation \n",
        "LIBRARY: $library \n",
        "EST#: $est_name \n",
        "CLONE: $clone_name \n",
        "SEQ_PRIMER: $primer \n",
        "DNA_TYPE: cDNA \n",
        "PUBLIC: \n",
        "COMMENT:\n",
        "Sequences have been trimmed to remove vector sequence and low quality \n",
        "sequence with phred score less than 20 \n",
        "SEQUENCE: $sequence",
        "|| \n\n";
}

# True value so the file can also be loaded with require.
1;

