Perl: DNA Utility (Reverse Complement, GC Content, Translation) - by Eun Bae Kim (08/03/2018)
 

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
  
use strict;

my $sSeq = "ATGCGTACGTGCGTAGATCGCGCTGATTGA";

# GC Content
my @aResult = funcGcContent($sSeq);		# Get an array result from a function
print "--------------------------------------------\n";
print "Seq    : ".$sSeq."\n";
print "G Cnt  : ".$aResult[0]."\n";
print "C Cnt  : ".$aResult[1]."\n";
print "Length : ".$aResult[2]."\n";
print "GC(%)  : ".$aResult[3]."\n";

sub funcGcContent {
	my $sDnaSeq = uc(shift);            # or use "shift" function. 1st element of Array "@_"

	my $iGCnt = $sDnaSeq =~ tr/G/G/;    # Counting the number of "G"
	my $iCCnt = $sDnaSeq =~ tr/C/C/;    # Counting the number of "C"

	my $iGcContent = ($iGCnt+$iCCnt)*100 / length($sDnaSeq);
	$iGcContent = sprintf("%.2f", $iGcContent);

	return ($iGCnt, $iCCnt, length($sDnaSeq), $iGcContent);     # Return an Array
}





# Reverse Complement
print "--------------------------------------------\n";
print "Original: ".$sSeq."\n";
print "Reverse : ".reverse($sSeq)."\n";
print "Rev.Comp: ".funcRevComp($sSeq)."\n";

sub funcRevComp {
	my $sSeq = reverse uc($_[0]);
	$sSeq =~ tr/ATGCN/TACGN/;                     # A -> T, T ->A, G -> C, C -> G, N -> N
	return $sSeq;	
}






# Translation (DNA to Amino Acids - Protein Sequence)
print "--------------------------------------------\n";
print "DNA Sequence: ".$sSeq."\n";
print "A.A Sequence: ".funcTranslate($sSeq)."\n";

sub funcTranslate{
	my $sSeq = shift;
	my $sAa  = "";

	for (my $i=0; $i<length($sSeq); $i = $i + 3) {        # $i = 0, 3, 6, 9, ...
		my $sCurCodon = substr($sSeq, $i, 3);
		$sAa = $sAa." ".funcCodonToAa($sCurCodon)." ";
	}

	return $sAa;
}


sub funcCodonToAa{
	my $sCodon = uc(shift);

	my %AminoAcid_by_Codon = (
		'TCA'=>'S',	'TCC'=>'S',	'TCG'=>'S',	'TCT'=>'S',
		'TTC'=>'F',	'TTT'=>'F',	'TTA'=>'L',	'TTG'=>'L',
		'TAC'=>'Y',	'TAT'=>'Y',	'TAA'=>'-',	'TAG'=>'-',
		'TGC'=>'C',	'TGT'=>'C',	'TGA'=>'-',	'TGG'=>'W',
		'CTA'=>'L',	'CTC'=>'L',	'CTG'=>'L',	'CTT'=>'L',
		'CCA'=>'P',	'CCC'=>'P',	'CCG'=>'P',	'CCT'=>'P',
		'CAC'=>'H',	'CAT'=>'H',	'CAA'=>'Q',	'CAG'=>'Q',
		'CGA'=>'R',	'CGC'=>'R',	'CGG'=>'R',	'CGT'=>'R',
		'ATA'=>'I',	'ATC'=>'I',	'ATT'=>'I',	'ATG'=>'M',
		'ACA'=>'T',	'ACC'=>'T',	'ACG'=>'T',	'ACT'=>'T',
		'AAC'=>'N',	'AAT'=>'N',	'AAA'=>'K',	'AAG'=>'K',
		'AGC'=>'S',	'AGT'=>'S',	'AGA'=>'R',	'AGG'=>'R',
		'GTA'=>'V',	'GTC'=>'V',	'GTG'=>'V',	'GTT'=>'V',
		'GCA'=>'A',	'GCC'=>'A',	'GCG'=>'A',	'GCT'=>'A',
		'GAC'=>'D',	'GAT'=>'D',	'GAA'=>'E',	'GAG'=>'E',
		'GGA'=>'G',	'GGC'=>'G',	'GGG'=>'G',	'GGT'=>'G'
		);

	if(exists $AminoAcid_by_Codon{$sCodon}) {
		return $AminoAcid_by_Codon{$sCodon};
	} else	{
		return "X";
	}
}


# For more information, refer to the following link
# https://www.tutorialspoint.com/perl
# http://www.perl.or.kr/perl_iyagi/regexp