Hangul Processing - Part 2 (Korean Letters) - by Eun Bae Kim
 

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
  
use strict;
use warnings;
use Encode;

# This script was saved in ANSI.
# cp949 (MS949) = the default ANSI code page on Korean Windows; an extension of EUC-KR.
# When you save a text file as ANSI on Korean Windows, it is encoded in cp949.
# Put a cp949 layer on both console handles: text read from STDIN is decoded
# from cp949 into Perl characters, and characters printed to STDOUT are
# encoded back to cp949.
binmode STDIN,  ":encoding(cp949)";    # To set STDIN  to cp949
binmode STDOUT, ":encoding(cp949)";    # To set STDOUT to cp949

print "Enter any Hangul Text: ";
my $sInput = <STDIN>;
# <STDIN> returns undef at end-of-file (for example when input is redirected
# from an empty file); use an empty string so the echo below does not warn.
$sInput = "" unless defined $sInput;
print $sInput."\n";                    # echo the decoded input; the STDOUT layer re-encodes it to cp949


print "--------------------------------------------\n";
# Save an ANSI (cp949) string to a_ANSI.txt unchanged, then show it on STDOUT
# twice: once undecoded and once after decode("cp949", ...).
# Three-argument open keeps the mode separate from the file name, and the die
# message reports the OS error in $!.
open(my $Hout1, ">", "a_ANSI.txt") or die "Cannot open a_ANSI.txt for writing: $!";
my $sText1 = "ANSI-°¡³ª´Ù¶ó¸¶¹Ù»ç";         # Text in ANSI
print $Hout1 $sText1."\n";             # Saved in ANSI (no encoding layer on this handle)
print "ANSI               : ".$sText1." --> Saved\n";          # undecoded string sent through the cp949 STDOUT layer
print "ANSI --> dec-cp949 : ".decode("cp949", $sText1)."\n";   # Good Result
# Buffered write errors surface at close, so check it on a write handle.
close($Hout1) or die "Cannot close a_ANSI.txt: $!";
<STDIN>;                               # read and discard one line: pause until Enter

print "--------------------------------------------\n";
# Convert an ANSI (cp949) string to UTF-8 and save it to a_UTF8.txt, then show
# the original, the UTF-8 bytes and the decoded characters on STDOUT.
open(my $Hout2, ">", "a_UTF8.txt") or die "Cannot open a_UTF8.txt for writing: $!";
my $sText2 = "UTF8-°¡³ª´Ù¶ó¸¶¹Ù»ç";         # Text in ANSI
# Decode and encode once and reuse the results; with the default CHECK
# argument neither call modifies its input.
my $sChars2 = decode("cp949", $sText2);      # cp949 bytes --> Perl characters
my $sUtf8_2 = encode("UTF-8", $sChars2);     # Perl characters --> UTF-8 bytes
print $Hout2 $sUtf8_2."\n";                  # Saved in UTF-8
print "ANSI                             : ".$sText2."\n";
print "ANSI --> dec-cp949 --> enc-UTF-8 : ".$sUtf8_2." --> Saved\n";
print "ANSI --> dec-cp949               : ".$sChars2."\n";   # Good Result
# Buffered write errors surface at close, so check it on a write handle.
close($Hout2) or die "Cannot close a_UTF8.txt: $!";
<STDIN>;                                     # read and discard one line: pause until Enter


print "--------------------------------------------\n";
# Read a_ANSI.txt (Korean letters in ANSI/cp949) line by line without any
# encoding layer, and print each line as-is and after each of the four
# encode/decode combinations.
open(my $H1, "<", "a_ANSI.txt") or die "Cannot open a_ANSI.txt for reading: $!";
while (my $sLine = <$H1>) {
	# The line is not chomped, so every value below still ends in a newline.
	# Names avoid $a/$b, which are reserved for sort comparison blocks.
	my $sEncCp949 = encode("cp949", $sLine);
	my $sEncUtf8  = encode("UTF-8", $sLine);
	my $sDecCp949 = decode("cp949", $sLine);
	my $sDecUtf8  = decode("UTF-8", $sLine);
	print "0. ANSI              : ".$sLine."\n----------------\n";
	print "1. ANSI --> enc-cp949: ".$sEncCp949."\n----------------\n";
	print "2. ANSI --> enc-UTF-8: ".$sEncUtf8."\n----------------\n";
	print "3. ANSI --> dec-cp949: ".$sDecCp949."\n----------------\n";  # Good Result
	print "4. ANSI --> dec-UTF-8: ".$sDecUtf8."\n----------------\n";
}
close($H1);
<STDIN>;                                # read and discard one line: pause until Enter

print "--------------------------------------------\n";
# Read a_UTF8.txt (Korean letters in UTF-8) line by line without any encoding
# layer, and print each line as-is and after each of the four encode/decode
# combinations.
open(my $H2, "<", "a_UTF8.txt") or die "Cannot open a_UTF8.txt for reading: $!";
while (my $sLine = <$H2>) {
	# The line is not chomped, so every value below still ends in a newline.
	# Names avoid $a/$b, which are reserved for sort comparison blocks.
	my $sEncCp949 = encode("cp949", $sLine);
	my $sEncUtf8  = encode("UTF-8", $sLine);
	my $sDecCp949 = decode("cp949", $sLine);
	my $sDecUtf8  = decode("UTF-8", $sLine);
	print "0. UTF-8              : ".$sLine."\n----------------\n";
	print "1. UTF-8 --> enc-cp949: ".$sEncCp949."\n----------------\n";
	print "2. UTF-8 --> enc-UTF-8: ".$sEncUtf8."\n----------------\n";
	print "3. UTF-8 --> dec-cp949: ".$sDecCp949."\n----------------\n";
	print "4. UTF-8 --> dec-UTF-8: ".$sDecUtf8."\n----------------\n";  # Good Result
}
close($H2);
<STDIN>;                                # read and discard one line: pause until Enter

print "--------------------------------------------\n";
# Run the four encode/decode combinations on an ANSI (cp949) literal, then
# print the length of the decoded string and its characters one per line.
my $s = "°í¾çÀÌabcd";         # Text in ANSI
# Names avoid $a/$b: a file-scope "my $a" / "my $b" would mask the package
# variables that sort comparison blocks rely on.
my $sEncCp949 = encode("cp949", $s);
my $sEncUtf8  = encode("UTF-8", $s);
my $sDecCp949 = decode("cp949", $s);
my $sDecUtf8  = decode("UTF-8", $s);
# The input is ANSI text, so the labels say ANSI (they previously said UTF-8).
print "0. ANSI              : ".$s."\n----------------\n";
print "1. ANSI --> enc-cp949: ".$sEncCp949."\n----------------\n";
print "2. ANSI --> enc-UTF-8: ".$sEncUtf8."\n----------------\n";
print "3. ANSI --> dec-cp949: ".$sDecCp949."\n----------------\n";  # Good Result
print "4. ANSI --> dec-UTF-8: ".$sDecUtf8."\n----------------\n";
print "---------------------------------\n";
# After decoding, length() and split // work on characters rather than bytes.
print length($sDecCp949)."\n";
for my $sChar (split //, $sDecCp949) {
	print $sChar."\n";
}



# http://gypark.pe.kr/wiki/Perl/%ED%95%9C%EA%B8%80