티스토리 뷰
[Perl] BeeConverter (Convert HTML Entity to Unicode Character)
Hide Code 2008. 11. 9. 03:57
use strict;
use warnings;

# BeeConverter: copy a UTF-8 text file, replacing HTML character references
# (decimal "&#NNN;", hexadecimal "&#xHHH;" and named "&name;") with the
# Unicode characters they denote.
#
# Usage: perl <this script> SOURCE RESULT
#   SOURCE - path of the UTF-8 input file
#   RESULT - path of the UTF-8 output file (created or overwritten)
#
# References that cannot be converted (unknown names, surrogate code points,
# values above U+10FFFF) are copied to the output unchanged.

# Entity name => Unicode code point (decimal).
my %codepoint_for = (
    quot => 34, amp => 38, apos => 39, lt => 60, gt => 62, nbsp => 160, iexcl => 161, cent => 162, pound => 163, curren => 164, yen => 165, brvbar => 166, sect => 167, uml => 168, copy => 169,
    ordf => 170, laquo => 171, not => 172, shy => 173, reg => 174, macr => 175, deg => 176, plusmn => 177, sup2 => 178, sup3 => 179, acute => 180, micro => 181, para => 182, middot => 183, cedil => 184,
    sup1 => 185, ordm => 186, raquo => 187, frac14 => 188, frac12 => 189, frac34 => 190, iquest => 191, Agrave => 192, Aacute => 193, Acirc => 194, Atilde => 195, Auml => 196, Aring => 197, AElig => 198, Ccedil => 199,
    Egrave => 200, Eacute => 201, Ecirc => 202, Euml => 203, Igrave => 204, Iacute => 205, Icirc => 206, Iuml => 207, ETH => 208, Ntilde => 209, Ograve => 210, Oacute => 211, Ocirc => 212, Otilde => 213, Ouml => 214,
    times => 215, Oslash => 216, Ugrave => 217, Uacute => 218, Ucirc => 219, Uuml => 220, Yacute => 221, THORN => 222, szlig => 223, agrave => 224, aacute => 225, acirc => 226, atilde => 227, auml => 228, aring => 229,
    aelig => 230, ccedil => 231, egrave => 232, eacute => 233, ecirc => 234, euml => 235, igrave => 236, iacute => 237, icirc => 238, iuml => 239, eth => 240, ntilde => 241, ograve => 242, oacute => 243, ocirc => 244,
    otilde => 245, ouml => 246, divide => 247, oslash => 248, ugrave => 249, uacute => 250, ucirc => 251, uuml => 252, yacute => 253, thorn => 254, yuml => 255, Amacr => 256, amacr => 257, Abreve => 258, abreve => 259,
    Aogon => 260, aogon => 261, Cacute => 262, cacute => 263, Ccirc => 264, ccirc => 265, Cabove => 266, cabove => 267, Ccaron => 268, ccaron => 269, Dcaron => 270, dcaron => 271, Dstrok => 272, dstrok => 273, Emacr => 274,
    emacr => 275, Ebreve => 276, ebreve => 277, Eabove => 278, eabove => 279, Eogon => 280, eogon => 281, Ecaron => 282, ecaron => 283, Gcirc => 284, gcirc => 285, Gbreve => 286, gbreve => 287, Gabove => 288, gabove => 289,
    Gcedil => 290, gcedil => 291, Hcirc => 292, hcirc => 293, Hstrok => 294, hstrok => 295, Itilde => 296, itilde => 297, Imacr => 298, imacr => 299, Ibreve => 300, ibreve => 301, Iogon => 302, iogon => 303, Iabove => 304,
    inodot => 305, IJlig => 306, ijlig => 307, Jcirc => 308, jcirc => 309, Kcedil => 310, kcedil => 311, kgreen => 312, Lacute => 313, lacute => 314, Lcedil => 315, lcedil => 316, Lcaron => 317, lcaron => 318, Lmidot => 319,
    lmidot => 320, Lstrok => 321, lstrok => 322, Nacute => 323, nacute => 324, Ncedil => 325, ncedil => 326, Ncaron => 327, ncaron => 328, napos => 329, ENG => 330, eng => 331, Omacr => 332, omacr => 333, Obreve => 334,
    obreve => 335, Odblac => 336, odblac => 337, OElig => 338, oelig => 339, Racute => 340, racute => 341, Rcedil => 342, rcedil => 343, Rcaron => 344, rcaron => 345, Sacute => 346, sacute => 347, Scirc => 348, scirc => 349,
    Scedil => 350, scedil => 351, Scaron => 352, scaron => 353, Tcedil => 354, tcedil => 355, Tcaron => 356, tcaron => 357, Tstrok => 358, tstrok => 359, Utilde => 360, utilde => 361, Umacr => 362, umacr => 363, Ubreve => 364,
    ubreve => 365, Uring => 366, uring => 367, Udblac => 368, udblac => 369, Uogon => 370, uogon => 371, Wcirc => 372, wcirc => 373, Ycirc => 374, ycirc => 375, Yuml => 376, Zacute => 377, zacute => 378, Zabove => 379,
    zabove => 380, Zcaron => 381, zcaron => 382, fnof => 402, circ => 710, tilde => 732, Alpha => 913, Beta => 914, Gamma => 915, Delta => 916, Epsilon => 917, Zeta => 918, Eta => 919, Theta => 920, Iota => 921,
    Kappa => 922, Lambda => 923, Mu => 924, Nu => 925, Xi => 926, Omicron => 927, Pi => 928, Rho => 929, Sigma => 931, Tau => 932, Upsilon => 933, Phi => 934, Chi => 935, Psi => 936, Omega => 937,
    alpha => 945, beta => 946, gamma => 947, delta => 948, epsilon => 949, zeta => 950, eta => 951, theta => 952, iota => 953, kappa => 954, lambda => 955, mu => 956, nu => 957, xi => 958, omicron => 959,
    pi => 960, rho => 961, sigmaf => 962, sigma => 963, tau => 964, upsilon => 965, phi => 966, chi => 967, psi => 968, omega => 969, thetasym => 977, upsih => 978, piv => 982, ensp => 8194, emsp => 8195,
    thinsp => 8201, zwnj => 8204, zwj => 8205, lrm => 8206, rlm => 8207, ndash => 8211, mdash => 8212, lsquo => 8216, rsquo => 8217, sbquo => 8218, ldquo => 8220, rdquo => 8221, bdquo => 8222, dagger => 8224, Dagger => 8225,
    bull => 8226, hellip => 8230, permil => 8240, prime => 8242, Prime => 8243, lsaquo => 8249, rsaquo => 8250, oline => 8254, frasl => 8260, euro => 8364, image => 8465, weierp => 8472, real => 8476, trade => 8482, alefsym => 8501,
    larr => 8592, uarr => 8593, rarr => 8594, darr => 8595, harr => 8596, crarr => 8629, lArr => 8656, uArr => 8657, rArr => 8658, dArr => 8659, hArr => 8660, forall => 8704, part => 8706, exist => 8707, empty => 8709,
    nabla => 8711, isin => 8712, notin => 8713, ni => 8715, prod => 8719, sum => 8721, minus => 8722, lowast => 8727, radic => 8730, prop => 8733, infin => 8734, ang => 8736, and => 8743, or => 8744, cap => 8745,
    cup => 8746, int => 8747, there4 => 8756, sim => 8764, cong => 8773, asymp => 8776, ne => 8800, equiv => 8801, le => 8804, ge => 8805, sub => 8834, sup => 8835, nsub => 8836, sube => 8838, supe => 8839,
    oplus => 8853, otimes => 8855, perp => 8869, sdot => 8901, lceil => 8968, rceil => 8969, lfloor => 8970, rfloor => 8971, lang => 9001, rang => 9002, loz => 9674, spades => 9824, clubs => 9827, hearts => 9829, diams => 9830,
);

# Return the replacement text for one matched character reference.
#   $whole - the complete matched text, e.g. "&amp;" (returned when the
#            reference cannot be converted)
#   $dec   - digits of a decimal reference, leading zeros removed, else undef
#   $hex   - digits of a hexadecimal reference, leading zeros removed, else undef
#   $name  - name of a named reference, else undef
sub _expand_reference {
    my ($whole, $dec, $hex, $name) = @_;

    my $code = defined $dec ? $dec
             : defined $hex ? hex $hex
             :                $codepoint_for{$name};

    # Unknown entity name: keep the text instead of emitting chr(undef) = NUL.
    return $whole if !defined $code;

    # Surrogates and values past U+10FFFF are not characters; writing them
    # through a UTF-8 layer would warn and yield malformed output.
    return $whole if $code > 0x10FFFF;
    return $whole if $code >= 0xD800 && $code <= 0xDFFF;

    return chr $code;
}

# Return a copy of $text with every convertible character reference replaced
# by its character.  All three reference forms are handled in ONE pass so the
# output of one replacement is never decoded again ("&#38;lt;" becomes the
# text "&lt;", not "<").
sub convert_entities {
    my ($text) = @_;

    # ASCII-only digit classes: \d would also match non-ASCII digits on
    # decoded input.  The digit counts are capped (after leading zeros) so
    # the numeric conversion can never overflow.
    $text =~ s{
        ( &
          (?: \# 0* ([0-9]{1,7})
            | \# [xX] 0* ([0-9A-Fa-f]{1,6})
            | ([A-Za-z0-9]+)
          )
          ;
        )
    }{ _expand_reference($1, $2, $3, $4) }gex;

    return $text;
}

# Command-line entry point: print the banner, then convert SOURCE to RESULT
# line by line.  Dies with a message on bad arguments or any I/O failure.
sub main {
    my @args = @_;

    print "+----------------------+\r\n";
    print "| BeeConverter 1.0.0.3 |\r\n";
    print "| by Wing4Bee |\r\n";
    print "| http://wing4bee.com |\r\n";
    print "+----------------------+\r\n";

    die "Usage: $0 SOURCE RESULT\n" if @args < 2;
    my ($source, $result) = @args;

    open my $in, '<:utf8', $source
        or die "Cannot open '$source' for reading: $!\n";
    open my $out, '>:utf8', $result
        or die "Cannot open '$result' for writing: $!\n";

    while (my $line = <$in>) {
        print {$out} convert_entities($line)
            or die "Cannot write to '$result': $!\n";
    }

    # Buffered write errors only surface at close, so check it.
    close $out or die "Cannot finish writing '$result': $!\n";
    close $in  or die "Cannot close '$source': $!\n";

    return;
}

# Run only when executed as a script, so the subs can be loaded for testing.
main(@ARGV) unless caller;
'Code > Study' 카테고리의 다른 글
[PerlNET] Regex (Perl C# Mix) (0) | 2008.11.09 |
---|---|
[Perl] Cocoa (pl pm sample) (0) | 2008.11.09 |
[PerlNET] CIty (Mix Perl & C#) (0) | 2008.11.09 |
[C/C++ vs C#] static (1) (0) | 2008.07.16 |
[C/C++ vs C#] sizeof (0) | 2008.07.16 |
- Total
- Today
- Yesterday
- DotNetMagic
- tagREADYSTATE
- download.com
- c#
- Phalanger
- autohotkey
- registry
- Automation
- iText
- AxWebBrowser
- java
- Rollback Rx
- Service pack
- Sample Code
- Microsoft
- ScreenHunter
- WinAutomation
- jre
- .net framework
- Regular Expressions
- 애드센스감추기
- 애드센스숨기기
- 스크린캡쳐
- windows
- iTextSharp
- AdSense숨기기
- READYSTATE_COMPLETE
- AdSense감추기
- 유틸리티
일 | 월 | 화 | 수 | 목 | 금 | 토 |
---|---|---|---|---|---|---|
 | | | | 1 | 2 | 3 |
4 | 5 | 6 | 7 | 8 | 9 | 10 |
11 | 12 | 13 | 14 | 15 | 16 | 17 |
18 | 19 | 20 | 21 | 22 | 23 | 24 |
25 | 26 | 27 | 28 | 29 | 30 | 31 |