Ovo sam negde iskopao, ne sećam se više gde, a nije ni isprobano.
Dve funkcije za konverziju 1 karaktera između utf8 i uft16.
Code:
function utf82utf16($utf8) {
// Konvertuje UTF8 u UTF16
if(function_exists('mb_convert_encoding')) {
return mb_convert_encoding($utf8, 'UTF-16', 'UTF-8');
}
// sporija zamena za mb_convert_encoding
switch(strlen($utf8)) {
case 1:
// this case should never be reached, because we are in ASCII range
// see: http://www.cl.cam.ac.uk/~mgk25/unicode.html#utf-8
return $utf8;
case 2:
// return a UTF-16 character from a 2-byte UTF-8 char
// see: http://www.cl.cam.ac.uk/~mgk25/unicode.html#utf-8
return chr(0x07 & (ord($utf8{0}) >> 2))
. chr((0xC0 & (ord($utf8{0}) << 6))
| (0x3F & ord($utf8{1})));
case 3:
// return a UTF-16 character from a 3-byte UTF-8 char
// see: http://www.cl.cam.ac.uk/~mgk25/unicode.html#utf-8
return chr((0xF0 & (ord($utf8{0}) << 4))
| (0x0F & (ord($utf8{1}) >> 2)))
. chr((0xC0 & (ord($utf8{1}) << 6))
| (0x7F & ord($utf8{2})));
}
// ignoring UTF-32 for now, sorry
return '';
}
function utf162utf8($utf16) {
// konvertuje UTF16 u UTF8
if(function_exists('mb_convert_encoding')) {
return mb_convert_encoding($utf16, 'UTF-8', 'UTF-16');
}
// sporija zamena za mb_convert_encoding
$bytes = (ord($utf16{0}) << 8) | ord($utf16{1});
switch(true) {
case ((0x7F & $bytes) == $bytes):
// this case should never be reached, because we are in ASCII range
// see: http://www.cl.cam.ac.uk/~mgk25/unicode.html#utf-8
return chr(0x7F & $bytes);
case (0x07FF & $bytes) == $bytes:
// return a 2-byte UTF-8 character
// see: http://www.cl.cam.ac.uk/~mgk25/unicode.html#utf-8
return chr(0xC0 | (($bytes >> 6) & 0x1F))
. chr(0x80 | ($bytes & 0x3F));
case (0xFFFF & $bytes) == $bytes:
// return a 3-byte UTF-8 character
// see: http://www.cl.cam.ac.uk/~mgk25/unicode.html#utf-8
return chr(0xE0 | (($bytes >> 12) & 0x0F))
. chr(0x80 | (($bytes >> 6) & 0x3F))
. chr(0x80 | ($bytes & 0x3F));
}
// ignoring UTF-32 for now, sorry
return '';
}