utf-8编码转换成gb2312
(编辑:jimmy 日期: 2025/1/11 浏览:3 次 )
[code]<script>
function chinesefromutf8url(strutf8)
{
var bstr = "";
var noffset = 0;
// processing point on strutf8
if( strutf8 == "" )
return "";
strutf8 = strutf8.tolowercase();
noffset = strutf8.indexof("%e");
if( noffset == -1 )
return strutf8;
while( noffset != -1 )
{
bstr += strutf8.substr(0, noffset);
strutf8 = strutf8.substr(noffset, strutf8.length - noffset);
if( strutf8 == "" ¦ ¦ strutf8.length < 9 ) // bad string
return bstr;
bstr += utf8codetochinesechar(strutf8.substr(0, 9));
strutf8 = strutf8.substr(9, strutf8.length - 9);
noffset = strutf8.indexof("%e");
}
return bstr + strutf8;
}
function unicodefromutf8(strutf8)
{
var bstr = "";
var ntotalchars = strutf8.length; // total chars to be processed.
var noffset = 0; // processing point on strutf8
var nremainingbytes = ntotalchars; // how many bytes left to be converted
var noutputposition = 0;
var icode, icode1, icode2; // the value of the unicode.
while (noffset < ntotalchars)
{
icode = strutf8.charcodeat(noffset);
if ((icode & 0x80) == 0) // 1 byte.
{
if ( nremainingbytes < 1 ) // not enough data
break;
bstr += string.fromcharcode(icode & 0x7f);
noffset ++;
nremainingbytes -= 1;
}
else if ((icode & 0xe0) == 0xc0) // 2 bytes
{
icode1 = strutf8.charcodeat(noffset + 1);
if ( nremainingbytes < 2 ¦ ¦ // not enough data
(icode1 & 0xc0) != 0x80 ) // invalid pattern
{
break;
}
bstr += string.fromcharcode(((icode & 0x3f) << 6) ¦ ( icode1 & 0x3f));
noffset += 2;
nremainingbytes -= 2;
}
else if ((icode & 0xf0) == 0xe0) // 3 bytes
{
icode1 = strutf8.charcodeat(noffset + 1);
icode2 = strutf8.charcodeat(noffset + 2);
if ( nremainingbytes < 3 ¦ ¦ // not enough data
(icode1 & 0xc0) != 0x80 ¦ ¦ // invalid pattern
(icode2 & 0xc0) != 0x80 )
{
break;
}
bstr += string.fromcharcode(((icode & 0x0f) << 12) ¦
((icode1 & 0x3f) << 6) ¦
(icode2 & 0x3f));
noffset += 3;
nremainingbytes -= 3;
}
else // 4 or more bytes -- unsupported
break;
}
if (nremainingbytes != 0)
{
// bad utf8 string.
return "";
}
return bstr;
}
function utf8codetochinesechar(strutf8)
{
var icode, icode1, icode2;
icode = parseint("0x" + strutf8.substr(1, 2));
icode1 = parseint("0x" + strutf8.substr(4, 2));
icode2 = parseint("0x" + strutf8.substr(7, 2));
return string.fromcharcode(((icode & 0x0f) << 12) ¦
((icode1 & 0x3f) << 6) ¦
(icode2 & 0x3f));
}
alert(chinesefromutf8url("%e6%b5%8b%e8%af%95"))
</script>[code]
function chinesefromutf8url(strutf8)
{
var bstr = "";
var noffset = 0;
// processing point on strutf8
if( strutf8 == "" )
return "";
strutf8 = strutf8.tolowercase();
noffset = strutf8.indexof("%e");
if( noffset == -1 )
return strutf8;
while( noffset != -1 )
{
bstr += strutf8.substr(0, noffset);
strutf8 = strutf8.substr(noffset, strutf8.length - noffset);
if( strutf8 == "" ¦ ¦ strutf8.length < 9 ) // bad string
return bstr;
bstr += utf8codetochinesechar(strutf8.substr(0, 9));
strutf8 = strutf8.substr(9, strutf8.length - 9);
noffset = strutf8.indexof("%e");
}
return bstr + strutf8;
}
function unicodefromutf8(strutf8)
{
var bstr = "";
var ntotalchars = strutf8.length; // total chars to be processed.
var noffset = 0; // processing point on strutf8
var nremainingbytes = ntotalchars; // how many bytes left to be converted
var noutputposition = 0;
var icode, icode1, icode2; // the value of the unicode.
while (noffset < ntotalchars)
{
icode = strutf8.charcodeat(noffset);
if ((icode & 0x80) == 0) // 1 byte.
{
if ( nremainingbytes < 1 ) // not enough data
break;
bstr += string.fromcharcode(icode & 0x7f);
noffset ++;
nremainingbytes -= 1;
}
else if ((icode & 0xe0) == 0xc0) // 2 bytes
{
icode1 = strutf8.charcodeat(noffset + 1);
if ( nremainingbytes < 2 ¦ ¦ // not enough data
(icode1 & 0xc0) != 0x80 ) // invalid pattern
{
break;
}
bstr += string.fromcharcode(((icode & 0x3f) << 6) ¦ ( icode1 & 0x3f));
noffset += 2;
nremainingbytes -= 2;
}
else if ((icode & 0xf0) == 0xe0) // 3 bytes
{
icode1 = strutf8.charcodeat(noffset + 1);
icode2 = strutf8.charcodeat(noffset + 2);
if ( nremainingbytes < 3 ¦ ¦ // not enough data
(icode1 & 0xc0) != 0x80 ¦ ¦ // invalid pattern
(icode2 & 0xc0) != 0x80 )
{
break;
}
bstr += string.fromcharcode(((icode & 0x0f) << 12) ¦
((icode1 & 0x3f) << 6) ¦
(icode2 & 0x3f));
noffset += 3;
nremainingbytes -= 3;
}
else // 4 or more bytes -- unsupported
break;
}
if (nremainingbytes != 0)
{
// bad utf8 string.
return "";
}
return bstr;
}
function utf8codetochinesechar(strutf8)
{
var icode, icode1, icode2;
icode = parseint("0x" + strutf8.substr(1, 2));
icode1 = parseint("0x" + strutf8.substr(4, 2));
icode2 = parseint("0x" + strutf8.substr(7, 2));
return string.fromcharcode(((icode & 0x0f) << 12) ¦
((icode1 & 0x3f) << 6) ¦
(icode2 & 0x3f));
}
alert(chinesefromutf8url("%e6%b5%8b%e8%af%95"))
</script>[code]
下一篇:UTF-8编码第1/2页