vc++宽字符转换代码.pdf-道客多多

资源描述

1、宽字符转换:宽字符转换的相关代码疯狂代码 http:/CrazyC :http:/CrazyC . C中和w互转第中思路方法:WideToMutilByte和MutilByteToWide WideToMutilByte(const w char *szBuf = charnBufSize; WideCharToMultiByte(GetACP, 0, _src.c_str,-1, szBuf, nBufSize, 0, FALSE); strRet(szBuf); delete szBuf; szBuf = NULL; strRet; w MutilByteToWide(const /

2、为 wsbuf 分配内存 BufSize 个字节 wchar_t *wsBuf = wchar_tnBufSize; / 转化为 unicode WideString MultiByteToWideChar(GetACP,0,_src.c_str,-1,wsBuf,nBufSize); w wstrRet(wsBuf); delete wsBuf; wsBuf = NULL; wstrRet; 代码: # # using std; ws2s(const w / curLocale = “C“; locale(LC_ALL, “chs“); const wchar_t* _Source = ws

3、.c_str; size_t _Dsize = 2 * ws.size + 1; char *_Dest = char_Dsize; mem(_Dest,0,_Dsize); wcstombs(_Dest,_Source,_Dsize); result = _Dest; delete _Dest; locale(LC_ALL, curLocale.c_str); result; w s2ws(const const char* _Source = s.c_str; size_t _Dsize = s.size + 1; wchar_t *_Dest = wchar_t_Dsize; wmem(

4、_Dest, 0, _Dsize); mbstowcs(_Dest,_Source,_Dsize); w result = _Dest; delete _Dest; locale(LC_ALL, “C“); result; 2 utf8.utf16.utf32相互转化可以参考Unicode.org 上有ConvertUTF.c和ConvertUTF.h (下载地址:http://www.unicode.org/Public/PROGRAMS/CVTUTF/) 实现文件ConvertUTF.c:(.h省) /*/* * Copyright 2001-2004 Unicode, Inc. * *

5、Disclaimer * * This source code is provided as is by Unicode, Inc. No claims are * made as to fitness for any particular purpose. No warranties of any * kind are expressed or implied. The recipient agrees to determine * applicability of information provided. If this file has been * purchased _disibl

6、edevent= const UTF32 halfBase = 0x0010000UL; const UTF32 halfMask = 0x3FFUL; # UNI_SUR_HIGH_START (UTF32)0xD800 # UNI_SUR_HIGH_END (UTF32)0xDBFF # UNI_SUR_LOW_START (UTF32)0xDC00 # UNI_SUR_LOW_END (UTF32)0xDFFF # false 0 # true 1 /*/* - */ ConversionResult ConvertUTF32toUTF16 ( const UTF32* sourceSt

7、art, const UTF32* sourceEnd, UTF16* targetStart, UTF16* targetEnd, ConversionFlags flags) ConversionResult result = conversionOK; const UTF32* source = *sourceStart; UTF16* target = *targetStart; while (source = targetEnd) result = targetExhausted; ; ch = *source; (ch = UNI_SUR_HIGH_START *target =

8、UNI_REPLACEMENT_CHAR; /*/* target is a character in range 0xFFFF - 0x10FFFF. */ (target + 1 = targetEnd) -source; /*/* Back up source poer! */ result = targetExhausted; ; ch -= halfBase; *target = (UTF16)(ch halfSht) + UNI_SUR_HIGH_START); *target = (UTF16)(ch *sourceStart = source; *targetStart = t

9、arget; result; /*/* - */ ConversionResult ConvertUTF16toUTF32 ( const UTF16* sourceStart, const UTF16* sourceEnd, UTF32* targetStart, UTF32* targetEnd, ConversionFlags flags) ConversionResult result = conversionOK; const UTF16* source = *sourceStart; UTF32* target = *targetStart; UTF32 ch, ch2; whil

10、e (source = UNI_SUR_HIGH_START /*/* Back up source poer! */ result = targetExhausted; ; *target = ch; *sourceStart = source; *targetStart = target; #def CVTUTF_DEBUG (result sourceIllegal) fprf(stderr, “ConvertUTF16toUTF32 illegal seq 0x%04x,%04xn“, ch, ch2); fflush(stderr); #end result; /*/* - */ /

11、*/* * Index into the table below with the first byte of a UTF-8 sequence to * get the number of trailing bytes that are supposed to follow it. * Note that *legal* UTF-8 values can't have 4 or 5-bytes. The table is * left as-is for anyone who may want to do such conversion, which was * allowed in earlier algorithms.

12、*/ const char trailingBytesForUTF8256 = 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0

13、,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, 3,3,3,3,3,3,3,3,4,4,4,4,5,5,5,5 ; /*/* * Magic values subtracted

14、from a buffer value during UTF8 conversion. * This table contains as many values as there might be trailing bytes * in a UTF-8 sequence. */ const UTF32 offsFromUTF86 = 0x00000000UL, 0x00003080UL, 0x000E2080UL, 0x03C82080UL, 0xFA082080UL, 0x82082080UL ; /*/* * _disibledevent= /*/* - */ /*/* The interface co

15、nverts a whole buffer to avoid function-call overhead. * Constants have been gathered. Loops const UTF16* source = *sourceStart; UTF8* target = *targetStart; while (source = UNI_SUR_HIGH_START /*/* Back up source poer! */ target -= sToWrite; result = targetExhausted; ; switch (sToWrite) /*/* note: e

16、verything falls through. */ 4: *-target = (UTF8)(ch | Mark) ch = 6; 3: *-target = (UTF8)(ch | Mark) ch = 6; 2: *-target = (UTF8)(ch | Mark) ch = 6; 1: *-target = (UTF8)(ch | firstByteMarksToWrite); target sToWrite; *sourceStart = source; *targetStart = target; result; /*/* - */ /*/* * Utility routin

17、e to tell whether a sequence of bytes is legal UTF-8. * This must be called with the length pre-determined by the first byte. * If not calling this from ConvertUTF8to*, then the length can be set by: * length = trailingBytesForUTF8[*source]+1; * and the sequence is illegal right away if there aren't that many bytes * ava

18、ilable. * If presented with a length > 4, this returns false. The Unicode * definition of UTF-8 goes up to 4-byte sequences. */ Boolean isLegalUTF8(const UTF8 *source, length) UTF8 a; const UTF8 *srcptr = source+length; switch (length) default: false; /*/* Everything falls through when “true“ */ 4: (a = (*-srcp

19、tr) 0xBF) false; 3: (a = (*-srcptr) 0xBF) false; 2: (a = (*-srcptr) 0xBF) false; switch (*source) /*/* no fall-through in this inner switch */ 0xE0: (a 0x9F) false; ; 0xF0: (a 0x8F) false; ; default: (a = 0x80 true; /*/* - */ /*/* * Exported function to whether a UTF-8 sequence is legal or not. * Th

20、is is not used here; its just exported. */ Boolean isLegalUTF8Sequence(const UTF8 *source, const UTF8 *sourceEnd) length = trailingBytesForUTF8*source+1; (source+length sourceEnd) false; isLegalUTF8(source, length); /*/* - */ ConversionResult ConvertUTF8toUTF16 ( const UTF8* sourceStart, const UTF8*

21、 sourceEnd, UTF16* targetStart, UTF16* targetEnd, ConversionFlags flags) ConversionResult result = conversionOK; const UTF8* source = *sourceStart; UTF16* target = *targetStart; while (source = sourceEnd) result = sourceExhausted; ; /*/* Do this check whether lenient or strict */ (! isLegalUTF8(sour

22、ce, extraBytesToRead+1) result = sourceIllegal; ; /*/* * The s all fall through. See “Note A“ below. */ switch (extraBytesToRead) 5: ch *source; ch = targetEnd) source -= (extraBytesToRead+1); /*/* Back up source poer! */ result = targetExhausted; ; (ch = UNI_SUR_HIGH_START source -= (extraBytesToRe

23、ad+1); /*/* to the start */ ; /*/* Bail out; shouldnt continue */ *target = UNI_REPLACEMENT_CHAR; /*/* target is a character in range 0xFFFF - 0x10FFFF. */ (target + 1 = targetEnd) source -= (extraBytesToRead+1); /*/* Back up source poer! */ result = targetExhausted; ; ch -= halfBase; *target = (UTF

24、16)(ch halfSht) + UNI_SUR_HIGH_START); *target = (UTF16)(ch *sourceStart = source; *targetStart = target; result; /*/* - */ ConversionResult ConvertUTF32toUTF8 ( const UTF32* sourceStart, const UTF32* sourceEnd, UTF8* targetStart, UTF8* targetEnd, ConversionFlags flags) ConversionResult result = con

25、versionOK; const UTF32* source = *sourceStart; UTF8* target = *targetStart; while (source = UNI_SUR_HIGH_START /*/* Back up source poer! */ target -= sToWrite; result = targetExhausted; ; switch (sToWrite) /*/* note: everything falls through. */ 4: *-target = (UTF8)(ch | Mark) ch = 6; 3: *-target =

26、(UTF8)(ch | Mark) ch = 6; 2: *-target = (UTF8)(ch | Mark) ch = 6; 1: *-target = (UTF8) (ch | firstByteMarksToWrite); target sToWrite; *sourceStart = source; *targetStart = target; result; /*/* - */ ConversionResult ConvertUTF8toUTF32 ( const UTF8* sourceStart, const UTF8* sourceEnd, UTF32* targetSta

27、rt, UTF32* targetEnd, ConversionFlags flags) ConversionResult result = conversionOK; const UTF8* source = *sourceStart; UTF32* target = *targetStart; while (source = sourceEnd) result = sourceExhausted; ; /*/* Do this check whether lenient or strict */ (! isLegalUTF8(source, extraBytesToRead+1) resu

28、lt = sourceIllegal; ; /*/* * The s all fall through. See “Note A“ below. */ switch (extraBytesToRead) 5: ch *source; ch = targetEnd) source -= (extraBytesToRead+1); /*/* Back up the source poer! */ result = targetExhausted; ; (ch 0x10FFFF) is illegal. */ (ch = UNI_SUR_HIGH_START *target = UNI_REPLAC

29、EMENT_CHAR; *sourceStart = source; *targetStart = target; result; /*/* - Note A. The fall-through switches in UTF-8 reading code save a temp variable, some decrements do ch *source; -tmpBytesToRead; (tmpBytesToRead) ch 0); In UTF-8 writing code, the switches _disibledevent=(constchar*)(Marshal:Strin

30、gToHGlobalAnsi(s).ToPoer; os = chars; Marshal:FreeHGlobal(IntPtr(void*)chars); void MarshalString ( String s, w const wchar_t* chars = (const wchar_t*)(Marshal:StringToHGlobalUni(s).ToPoer; os = chars; Marshal:FreeHGlobal(IntPtr(void*)chars); a = “test“; w b = L“test2“; String c = gc String(“abcd“);

31、 cout # # using ; String str = “Hello“; / Pin memory so GC cant move it while native function is called pin_ptr wch = PtrChars(str); prf_s(“%Sn“, wch); / Conversion to char* : / Can just convert wchar_t* to char* using _disibledevent=size_t sizeInBytes = (str-Length + 1) *2); errno_t err = 0; char *ch = (char *)malloc(sizeInBytes); err = wcstombs_s( (err != 0) prf_s(“wcstombs_s failed!n“); prf_s(“%sn“, ch); 2009-8-16 12:40:46 疯狂代码 http:/CrazyC

展开阅读全文