/**
 * UTF-8 -> UTF-16LE conversion demo (main.cpp in the original article).
 *
 * Reference: online string-encoding conversion tool,
 * https://qr9.net/string-encoding
 */
#include <stdint.h>
#include <stdio.h>
#include <string.h>

#ifdef _WIN32
#include <windows.h> /* SetConsoleOutputCP, CP_UTF8 */
#endif

/**
 * Convert a NUL-terminated UTF-8 string to UTF-16 (no BOM, little-endian).
 *
 * Only 1-, 2- and 3-byte UTF-8 sequences (U+0000 .. U+FFFF) are converted.
 * The following input is skipped without producing output:
 *   - 4-byte sequences (U+10000 and above) and stray/invalid bytes,
 *   - sequences whose continuation bytes are missing or malformed,
 *   - overlong encodings (e.g. C0 80, E0 80 80),
 *   - encoded surrogate code points (U+D800 .. U+DFFF).
 *
 * Conversion stops as soon as fewer than 2 bytes remain in the output
 * buffer. A 2-byte NUL terminator is appended only if 2 more bytes are
 * still available afterwards, so a completely filled buffer is NOT
 * terminated.
 *
 * @param utf8_str   NUL-terminated UTF-8 input.
 * @param utf16_buf  Output buffer receiving the UTF-16LE bytes.
 * @param buf_size   Size of utf16_buf in bytes.
 * @return Number of bytes written to utf16_buf, not counting the
 *         terminator; 0 if either pointer is NULL.
 */
uint32_t utf8_to_utf16_le(const uint8_t *utf8_str, uint8_t *utf16_buf, uint32_t buf_size)
{
    uint32_t utf8_index = 0;
    uint32_t utf16_index = 0;

    if (utf8_str == NULL || utf16_buf == NULL) {
        return 0;
    }

    while (utf8_str[utf8_index] != '\0') {
        const uint8_t byte = utf8_str[utf8_index];
        uint32_t codepoint;

        /* 1. Decode one UTF-8 code point. */
        if ((byte & 0x80) == 0x00) {
            /* Single byte: 0xxxxxxx (U+0000 .. U+007F). */
            codepoint = byte;
            utf8_index += 1;
        } else if ((byte & 0xE0) == 0xC0) {
            /* Two bytes: 110xxxxx 10xxxxxx (U+0080 .. U+07FF). */
            const uint8_t b1 = utf8_str[utf8_index + 1];
            if ((b1 & 0xC0) != 0x80) {
                /* Bad continuation byte (this also covers hitting the
                 * terminating NUL early): drop the lead byte only. */
                utf8_index += 1;
                continue;
            }
            codepoint = ((uint32_t)(byte & 0x1F) << 6) | (uint32_t)(b1 & 0x3F);
            utf8_index += 2;
            if (codepoint < 0x80) {
                /* Overlong form (lead byte C0/C1), e.g. C0 80 would
                 * otherwise smuggle U+0000 into the output. */
                continue;
            }
        } else if ((byte & 0xF0) == 0xE0) {
            /* Three bytes: 1110xxxx 10xxxxxx 10xxxxxx (U+0800 .. U+FFFF,
             * which includes the commonly used CJK characters). */
            const uint8_t b1 = utf8_str[utf8_index + 1];
            if ((b1 & 0xC0) != 0x80) {
                /* Must be checked BEFORE touching the next byte: if b1 is
                 * the terminating NUL, index + 2 is past the string. */
                utf8_index += 1;
                continue;
            }
            const uint8_t b2 = utf8_str[utf8_index + 2];
            if ((b2 & 0xC0) != 0x80) {
                utf8_index += 1;
                continue;
            }
            codepoint = ((uint32_t)(byte & 0x0F) << 12)
                      | ((uint32_t)(b1 & 0x3F) << 6)
                      | (uint32_t)(b2 & 0x3F);
            utf8_index += 3;
            if (codepoint < 0x800) {
                /* Overlong three-byte form. */
                continue;
            }
        } else {
            /* Lead byte of a 4-byte sequence, a stray continuation byte
             * or an otherwise invalid byte: skip it. */
            utf8_index += 1;
            continue;
        }

        /* 2. Surrogate code points are not valid scalar values. */
        if (codepoint >= 0xD800 && codepoint <= 0xDFFF) {
            continue;
        }

        /* 3. Emit one UTF-16LE code unit (codepoint <= 0xFFFF here).
         * Written as a subtraction so the check cannot wrap around. */
        if (buf_size - utf16_index < 2) {
            break;
        }
        utf16_buf[utf16_index++] = (uint8_t)(codepoint & 0xFF);        /* low byte  */
        utf16_buf[utf16_index++] = (uint8_t)((codepoint >> 8) & 0xFF); /* high byte */
    }

    /* 4. Append the 2-byte UTF-16 NUL terminator when it fits. */
    if (buf_size - utf16_index >= 2) {
        utf16_buf[utf16_index] = 0x00;
        utf16_buf[utf16_index + 1] = 0x00;
    }

    return utf16_index;
}

/**
 * Encode a single Unicode code point as UTF-16LE followed by a 2-byte NUL.
 *
 * @param codepoint  Code point in U+0000 .. U+FFFF, excluding the
 *                   surrogate range U+D800 .. U+DFFF.
 * @param utf16_buf  Output buffer, at least 4 bytes (2 data + 2 terminator).
 * @param buf_size   Size of utf16_buf in bytes.
 * @return Number of data bytes written (always 2 on success, terminator not
 *         counted); 0 if the code point is rejected, the buffer is NULL or
 *         the buffer is smaller than 4 bytes.
 */
uint32_t codepoint_to_utf16_le(uint32_t codepoint, uint8_t *utf16_buf, uint32_t buf_size)
{
    if (utf16_buf == NULL) {
        return 0;
    }
    /* Reject surrogates and everything above the BMP. */
    if (codepoint >= 0xD800 && codepoint <= 0xDFFF) {
        return 0;
    }
    if (codepoint > 0xFFFF) {
        return 0;
    }
    if (buf_size < 4) {
        return 0;
    }

    utf16_buf[0] = (uint8_t)(codepoint & 0xFF);
    utf16_buf[1] = (uint8_t)((codepoint >> 8) & 0xFF);
    utf16_buf[2] = 0x00;
    utf16_buf[3] = 0x00;
    return 2;
}

/**
 * Print `len` bytes of `data` as space-separated upper-case hex, prefixed
 * by `label` and the byte count, followed by a newline.
 */
void print_hex(const char *label, const uint8_t *data, uint32_t len)
{
    /* Cast so the argument type matches the conversion specifier. */
    printf("%s (Len: %lu): ", label, (unsigned long)len);
    for (uint32_t i = 0; i < len; i++) {
        printf("%02X ", data[i]);
    }
    printf("\n");
}

int main(void)
{
#ifdef _WIN32
    /* Let the Windows console render the UTF-8 text printed below. */
    SetConsoleOutputCP(CP_UTF8);
#endif

    uint8_t utf16_buffer[64];

    /* --- Test 1: string conversion --- */
    const char *test_str = "你好";
    memset(utf16_buffer, 0, sizeof(utf16_buffer));

    print_hex("UTF-8     ", (const uint8_t *)test_str, (uint32_t)strlen(test_str));
    uint32_t len = utf8_to_utf16_le((const uint8_t *)test_str, utf16_buffer,
                                    (uint32_t)sizeof(utf16_buffer));
    print_hex("UTF-16 LE ", utf16_buffer, len);

    printf("\n");

    /* --- Test 2: single code point conversion --- */
    const uint32_t codepoints[] = { 0x4F60, 0x0041, 0x00E9 };
    const char *labels[] = { "U+4F60 (你)", "U+0041 (A)", "U+00E9 (é)" };
    const size_t count = sizeof(codepoints) / sizeof(codepoints[0]);

    for (size_t i = 0; i < count; i++) {
        memset(utf16_buffer, 0, sizeof(utf16_buffer));
        len = codepoint_to_utf16_le(codepoints[i], utf16_buffer,
                                    (uint32_t)sizeof(utf16_buffer));
        printf("Codepoint %-16s -> ", labels[i]);
        print_hex("UTF-16 LE", utf16_buffer, len);
    }

    return 0;
}

/*
 * Output of the demo as shown in the original article (column spacing is
 * approximate):
 *
 *   UTF-8      (Len: 6): E4 BD A0 E5 A5 BD
 *   UTF-16 LE  (Len: 4): 60 4F 7D 59
 *
 *   Codepoint U+4F60 (你) -> UTF-16 LE (Len: 2): 60 4F
 *   Codepoint U+0041 (A)  -> UTF-16 LE (Len: 2): 41 00
 *   Codepoint U+00E9 (é)  -> UTF-16 LE (Len: 2): E9 00
 */