To: vim-dev@vim.org Subject: Patch 6.1.454 (extra) Fcc: outbox From: Bram Moolenaar Mime-Version: 1.0 Content-Type: text/plain; charset=ISO-8859-1 Content-Transfer-Encoding: 8bit ------------ Patch 6.1.454 (extra) Problem: Win32: pasting Russian text in Vim with 'enc' set to cp1251 results in utf-8 bytes. (Perelyubskiy) Conversion from DBCS to UCS2 does not work when 'encoding' is not the active codepage. Solution: Introduce enc_codepage and use it for conversion to 'encoding'. Fixes some problems with using another codepage than the one used in the system. (Glenn Maynard) Use MultiByteToWideChar() and WideCharToMultiByte() instead of iconv(). Should do most needed conversions without iconv.dll. Files: src/globals.h, src/gui_w32.c, src/mbyte.c, src/os_mswin.c, src/proto/mbyte.pro, src/proto/os_mswin.pro, src/structs.h *** ../vim61.453/src/globals.h Sat Mar 8 20:33:32 2003 --- src/globals.h Sun Apr 6 23:02:39 2003 *************** *** 576,584 **** DBCS encoding */ EXTERN int enc_unicode INIT(= 0); /* 2: UCS-2 or UTF-16, 4: UCS-4 */ EXTERN int enc_utf8 INIT(= FALSE); /* UTF-8 encoded Unicode */ ! # ifdef FEAT_GUI_W32 ! EXTERN int is_funky_dbcs INIT(= FALSE); /* if DBCS encoding, but not ! current codepage */ # endif EXTERN int has_mbyte INIT(= 0); /* any multi-byte encoding */ --- 576,583 ---- DBCS encoding */ EXTERN int enc_unicode INIT(= 0); /* 2: UCS-2 or UTF-16, 4: UCS-4 */ EXTERN int enc_utf8 INIT(= FALSE); /* UTF-8 encoded Unicode */ ! # ifdef WIN3264 ! EXTERN int enc_codepage INIT(= 0); /* codepage nr of 'encoding' */ # endif EXTERN int has_mbyte INIT(= 0); /* any multi-byte encoding */ *************** *** 593,602 **** EXTERN vimconv_T input_conv; /* type of input conversion */ EXTERN vimconv_T output_conv; /* type of output conversion */ - #ifdef FEAT_MBYTE_IME - EXTERN vimconv_T ime_conv; /* ucs-2 -> encoding */ - EXTERN vimconv_T ime_conv_cp; /* codepage -> ucs-2 */ - #endif /* * Function pointers, used to quickly get to the right function. 
Each has * three possible values: latin_ (8-bit), utfc_ or utf_ (utf-8) and dbcs_ --- 592,597 ---- *** ../vim61.453/src/gui_w32.c Tue Apr 8 18:05:22 2003 --- src/gui_w32.c Thu Apr 10 22:20:28 2003 *************** *** 228,234 **** #ifdef FEAT_MBYTE_IME static LRESULT _OnImeComposition(HWND hwnd, WPARAM dbcs, LPARAM param); ! static char_u *GetResultStr(HWND hwnd, int GCS); #endif #if defined(FEAT_MBYTE_IME) && defined(DYNAMIC_IME) # ifdef NOIME --- 228,234 ---- #ifdef FEAT_MBYTE_IME static LRESULT _OnImeComposition(HWND hwnd, WPARAM dbcs, LPARAM param); ! static char_u *GetResultStr(HWND hwnd, int GCS, int *lenp); #endif #if defined(FEAT_MBYTE_IME) && defined(DYNAMIC_IME) # ifdef NOIME *************** *** 1216,1222 **** int OldBkMode = GetBkMode(s_hdc); SetBkMode(s_hdc, OPAQUE); ! gui_outstr_nowrap(s, len, GUI_MON_TRS_CURSOR, (guicolor_T)0, (guicolor_T)0, 0); SetBkMode(s_hdc, OldBkMode); } --- 1216,1223 ---- int OldBkMode = GetBkMode(s_hdc); SetBkMode(s_hdc, OPAQUE); ! gui_outstr_nowrap(s, len, GUI_MON_TRS_CURSOR, ! (guicolor_T)0, (guicolor_T)0, 0); SetBkMode(s_hdc, OldBkMode); } *************** *** 1276,1288 **** _OnImeComposition(HWND hwnd, WPARAM dbcs, LPARAM param) { char_u *ret; if ((param & GCS_RESULTSTR) == 0) /* Composition unfinished. */ return 0; ! if (ret = GetResultStr(hwnd, GCS_RESULTSTR)) { ! add_to_input_buf_csi(ret, strlen(ret)); vim_free(ret); return 1; } --- 1277,1290 ---- _OnImeComposition(HWND hwnd, WPARAM dbcs, LPARAM param) { char_u *ret; + int len; if ((param & GCS_RESULTSTR) == 0) /* Composition unfinished. */ return 0; ! if (ret = GetResultStr(hwnd, GCS_RESULTSTR, &len)) { ! add_to_input_buf_csi(ret, len); vim_free(ret); return 1; } *************** *** 1292,1306 **** /* * get the current composition string, in UCS-2; *lenp is the number of ! * Unicode characters */ ! static unsigned short * GetCompositionString_inUCS2(HIMC hIMC, DWORD GCS, int *lenp) { LONG ret; ! 
unsigned short *wbuf = NULL; char_u *buf; - int len; if (!pImmGetContext) return NULL; /* no imm32.dll */ --- 1294,1307 ---- /* * get the current composition string, in UCS-2; *lenp is the number of ! * Unicode characters. */ ! static short_u * GetCompositionString_inUCS2(HIMC hIMC, DWORD GCS, int *lenp) { LONG ret; ! LPWSTR wbuf = NULL; char_u *buf; if (!pImmGetContext) return NULL; /* no imm32.dll */ *************** *** 1312,1322 **** if (ret > 0) { ! wbuf = (unsigned short *) alloc(ret * sizeof(unsigned short)); ! if(!wbuf) return NULL; ! pImmGetCompositionStringW(hIMC, GCS, wbuf, ret); ! *lenp = ret / sizeof(unsigned short); /* char -> wchar */ ! return wbuf; } /* ret < 0; we got an error, so try the ANSI version. This'll work --- 1313,1325 ---- if (ret > 0) { ! wbuf = (LPWSTR)alloc(ret * sizeof(WCHAR)); ! if (wbuf != NULL) ! { ! pImmGetCompositionStringW(hIMC, GCS, wbuf, ret); ! *lenp = ret; ! } ! return (short_u *)wbuf; } /* ret < 0; we got an error, so try the ANSI version. This'll work *************** *** 1332,1343 **** pImmGetCompositionStringA(hIMC, GCS, buf, ret); /* convert from codepage to UCS-2 */ ! len = ret; ! wbuf = (unsigned short *)string_convert(&ime_conv_cp, buf, &len); vim_free(buf); - *lenp = len / sizeof(unsigned short); /* char_u -> wchar */ ! return wbuf; } /* --- 1335,1344 ---- pImmGetCompositionStringA(hIMC, GCS, buf, ret); /* convert from codepage to UCS-2 */ ! MultiByteToWideChar_alloc(GetACP(), 0, buf, ret, &wbuf, lenp); vim_free(buf); ! return (short_u *)wbuf; } /* *************** *** 1347,1370 **** * get complete composition string */ static char_u * ! GetResultStr(HWND hwnd, int GCS) { - DWORD dwBufLen; /* Storage for len. of composition str. */ - int buflen; HIMC hIMC; /* Input context handle. */ ! unsigned short *buf = NULL; ! char *convbuf = NULL; if (!pImmGetContext || !(hIMC = pImmGetContext(hwnd))) return NULL; /* Reads in the composition string. */ ! 
buf = GetCompositionString_inUCS2(hIMC, GCS, &dwBufLen); if (buf == NULL) return NULL; ! buflen = dwBufLen * 2; /* length in words -> length in bytes */ ! convbuf = string_convert(&ime_conv, (unsigned char *)buf, &buflen); pImmReleaseContext(hwnd, hIMC); vim_free(buf); return convbuf; --- 1348,1368 ---- * get complete composition string */ static char_u * ! GetResultStr(HWND hwnd, int GCS, int *lenp) { HIMC hIMC; /* Input context handle. */ ! short_u *buf = NULL; ! char_u *convbuf = NULL; if (!pImmGetContext || !(hIMC = pImmGetContext(hwnd))) return NULL; /* Reads in the composition string. */ ! buf = GetCompositionString_inUCS2(hIMC, GCS, lenp); if (buf == NULL) return NULL; ! convbuf = ucs2_to_enc(buf, lenp); pImmReleaseContext(hwnd, hIMC); vim_free(buf); return convbuf; *************** *** 1700,1706 **** /* Check if the Unicode buffer exists and is big enough. Create it * with the same lengt as the multi-byte string, the number of wide * characters is always equal or smaller. */ ! if ((enc_utf8 || is_funky_dbcs) && (unicodebuf == NULL || len > unibuflen)) { vim_free(unicodebuf); unicodebuf = (WCHAR *)alloc(len * sizeof(WCHAR)); --- 1698,1705 ---- /* Check if the Unicode buffer exists and is big enough. Create it * with the same lengt as the multi-byte string, the number of wide * characters is always equal or smaller. */ ! if ((enc_utf8 || (enc_codepage != 0 && (int)GetACP() != enc_codepage)) ! && (unicodebuf == NULL || len > unibuflen)) { vim_free(unicodebuf); unicodebuf = (WCHAR *)alloc(len * sizeof(WCHAR)); *************** *** 1742,1754 **** foptions, pcliprect, unicodebuf, clen, unicodepdy); len = cells; /* used for underlining */ } ! else if (is_funky_dbcs) { ! /* If we want to display DBCS, and the current CP is not the DBCS * one, we need to go via Unicode. */ if (unicodebuf != NULL) { ! 
if ((len = MultiByteToWideChar(enc_dbcs, MB_PRECOMPOSED, (char *)text, len, (LPWSTR)unicodebuf, unibuflen))) --- 1741,1753 ---- foptions, pcliprect, unicodebuf, clen, unicodepdy); len = cells; /* used for underlining */ } ! else if (enc_codepage != 0 && (int)GetACP() != enc_codepage) { ! /* If we want to display codepage data, and the current CP is not the ANSI * one, we need to go via Unicode. */ if (unicodebuf != NULL) { ! if ((len = MultiByteToWideChar(enc_codepage, MB_PRECOMPOSED, (char *)text, len, (LPWSTR)unicodebuf, unibuflen))) *** ../vim61.453/src/mbyte.c Sun Apr 6 14:50:20 2003 --- src/mbyte.c Wed Apr 9 21:04:22 2003 *************** *** 129,135 **** enc_canon_table[] = { #define IDX_LATIN_1 0 ! {"latin1", ENC_8BIT + ENC_LATIN1, 0}, #define IDX_ISO_2 1 {"iso-8859-2", ENC_8BIT, 0}, #define IDX_ISO_3 2 --- 129,135 ---- enc_canon_table[] = { #define IDX_LATIN_1 0 ! {"latin1", ENC_8BIT + ENC_LATIN1, 1252}, #define IDX_ISO_2 1 {"iso-8859-2", ENC_8BIT, 0}, #define IDX_ISO_3 2 *************** *** 143,149 **** #define IDX_ISO_7 6 {"iso-8859-7", ENC_8BIT, 0}, #define IDX_CP1255 7 ! {"cp1255", ENC_8BIT, 0}, /* close to iso-8859-8 */ #define IDX_ISO_8 8 {"iso-8859-8", ENC_8BIT, 0}, #define IDX_ISO_9 9 --- 143,149 ---- #define IDX_ISO_7 6 {"iso-8859-7", ENC_8BIT, 0}, #define IDX_CP1255 7 ! {"cp1255", ENC_8BIT, 1255}, /* close to iso-8859-8 */ #define IDX_ISO_8 8 {"iso-8859-8", ENC_8BIT, 0}, #define IDX_ISO_9 9 *************** *** 453,466 **** enc_utf8 = FALSE; } enc_dbcs = enc_dbcs_new; ! #ifdef FEAT_GUI_W32 ! /* Check for codepage which is not the current one. */ ! is_funky_dbcs = (enc_dbcs != 0 && ((int)GetACP() != enc_dbcs)); #endif - has_mbyte = (enc_dbcs != 0 || enc_utf8); - /* * Set the function pointers. */ --- 453,464 ---- enc_utf8 = FALSE; } enc_dbcs = enc_dbcs_new; + has_mbyte = (enc_dbcs != 0 || enc_utf8); ! #ifdef WIN3264 ! enc_codepage = encname2codepage(p_enc); #endif /* * Set the function pointers. 
*/ *************** *** 570,584 **** if (enc_utf8 && !option_was_set((char_u *)"fencs")) set_string_option_direct((char_u *)"fencs", -1, (char_u *)"ucs-bom,utf-8,latin1", OPT_FREE); - #ifdef FEAT_MBYTE_IME - ime_conv.vc_type = CONV_NONE; - ime_conv_cp.vc_type = CONV_NONE; - convert_setup(&ime_conv, (char_u *)"ucs-2", p_enc); - ime_conv_cp.vc_type = CONV_DBCS_TO_UCS2; - ime_conv_cp.vc_dbcs = GetACP(); - ime_conv_cp.vc_factor = 2; /* we don't really know anything about the codepage */ - #endif - #ifdef HAVE_BIND_TEXTDOMAIN_CODESET /* GNU gettext 0.10.37 supports this feature: set the codeset used for * translated messages independently from the current locale. */ --- 568,573 ---- *************** *** 2528,2533 **** --- 2578,2611 ---- return enc_canonize((char_u *)buf); } + #if defined(WIN3264) || defined(PROTO) + /* + * Convert an encoding name to an MS-Windows codepage. + * Returns zero if no codepage can be figured out. + */ + int + encname2codepage(name) + char_u *name; + { + int cp; + char_u *p = name; + int idx; + + if (STRNCMP(p, "8bit-", 5) == 0) + p += 5; + else if (STRNCMP(p_enc, "2byte-", 6) == 0) + p += 6; + + if (p[0] == 'c' && p[1] == 'p') + cp = atoi(p + 2); + else if ((idx = enc_canon_search(p)) >= 0) + cp = enc_canon_table[idx].codepage; + if (IsValidCodePage(cp)) + return cp; + return 0; + } + #endif + # if defined(USE_ICONV) || defined(PROTO) static char_u *iconv_string __ARGS((iconv_t fd, char_u *str, int slen)); *************** *** 4081,4088 **** * vcp->vc_type must have been initialized to CONV_NONE. * Note: cannot be used for conversion from/to ucs-2 and ucs-4 (will use utf-8 * instead). */ ! void convert_setup(vcp, from, to) vimconv_T *vcp; char_u *from; --- 4159,4167 ---- * vcp->vc_type must have been initialized to CONV_NONE. * Note: cannot be used for conversion from/to ucs-2 and ucs-4 (will use utf-8 * instead). + * Return FAIL when conversion is not supported, OK otherwise. */ ! 
int convert_setup(vcp, from, to) vimconv_T *vcp; char_u *from; *************** *** 4102,4108 **** /* No conversion when one of the names is empty or they are equal. */ if (from == NULL || *from == NUL || to == NULL || *to == NUL || STRCMP(from, to) == 0) ! return; from_prop = enc_canon_props(from); to_prop = enc_canon_props(to); --- 4181,4187 ---- /* No conversion when one of the names is empty or they are equal. */ if (from == NULL || *from == NUL || to == NULL || *to == NUL || STRCMP(from, to) == 0) ! return OK; from_prop = enc_canon_props(from); to_prop = enc_canon_props(to); *************** *** 4118,4138 **** vcp->vc_type = CONV_TO_LATIN1; } #ifdef WIN32 ! /* Win32-specific UCS-2 <-> DBCS conversion, for the IME, ! * so we don't need iconv ... */ ! else if ((from_prop & ENC_UNICODE) ! && (from_prop & ENC_2BYTE) && (to_prop & ENC_DBCS)) { ! vcp->vc_type = CONV_UCS2_TO_DBCS; vcp->vc_factor = 2; /* up to twice as long */ ! vcp->vc_dbcs = atoi(to + 2); ! } ! else if ((from_prop & ENC_UNICODE) ! && (from_prop & ENC_2BYTE) && (to_prop & ENC_UNICODE)) ! { ! vcp->vc_type = CONV_UCS2_TO_DBCS; ! vcp->vc_factor = 2; /* up to twice as long */ ! vcp->vc_dbcs = CP_UTF8; } #endif # ifdef USE_ICONV --- 4197,4210 ---- vcp->vc_type = CONV_TO_LATIN1; } #ifdef WIN32 ! /* Win32-specific codepage <-> codepage conversion without iconv. */ ! else if (((from_prop & ENC_UNICODE) || encname2codepage(from) > 0) ! && ((to_prop & ENC_UNICODE) || encname2codepage(to) > 0)) { ! vcp->vc_type = CONV_CODEPAGE; vcp->vc_factor = 2; /* up to twice as long */ ! vcp->vc_cpfrom = (from_prop & ENC_UNICODE) ? 0 : encname2codepage(from); ! vcp->vc_cpto = (to_prop & ENC_UNICODE) ? 0 : encname2codepage(to); } #endif # ifdef USE_ICONV *************** *** 4149,4154 **** --- 4221,4229 ---- } } # endif + if (vcp->vc_type == CONV_NONE) + return FAIL; + return OK; } /* *************** *** 4262,4304 **** *lenp = (int)STRLEN(retval); break; # endif ! # ifdef WIN32 ! /* ! 
* Note: Using these functions for UTF-8 (CP_UTF8) is NT-specific. ! * Don't put too much faith in its UTF-8 parsing; it's not ! * too good at handling invalid and overlong sequences. ! */ ! case CONV_UCS2_TO_DBCS: /* UCS-2 -> DBCS or UTF8 */ { ! int retlen; ! /* buffer size -> number of shorts */ ! len /= sizeof(unsigned short); ! retlen = WideCharToMultiByte(vcp->vc_dbcs, 0, ! (const unsigned short *)ptr, len, 0, 0, 0, 0); retval = alloc(retlen + 1); ! if (retval == NULL) ! break; ! WideCharToMultiByte(vcp->vc_dbcs, 0, ! (const unsigned short *) ptr, len, retval, retlen, 0, 0); ! retval[retlen] = NUL; ! if (lenp != NULL) ! *lenp = retlen; break; - } - case CONV_DBCS_TO_UCS2: /* UTF-8 or DBCS -> UCS-2 */ - { - int retlen; - - retlen = MultiByteToWideChar(vcp->vc_dbcs, 0, ptr, len, 0, 0); - retval = alloc(sizeof(unsigned short) * retlen); - if (retval == NULL) - break; - MultiByteToWideChar(GetACP(), 0, ptr, len, - (unsigned short *) retval, retlen); - if (lenp != NULL) - /* number of shorts -> buffer size */ - *lenp = retlen * sizeof(unsigned short); } # endif } --- 4337,4383 ---- *lenp = (int)STRLEN(retval); break; # endif ! # ifdef WIN3264 ! case CONV_CODEPAGE: /* codepage -> codepage */ { ! int retlen; ! int tmp_len; ! short_u *tmp; ! ! /* 1. codepage/UTF-8 -> ucs-2. */ ! if (vcp->vc_cpfrom == 0) ! tmp_len = utf8_to_ucs2(ptr, len, NULL); ! else ! tmp_len = MultiByteToWideChar(vcp->vc_cpfrom, 0, ! ptr, len, 0, 0); ! tmp = (short_u *)alloc(sizeof(short_u) * tmp_len); ! if (tmp == NULL) ! break; ! if (vcp->vc_cpfrom == 0) ! utf8_to_ucs2(ptr, len, tmp); ! else ! MultiByteToWideChar(vcp->vc_cpfrom, 0, ptr, len, tmp, tmp_len); ! /* 2. ucs-2 -> codepage/UTF-8. */ ! if (vcp->vc_cpto == 0) ! retlen = ucs2_to_utf8(tmp, tmp_len, NULL); ! else ! retlen = WideCharToMultiByte(vcp->vc_cpto, 0, ! tmp, tmp_len, 0, 0, 0, 0); retval = alloc(retlen + 1); ! if (retval != NULL) ! { ! if (vcp->vc_cpto == 0) ! ucs2_to_utf8(tmp, tmp_len, retval); ! else ! 
WideCharToMultiByte(vcp->vc_cpto, 0, ! tmp, tmp_len, retval, retlen, 0, 0); ! retval[retlen] = NUL; ! if (lenp != NULL) ! *lenp = retlen; ! } ! vim_free(tmp); break; } # endif } *** ../vim61.453/src/os_mswin.c Sun Apr 6 14:28:10 2003 --- src/os_mswin.c Thu Apr 10 22:30:21 2003 *************** *** 114,134 **** # define COORD int # define SHORT int # define WORD int ! # define DWORD int # define BOOL int # define WCHAR int typedef int UINT; typedef int CALLBACK; typedef int LRESULT; ! # define LPSTR int # define LPTSTR int typedef int WPARAM; typedef int LPARAM; # define KEY_EVENT_RECORD int # define MOUSE_EVENT_RECORD int # define WINAPI # define CONSOLE_CURSOR_INFO int ! # define LPCSTR char_u * # define WINBASEAPI # define INPUT_RECORD int # define SECURITY_INFORMATION int --- 114,137 ---- # define COORD int # define SHORT int # define WORD int ! typedef int DWORD; # define BOOL int # define WCHAR int typedef int UINT; typedef int CALLBACK; typedef int LRESULT; ! typedef int LPSTR; # define LPTSTR int + typedef int LPWSTR; + typedef int LPCWSTR; typedef int WPARAM; typedef int LPARAM; + typedef int LPBOOL; # define KEY_EVENT_RECORD int # define MOUSE_EVENT_RECORD int # define WINAPI # define CONSOLE_CURSOR_INFO int ! typedef int LPCSTR; # define WINBASEAPI # define INPUT_RECORD int # define SECURITY_INFORMATION int *************** *** 730,735 **** --- 775,878 ---- } #endif + #if defined(FEAT_MBYTE) || defined(PROTO) + /* + * Convert an UTF-8 string to UCS-2. + * "instr[inlen]" is the input. "inlen" is in bytes. + * When "outstr" is NULL only return the number of UCS-2 words produced. + * Otherwise "outstr" must be a buffer of sufficient size. + * Returns the number of UCS-2 words produced. + */ + int + utf8_to_ucs2(char_u *instr, int inlen, short_u *outstr) + { + int outlen = 0; + char_u *p = instr; + int todo = inlen; + int l; + + while (todo > 0) + { + /* Only convert if we have a complete sequence. 
*/ + l = utf_ptr2len_check_len(p, todo); + if (l > todo) + break; + + if (outstr != NULL) + *outstr++ = utf_ptr2char(p); + ++outlen; + p += l; + todo -= l; + } + + return outlen; + } + + /* + * Convert an UCS-2 string to UTF-8. + * The input is "instr[inlen]" with "inlen" in number of ucs-2 words. + * When "outstr" is NULL only return the required number of bytes. + * Otherwise "outstr" must be a buffer of sufficient size. + * Return the number of bytes produced. + */ + int + ucs2_to_utf8(short_u *instr, int inlen, char_u *outstr) + { + int outlen = 0; + int todo = inlen; + short_u *p = instr; + int l; + + while (todo > 0) + { + if (outstr != NULL) + { + l = utf_char2bytes(*p, outstr); + outstr += l; + } + else + l = utf_char2len(*p); + ++p; + outlen += l; + --todo; + } + + return outlen; + } + + /* + * Call MultiByteToWideChar() and allocate memory for the result. + * Returns the result in "*out[*outlen]". "outlen" is in words. + */ + void + MultiByteToWideChar_alloc(UINT cp, DWORD flags, + LPCSTR in, int inlen, + LPWSTR *out, int *outlen) + { + *outlen = MultiByteToWideChar(cp, flags, in, inlen, 0, 0); + *out = (LPWSTR)alloc(sizeof(WCHAR) * *outlen); + if (*out != NULL) + MultiByteToWideChar(cp, flags, in, inlen, *out, *outlen); + } + + /* + * Call WideCharToMultiByte() and allocate memory for the result. + * Returns the result in "*out[*outlen]". + */ + void + WideCharToMultiByte_alloc(UINT cp, DWORD flags, + LPCWSTR in, int inlen, + LPSTR *out, int *outlen, + LPCSTR def, LPBOOL useddef) + { + *outlen = WideCharToMultiByte(cp, flags, in, inlen, NULL, 0, def, useddef); + *out = alloc((unsigned)*outlen); + if (*out != NULL) + WideCharToMultiByte(cp, flags, in, inlen, *out, *outlen, def, useddef); + } + + #endif /* FEAT_MBYTE */ + #ifdef FEAT_CLIPBOARD /* * Clipboard stuff, for cutting and pasting text to other windows. *************** *** 798,898 **** #if defined(FEAT_MBYTE) || defined(PROTO) /* ! * iconv-like utf-<->ucs2 interfaces. ! * ! 
* If outstr is NULL, return the required buffer length. ! * ! * Otherwise, convert from *instr to outstr, incrementing instr and ! * decrementing inlen. Return the number of bytes converted. ! * ! * (We assume outstr has enough space; the caller needs to make sure ! * of this itself.) ! */ ! ! /* ! * Convert an UTF-8 string to UCS-2 (see above for info). ! */ ! static int ! utf8_to_ucs2(char_u **instr, int *inlen, WCHAR *outstr) ! { ! int total_length = 0; ! ! if (outstr == NULL) ! { ! /* Return the required size. */ ! int need = 0, n; ! ! for (n = 0; n < *inlen; ! n += utf_ptr2len_check_len((*instr) + n, *inlen - n)) ! need++; ! return need; ! } ! ! while (*inlen) ! { ! /* Do we have a complete sequence? */ ! int seq_len = utf_ptr2len_check_len(*instr, *inlen); ! ! if (seq_len > *inlen) ! return total_length; ! ! *outstr = utf_ptr2char(*instr); ! (*instr) += seq_len; ! (*inlen) -= seq_len; ! outstr++; ! total_length++; ! } ! ! return total_length; ! } ! ! /* ! * Convert an UCS-2 string to UTF-8 (see above for info). ! */ ! static int ! ucs2_to_utf8(WCHAR **instr, int *inlen, char_u *outstr) ! { ! int total_length = 0; ! ! if (outstr == NULL) ! { ! /* Return the required size. */ ! int need = 0, n; ! ! for (n = 0; n < *inlen; ++n) ! need += utf_char2len((*instr)[n]); ! return need; ! } ! ! while (*inlen) ! { ! int seq_len = utf_char2bytes(**instr, outstr); ! ! (*instr)++; ! (*inlen)--; ! outstr += seq_len; ! total_length += seq_len; ! } ! ! return total_length; ! } ! ! /* ! * Note: the following two functions are only guaranteed to work if iconv() is ! * available *or* p_enc is Unicode *or* p_enc is the ACP. If encoding=cp932, ! * your system is in cp935, and iconv() isn't available, these return nothing. ! * (Lots of other things don't work in this case, anyway.) */ /* * Convert 'encoding' to UCS-2. ! * Input in "str" with length "*len". When "len" is NULL, use strlen(). ! * Output is returned as an allocated string. "*len" is set to the length of * the result. 
* Returns NULL when out of memory. */ static WCHAR * ! enc_to_ucs2(char_u *str, int *len) { vimconv_T conv; WCHAR *ret; --- 941,959 ---- #if defined(FEAT_MBYTE) || defined(PROTO) /* ! * Note: the following two functions are only guaranteed to work when using ! * valid MS-Windows codepages or when iconv() is available. */ /* * Convert 'encoding' to UCS-2. ! * Input in "str" with length "*lenp". When "lenp" is NULL, use strlen(). ! * Output is returned as an allocated string. "*lenp" is set to the length of * the result. * Returns NULL when out of memory. */ static WCHAR * ! enc_to_ucs2(char_u *str, int *lenp) { vimconv_T conv; WCHAR *ret; *************** *** 900,983 **** int len_loc; int length; ! if (len == NULL) { len_loc = STRLEN(str) + 1; ! len = &len_loc; } ! if (enc_dbcs) { ! /* We can do any CP###->WIDE in one pass, and we can do it * without iconv() (convert_* may need iconv). */ ! length = MultiByteToWideChar(enc_dbcs, 0, str, *len, NULL, 0); ! ret = (WCHAR *)alloc((unsigned)(length * sizeof(WCHAR))); ! if (ret != NULL) ! MultiByteToWideChar(enc_dbcs, 0, str, *len, ret, length); } else { ! /* We might be called before we have p_enc set up. */ conv.vc_type = CONV_NONE; ! convert_setup(&conv, p_enc ? p_enc : (char_u *)"latin1", ! (char_u *)"utf-8"); if (conv.vc_type != CONV_NONE) { ! str = allocbuf = string_convert(&conv, str, len); if (str == NULL) return NULL; } convert_setup(&conv, NULL, NULL); ! length = utf8_to_ucs2((char_u **)&str, len, NULL); ret = (WCHAR *)alloc((unsigned)(length * sizeof(WCHAR))); if (ret != NULL) ! utf8_to_ucs2((char_u **)&str, len, ret); vim_free(allocbuf); } ! *len = length; return ret; } /* * Convert an UCS-2 string to 'encoding'. ! * Input in "str" with length (counted in wide characters) "*len". When "len" ! * is NULL, use strlen(). ! * Output is returned as an allocated string. "*len" is set to the length of * the result. * Returns NULL when out of memory. */ ! static char_u * ! 
ucs2_to_enc(WCHAR *str, int *len) { vimconv_T conv; char_u *utf8_str = NULL, *enc_str = NULL; int len_loc; ! if (len == NULL) { len_loc = wcslen(str) + 1; ! len = &len_loc; } ! if (enc_dbcs) { ! /* We can do any WIDE->CP### in one pass. */ ! int length = WideCharToMultiByte(enc_dbcs, 0, str, *len, NULL, 0, 0, 0); ! utf8_str = alloc((unsigned)length); ! if (utf8_str != NULL) ! WideCharToMultiByte(enc_dbcs, 0, str, *len, utf8_str, length, 0, 0); ! *len = length; ! return utf8_str; } ! utf8_str = alloc(ucs2_to_utf8(&str, len, NULL)); if (utf8_str != NULL) { ! *len = ucs2_to_utf8(&str, len, utf8_str); /* We might be called before we have p_enc set up. */ conv.vc_type = CONV_NONE; --- 961,1043 ---- int len_loc; int length; ! if (lenp == NULL) { len_loc = STRLEN(str) + 1; ! lenp = &len_loc; } ! if (enc_codepage != 0) { ! /* We can do any CP### -> UCS-2 in one pass, and we can do it * without iconv() (convert_* may need iconv). */ ! MultiByteToWideChar_alloc(enc_codepage, 0, str, *lenp, &ret, &length); } else { ! /* Use "latin1" by default, we might be called before we have p_enc ! * set up. Convert to utf-8 first, works better with iconv(). Does ! * nothing if 'encoding' is "utf-8". */ conv.vc_type = CONV_NONE; ! if (convert_setup(&conv, p_enc ? p_enc : (char_u *)"latin1", ! (char_u *)"utf-8") == FAIL) ! return NULL; if (conv.vc_type != CONV_NONE) { ! str = allocbuf = string_convert(&conv, str, lenp); if (str == NULL) return NULL; } convert_setup(&conv, NULL, NULL); ! length = utf8_to_ucs2(str, *lenp, NULL); ret = (WCHAR *)alloc((unsigned)(length * sizeof(WCHAR))); if (ret != NULL) ! utf8_to_ucs2(str, *lenp, (short_u *)ret); vim_free(allocbuf); } ! *lenp = length; return ret; } /* * Convert an UCS-2 string to 'encoding'. ! * Input in "str" with length (counted in wide characters) "*lenp". When ! * "lenp" is NULL, use strlen(). ! * Output is returned as an allocated string. "*lenp" is set to the length of * the result. * Returns NULL when out of memory. */ ! char_u * ! 
ucs2_to_enc(short_u *str, int *lenp) { vimconv_T conv; char_u *utf8_str = NULL, *enc_str = NULL; int len_loc; ! if (lenp == NULL) { len_loc = wcslen(str) + 1; ! lenp = &len_loc; } ! if (enc_codepage != 0) { ! /* We can do any UCS-2 -> CP### in one pass. */ ! int length; ! WideCharToMultiByte_alloc(enc_codepage, 0, str, *lenp, ! &enc_str, &length, 0, 0); ! *lenp = length; ! return enc_str; } ! utf8_str = alloc(ucs2_to_utf8(str, *lenp, NULL)); if (utf8_str != NULL) { ! *lenp = ucs2_to_utf8(str, *lenp, utf8_str); /* We might be called before we have p_enc set up. */ conv.vc_type = CONV_NONE; *************** *** 990,996 **** } else { ! enc_str = string_convert(&conv, utf8_str, len); vim_free(utf8_str); } --- 1050,1056 ---- } else { ! enc_str = string_convert(&conv, utf8_str, lenp); vim_free(utf8_str); } *************** *** 1075,1087 **** if (hMemWstr[str_size] == NUL) break; } ! to_free = str = ucs2_to_enc(hMemWstr, &str_size); GlobalUnlock(hMemW); } } else #endif ! /* Get the clipboard in the ANSI codepage. */ if (IsClipboardFormatAvailable(CF_TEXT)) { if ((hMem = GetClipboardData(CF_TEXT)) != NULL) --- 1135,1147 ---- if (hMemWstr[str_size] == NUL) break; } ! to_free = str = ucs2_to_enc((short_u *)hMemWstr, &str_size); GlobalUnlock(hMemW); } } else #endif ! /* Get the clipboard in the Active codepage. */ if (IsClipboardFormatAvailable(CF_TEXT)) { if ((hMem = GetClipboardData(CF_TEXT)) != NULL) *************** *** 1106,1121 **** } #if defined(FEAT_MBYTE) && defined(WIN3264) ! /* The text is now in the active codepage. Convert to 'encoding', * going through UCS-2. */ ! maxlen = MultiByteToWideChar(CP_ACP, 0, str, str_size, NULL, 0); ! to_free = alloc((unsigned)(maxlen * sizeof(WCHAR))); if (to_free != NULL) { - MultiByteToWideChar(CP_ACP, 0, str, str_size, - (WCHAR *)to_free, maxlen); str_size = maxlen; ! str = ucs2_to_enc((WCHAR *)to_free, &str_size); if (str != NULL) { vim_free(to_free); --- 1166,1179 ---- } #if defined(FEAT_MBYTE) && defined(WIN3264) ! 
/* The text is in the active codepage. Convert to 'encoding', * going through UCS-2. */ ! MultiByteToWideChar_alloc(GetACP(), 0, str, str_size, ! &(LPWSTR)to_free, &maxlen); if (to_free != NULL) { str_size = maxlen; ! str = ucs2_to_enc((short_u *)to_free, &str_size); if (str != NULL) { vim_free(to_free); *************** *** 1191,1199 **** { WCHAR *lpszMemW; ! /* Convert the text for CF_TEXT to ANSI codepage. Otherwise it's ! * p_enc, which has no relation to the ANSI codepage. */ ! metadata.txtlen = WideCharToMultiByte(CP_ACP, 0, out, len, NULL, 0, 0, 0); vim_free(str); str = (char_u *)alloc((unsigned)metadata.txtlen); --- 1249,1257 ---- { WCHAR *lpszMemW; ! /* Convert the text for CF_TEXT to Active codepage. Otherwise it's ! * p_enc, which has no relation to the Active codepage. */ ! metadata.txtlen = WideCharToMultiByte(GetACP(), 0, out, len, NULL, 0, 0, 0); vim_free(str); str = (char_u *)alloc((unsigned)metadata.txtlen); *************** *** 1202,1208 **** vim_free(out); return; /* out of memory */ } ! WideCharToMultiByte(CP_ACP, 0, out, len, str, metadata.txtlen, 0, 0); /* Allocate memory for the UCS-2 text, add one NUL word to --- 1260,1266 ---- vim_free(out); return; /* out of memory */ } ! WideCharToMultiByte(GetACP(), 0, out, len, str, metadata.txtlen, 0, 0); /* Allocate memory for the UCS-2 text, add one NUL word to *************** *** 1249,1256 **** /* * Open the clipboard, clear it and put our text on it. ! * Always set our Vim format. Either put Unicode or plain text on it. ! * TODO: why not both? * * Don't pass GetActiveWindow() as an argument to OpenClipboard() * because then we can't paste back into the same window for some --- 1307,1313 ---- /* * Open the clipboard, clear it and put our text on it. ! * Always set our Vim format. Put Unicode and plain text on it. 
* * Don't pass GetActiveWindow() as an argument to OpenClipboard() * because then we can't paste back into the same window for some *** ../vim61.453/src/proto/mbyte.pro Wed Mar 26 21:48:04 2003 --- src/proto/mbyte.pro Tue Apr 8 17:21:33 2003 *************** *** 55,60 **** --- 55,61 ---- char_u *enc_skip __ARGS((char_u *p)); char_u *enc_canonize __ARGS((char_u *enc)); char_u *enc_locale __ARGS((void)); + int encname2codepage __ARGS((char_u *name)); void *my_iconv_open __ARGS((char_u *to, char_u *from)); int iconv_enabled __ARGS((int verbose)); void iconv_end __ARGS((void)); *************** *** 71,77 **** void xim_init __ARGS((void)); int xim_get_status_area_height __ARGS((void)); int im_get_status __ARGS((void)); ! void convert_setup __ARGS((vimconv_T *vcp, char_u *from, char_u *to)); int convert_input __ARGS((char_u *ptr, int len, int maxlen)); char_u *string_convert __ARGS((vimconv_T *vcp, char_u *ptr, int *lenp)); /* vim: set ft=c : */ --- 72,78 ---- void xim_init __ARGS((void)); int xim_get_status_area_height __ARGS((void)); int im_get_status __ARGS((void)); ! 
int convert_setup __ARGS((vimconv_T *vcp, char_u *from, char_u *to)); int convert_input __ARGS((char_u *ptr, int len, int maxlen)); char_u *string_convert __ARGS((vimconv_T *vcp, char_u *ptr, int *lenp)); /* vim: set ft=c : */ *** ../vim61.453/src/proto/os_mswin.pro Fri Mar 22 21:41:33 2002 --- src/proto/os_mswin.pro Mon Apr 7 23:10:00 2003 *************** *** 22,29 **** --- 22,34 ---- int can_end_termcap_mode __ARGS((int give_msg)); int mch_screenmode __ARGS((char_u *arg)); int mch_libcall __ARGS((char_u *libname, char_u *funcname, char_u *argstring, int argint, char_u **string_result, int *number_result)); + int utf8_to_ucs2 __ARGS((char_u *instr, int inlen, short_u *outstr)); + int ucs2_to_utf8 __ARGS((short_u *instr, int inlen, char_u *outstr)); + void MultiByteToWideChar_alloc __ARGS((UINT cp, DWORD flags, LPCSTR in, int inlen, LPWSTR *out, int *outlen)); + void WideCharToMultiByte_alloc __ARGS((UINT cp, DWORD flags, LPCWSTR in, int inlen, LPSTR *out, int *outlen, LPCSTR def, LPBOOL useddef)); int clip_mch_own_selection __ARGS((VimClipboard *cbd)); void clip_mch_lose_selection __ARGS((VimClipboard *cbd)); + char_u *ucs2_to_enc __ARGS((short_u *str, int *lenp)); void clip_mch_request_selection __ARGS((VimClipboard *cbd)); void clip_mch_set_selection __ARGS((VimClipboard *cbd)); void DumpPutS __ARGS((const char *psz)); *** ../vim61.453/src/structs.h Sat Mar 15 17:55:19 2003 --- src/structs.h Mon Apr 7 20:55:16 2003 *************** *** 695,702 **** { int vc_type; /* zero or one of the CONV_ values */ int vc_factor; /* max. expansion factor */ ! # ifdef FEAT_WINDOWS ! int vc_dbcs; /* codepage to convert to (CONV_CODEPAGE) */ # endif # ifdef USE_ICONV iconv_t vc_fd; /* for CONV_ICONV */ --- 695,703 ---- { int vc_type; /* zero or one of the CONV_ values */ int vc_factor; /* max. expansion factor */ ! # ifdef WIN3264 ! int vc_cpfrom; /* codepage to convert from (CONV_CODEPAGE) */ ! 
int vc_cpto; /* codepage to convert to (CONV_CODEPAGE) */ # endif # ifdef USE_ICONV iconv_t vc_fd; /* for CONV_ICONV */ *************** *** 719,727 **** #define CONV_TO_UTF8 1 #define CONV_TO_LATIN1 2 #define CONV_ICONV 3 ! #ifdef FEAT_WINDOWS ! # define CONV_UCS2_TO_DBCS 4 /* ucs-2 -> dbcs */ ! # define CONV_DBCS_TO_UCS2 5 /* current codepage -> ucs-2 */ #endif /* --- 720,727 ---- #define CONV_TO_UTF8 1 #define CONV_TO_LATIN1 2 #define CONV_ICONV 3 ! #ifdef WIN3264 ! # define CONV_CODEPAGE 4 /* codepage -> codepage */ #endif /* *** ../vim61.453/src/version.c Tue Apr 8 23:05:36 2003 --- src/version.c Thu Apr 10 22:13:58 2003 *************** *** 613,614 **** --- 613,616 ---- { /* Add new patch number below this line */ + /**/ + 454, /**/ -- MICHAEL PALIN PLAYED: 1ST SOLDIER WITH A KEEN INTEREST IN BIRDS, DENNIS, MR DUCK (A VILLAGE CARPENTER WHO IS ALMOST KEENER THAN ANYONE ELSE TO BURN WITCHES), THREE-HEADED KNIGHT, SIR GALAHAD, KING OF SWAMP CASTLE, BROTHER MAYNARD'S ROOMATE "Monty Python and the Holy Grail" PYTHON (MONTY) PICTURES LTD /// Bram Moolenaar -- Bram@Moolenaar.net -- http://www.Moolenaar.net \\\ /// Creator of Vim - Vi IMproved -- http://www.Vim.org \\\ \\\ Project leader for A-A-P -- http://www.A-A-P.org /// \\\ Help AIDS victims, buy at Amazon -- http://ICCF.nl/click1.html ///