1 /*---------------------------------------------------------------------------*
2   Project:  TwlSDK - libraries - STD
3   File:     std_unicode.c
4 
5   Copyright 2006-2008 Nintendo. All rights reserved.
6 
7   These coded instructions, statements, and computer programs contain
8   proprietary information of Nintendo of America Inc. and/or Nintendo
9   Company Ltd., and are protected by Federal copyright law. They may
10   not be disclosed to third parties or copied or duplicated in any form,
11   in whole or in part, without the prior written consent of Nintendo.
12 
13   $Date:: 2009-02-05#$
14   $Rev: 9972 $
15   $Author: ooshimay $
16  *---------------------------------------------------------------------------*/
17 #include <nitro.h>
18 
19 #include <nitro/std/unicode.h>
20 
21 
22 #if defined(SDK_ARM9) || !defined(SDK_NITRO)
23 // Because of memory restrictions, it is not possible to use the Unicode conversion feature with ARM7 in NITRO mode
24 
#if defined(SDK_ARM9)
// ARM9 builds always select the static implementation.
#define STD_UNICODE_STATIC_IMPLEMENTATION
#endif

#if defined(STD_UNICODE_STATIC_IMPLEMENTATION)
// ARM9 holds the conversion tables itself and keeps the executable code in the resident module.

// The array pointers are weak symbols; in HYBRID mode, the code table is sent to ltdmain.
// A weak symbol defined in the same file that uses it cannot be overridden externally,
// so the array pointers are defined separately in std_unicode_array.c and only declared here.
extern const u8    *STD_Unicode2SjisArray;
extern const u16   *STD_Sjis2UnicodeArray;
#else
// ARM7 references ARM9's tables, and its executable code is split up and placed in several locations in the LTD region.
// Both pointers start out NULL and are assigned by STDi_AttachUnicodeConversionTable.
static const u8    *STD_Unicode2SjisArray = NULL;
static const u16   *STD_Sjis2UnicodeArray = NULL;
// Forward declarations of the table-driven cores that the public entry points call once a table is attached.
// NOTE(review): never_inline presumably keeps each core a separate function so it can live in the
// ltdmain section opened around its definition - confirm against the toolchain documentation.
static STDResult STDi_ConvertStringSjisToUnicodeCore(u16 *dst, int *dst_len,
                                                     const char *src, int *src_len,
                                                     STDConvertUnicodeCallback callback)
                                                     __attribute__((never_inline));
static STDResult STDi_ConvertStringUnicodeToSjisCore(char *dst, int *dst_len,
                                                     const u16 *src, int *src_len,
                                                     STDConvertSjisCallback callback)
                                                     __attribute__((never_inline));
#endif
51 
52 
53 /*****************************************************************************/
54 /* Functions */
55 
56 /*---------------------------------------------------------------------------*
57   Name:         STDi_GetUnicodeConversionTable
58 
59   Description:  Gets the Unicode conversion table.
60 
61   Arguments:    u2s: Storage location of pointer to Unicode -> SJIS conversion table
62                 s2u: Storage location of pointer to SJIS -> Unicode conversion table
63 
64   Returns:      None.
65  *---------------------------------------------------------------------------*/
STDi_GetUnicodeConversionTable(const u8 ** u2s,const u16 ** s2u)66 void STDi_GetUnicodeConversionTable(const u8 **u2s, const u16 **s2u)
67 {
68     if (u2s)
69     {
70         *u2s = STD_Unicode2SjisArray;
71     }
72     if (s2u)
73     {
74         *s2u = STD_Sjis2UnicodeArray;
75     }
76 }
77 
78 /*---------------------------------------------------------------------------*
79   Name:         STDi_AttachUnicodeConversionTable
80 
81   Description:  Assigns a Unicode conversion table to the STD library.
82 
83   Arguments:    u2s: Unicode -> SJIS conversion table
84                 s2u: SJIS -> Unicode conversion table
85 
86   Returns:      None.
87  *---------------------------------------------------------------------------*/
STDi_AttachUnicodeConversionTable(const u8 * u2s,const u16 * s2u)88 void STDi_AttachUnicodeConversionTable(const u8 *u2s, const u16 *s2u)
89 {
90 #if defined(STD_UNICODE_STATIC_IMPLEMENTATION)
91     (void)u2s;
92     (void)s2u;
93 #else
94     STD_Unicode2SjisArray = u2s;
95     STD_Sjis2UnicodeArray = s2u;
96 #endif
97 }
98 
99 /*---------------------------------------------------------------------------*
100   Name:         STD_ConvertStringSjisToUnicode
101 
102   Description:  Converts a ShiftJIS character string to a Unicode character string.
103 
104   Arguments:    dst:               Conversion destination buffer
105                                   The storage process is ignored if NULL is specified.
106                 dst_len:           Pointer which stores and passes the maximum number of characters for the conversion destination buffer, then receives the number of characters that were actually stored.
107                                   Ignored when NULL is given.
108 
109                 src:               Conversion source buffer
                src_len:          Pointer which stores and passes the maximum number of characters to be converted, then receives the number actually converted.

                                  The end-of-string position takes priority over this specification.
                                  When either a negative value is stored and passed, or NULL is given, the character count is revised to be the number of characters to the end of the string.
114 
115                 callback:          The callback to be called if there are any characters that can't be converted.
116                                   When NULL is specified, the conversion process ends at the position of the character that cannot be converted.
117 
118 
119   Returns:      Result of the conversion process.
120  *---------------------------------------------------------------------------*/
STD_ConvertStringSjisToUnicode(u16 * dst,int * dst_len,const char * src,int * src_len,STDConvertUnicodeCallback callback)121 STDResult STD_ConvertStringSjisToUnicode(u16 *dst, int *dst_len,
122                                          const char *src, int *src_len,
123                                          STDConvertUnicodeCallback callback)
124 #if !defined(STD_UNICODE_STATIC_IMPLEMENTATION)
125 {
126     // If the table is not resolved, substitute with ASCII conversion
127     if (STD_Unicode2SjisArray == NULL)
128     {
129         STDResult   result = STD_RESULT_SUCCESS;
130         int         i;
131         int         max = 0x7FFFFFFF;
132         if (src_len && (*src_len >= 0))
133         {
134             max = *src_len;
135         }
136         if (dst && dst_len && (*dst_len >= 0) && (*dst_len < max))
137         {
138             max = *dst_len;
139         }
140         for (i = 0; i < max; ++i)
141         {
142             int     c = ((const u8 *)src)[i];
143             if (c == 0)
144             {
145                break;
146             }
147             else if (c >= 0x80)
148             {
149                result = STD_RESULT_ERROR;
150                break;
151             }
152             dst[i] = (u16)c;
153         }
154         if (src_len)
155         {
156             *src_len = i;
157         }
158         if (dst_len)
159         {
160             *dst_len = i;
161         }
162         return result;
163     }
164     else
165     {
166         return STDi_ConvertStringSjisToUnicodeCore(dst, dst_len, src, src_len, callback);
167     }
168 }
169 #include <twl/ltdmain_begin.h>
STDi_ConvertStringSjisToUnicodeCore(u16 * dst,int * dst_len,const char * src,int * src_len,STDConvertUnicodeCallback callback)170 static STDResult STDi_ConvertStringSjisToUnicodeCore(u16 *dst, int *dst_len,
171                                                      const char *src, int *src_len,
172                                                      STDConvertUnicodeCallback callback)
173 #endif // !defined(STD_UNICODE_STATIC_IMPLEMENTATION)
174 {
175     STDResult retval = STD_RESULT_SUCCESS;
176     if (!src)
177     {
178         retval = STD_RESULT_INVALID_PARAM;
179     }
180     else
181     {
182         int     src_pos = 0;
183         int     dst_pos = 0;
184         int     src_max = (src_len && (*src_len >= 0)) ? *src_len : 0x7FFFFFFF;
185         int     dst_max = (dst && dst_len && (*dst_len >= 0)) ? *dst_len : 0x7FFFFFFF;
186 
187         /* Until either buffer length reaches its terminus, process one character at a time. */
188         while ((dst_pos < dst_max) && (src_pos < src_max))
189         {
190             u16     dst_tmp[4];
191             int     dst_count = 0;
192             int     src_count;
193             u32     c1 = (u8)src[src_pos];
194             /* Detection of end-of-string has precedence over the string length */
195             if (!c1)
196             {
197                 break;
198             }
199             /* ASCII [00, 7E] */
200             else if (c1 <= 0x7E)
201             {
202                 dst_tmp[0] = (u16)c1;
203                 src_count = 1;
204                 dst_count = 1;
205             }
206             /* Half-width kana [A1, DF] */
207             else if ((c1 >= 0xA1) && (c1 <= 0xDF))
208             {
209                 dst_tmp[0] = (u16)(c1 + (0xFF61 - 0xA1));
210                 src_count = 1;
211                 dst_count = 1;
212             }
213             /* ShiftJIS */
214             else if (STD_IsSjisCharacter(&src[src_pos]))
215             {
216                 src_count = 2;
217                 if (src_pos + src_count <= src_max)
218                 {
219                     /*
220                      * The range of the conversion table is:
221                      * First byte: {81 - 9F, E0 - FC}, second byte: {40 - FF}.
222                      * In order to avoid division to streamline calculation of the table,
223                      * the second byte {7F, FD, FE, FF} is left in full.
224                      */
225                     u32     c2 = (u8)src[src_pos + 1];
226                     c1 -= 0x81 + ((c1 >= 0xE0) ? (0xE0 - 0xA0) : 0);
227                     dst_tmp[0] = STD_Sjis2UnicodeArray[c1 * 0xC0 + (c2 - 0x40)];
228                     dst_count = (dst_tmp[0] ? 1 : 0);
229                 }
230             }
231             /* Calls the callback if a character appears which cannot be converted. */
232             if (dst_count == 0)
233             {
234                 if (!callback)
235                 {
236                     retval = STD_RESULT_CONVERSION_FAILED;
237                 }
238                 else
239                 {
240                     src_count = src_max - src_pos;
241                     dst_count = sizeof(dst_tmp) / sizeof(*dst_tmp);
242                     retval = (*callback) (dst_tmp, &dst_count, &src[src_pos], &src_count);
243                 }
244                 if (retval != STD_RESULT_SUCCESS)
245                 {
246                     break;
247                 }
248             }
249             /* Terminate here if either the conversion source or the conversion destination passes their respective ends. */
250             if ((src_pos + src_count > src_max) || (dst_pos + dst_count > dst_max))
251             {
252                 break;
253             }
254             /* Stores the conversion result if the buffer is valid */
255             if (dst)
256             {
257                 int     i;
258                 for (i = 0; i < dst_count; ++i)
259                 {
260                     MI_StoreLE16(&dst[dst_pos + i], (u16)dst_tmp[i]);
261                 }
262             }
263             src_pos += src_count;
264             dst_pos += dst_count;
265         }
266         /* Returns the number of converted characters */
267         if (src_len)
268         {
269             *src_len = src_pos;
270         }
271         if (dst_len)
272         {
273             *dst_len = dst_pos;
274         }
275     }
276     return retval;
277 }
278 #if !defined(STD_UNICODE_STATIC_IMPLEMENTATION)
279 #include <twl/ltdmain_end.h>
280 #endif // !defined(STD_UNICODE_STATIC_IMPLEMENTATION)
281 
282 /*---------------------------------------------------------------------------*
283   Name:         STD_ConvertStringUnicodeToSjis
284 
285   Description:  Converts a Unicode character string into a ShiftJIS character string.
286 
287   Arguments:    dst:               Conversion destination buffer
288                                   The storage process is ignored if NULL is specified.
289                 dst_len:           Pointer which stores and passes the maximum number of characters for the conversion destination buffer, then receives the number of characters that were actually stored.
290                                   Ignored when NULL is given.
291 
292                 src:               Conversion source buffer
                src_len:          Pointer which stores and passes the maximum number of characters to be converted, then receives the number actually converted.
294 
295                                   The end-of-string position takes priority over this specification.
296                                   When either a negative value is stored and passed, or NULL is given, the character count is revised to be the number of characters to the end of the string.
297 
298                 callback:          The callback to be called if there are any characters that can't be converted.
299                                   When NULL is specified, the conversion process ends at the position of the character that cannot be converted.
300 
301 
302   Returns:      Result of the conversion process.
303  *---------------------------------------------------------------------------*/
STD_ConvertStringUnicodeToSjis(char * dst,int * dst_len,const u16 * src,int * src_len,STDConvertSjisCallback callback)304 STDResult STD_ConvertStringUnicodeToSjis(char *dst, int *dst_len,
305                                          const u16 *src, int *src_len,
306                                          STDConvertSjisCallback callback)
307 #if !defined(STD_UNICODE_STATIC_IMPLEMENTATION)
308 {
309     // If the table is not resolved, substitute with ASCII conversion
310     if (STD_Unicode2SjisArray == NULL)
311     {
312         STDResult   result = STD_RESULT_SUCCESS;
313         int         i;
314         int         max = 0x7FFFFFFF;
315         if (src_len && (*src_len >= 0))
316         {
317             max = *src_len;
318         }
319         if (dst && dst_len && (*dst_len >= 0) && (*dst_len < max))
320         {
321             max = *dst_len;
322         }
323         for (i = 0; i < max; ++i)
324         {
325             int     c = ((const u16 *)src)[i];
326             if (c == 0)
327             {
328                break;
329             }
330             else if (c >= 0x80)
331             {
332                result = STD_RESULT_ERROR;
333                break;
334             }
335             dst[i] = (char)c;
336         }
337         if (src_len)
338         {
339             *src_len = i;
340         }
341         if (dst_len)
342         {
343             *dst_len = i;
344         }
345         return result;
346     }
347     else
348     {
349         return STDi_ConvertStringUnicodeToSjisCore(dst, dst_len, src, src_len, callback);
350     }
351 }
352 #include <twl/ltdmain_begin.h>
STDi_ConvertStringUnicodeToSjisCore(char * dst,int * dst_len,const u16 * src,int * src_len,STDConvertSjisCallback callback)353 static STDResult STDi_ConvertStringUnicodeToSjisCore(char *dst, int *dst_len,
354                                                      const u16 *src, int *src_len,
355                                                      STDConvertSjisCallback callback)
356 #endif // !defined(STD_UNICODE_STATIC_IMPLEMENTATION)
357 {
358     STDResult retval = STD_RESULT_SUCCESS;
359     if (!src)
360     {
361         retval = STD_RESULT_INVALID_PARAM;
362     }
363     else
364     {
365         int     src_pos = 0;
366         int     dst_pos = 0;
367         int     src_max = (src_len && (*src_len >= 0)) ? *src_len : 0x7FFFFFFF;
368         int     dst_max = (dst && dst_len && (*dst_len >= 0)) ? *dst_len : 0x7FFFFFFF;
369         /* Until either buffer length reaches its terminus, process one character at a time. */
370         while ((dst_pos < dst_max) && (src_pos < src_max))
371         {
372             char    dst_tmp[4];
373             int     dst_count = 0;
374             int     src_count = 1;
375             u32     w = MI_LoadLE16(&src[src_pos]);
376             /* Detection of end-of-string has precedence over the string length */
377             if (!w)
378             {
379                 break;
380             }
381             /* Extended characters (private region) */
382             else if ((w >= 0xE000) && (w < 0xF8FF))
383             {
384                 const u32 sjis_page = 188UL;
385                 const u32 offset = w - 0xE000;
386                 u32     c1 = offset / sjis_page;
387                 u32     c2 = offset - c1 * sjis_page;
388                 dst_tmp[0] = (char)(c1 + 0xF0);
389                 dst_tmp[1] = (char)(c2 + ((c2 < 0x3F) ? 0x40 : 0x41));
390                 dst_count = 2;
391             }
392             else
393             {
394                 /*
395                  * The range of the conversion table is:
396                  * [0000-0480), [2000-2680), [3000-3400), [4E00-9FA8), [F928-FFE6).
397                  * Regions filled with 0000 are simply deleted in descending order (largest first) and packed.
398                  */
399 				/* *INDENT-OFF* */
400                 static const int table[][2] =
401                 {
402                     {0x0000,0x0480 - 0x0000,},
403                     {0x2000,0x2680 - 0x2000,},
404                     {0x3000,0x3400 - 0x3000,},
405                     {0x4E00,0x9FA8 - 0x4E00,},
406                     {0xF928,0xFFE6 - 0xF928,},
407                 };
408 				enum { table_max = sizeof(table) / (sizeof(int) * 2) };
409 				/* *INDENT-ON* */
410                 int     i;
411                 int     index = 0;
412                 for (i = 0; i < table_max; ++i)
413                 {
414                     const int offset = (int)(w - table[i][0]);
415                     /* Invalid range */
416                     if (offset < 0)
417                     {
418                         break;
419                     }
420                     /* Valid range */
421                     else if (offset < table[i][1])
422                     {
423                         index += offset;
424                         dst_tmp[0] = (char)STD_Unicode2SjisArray[index * 2 + 0];
425                         if (dst_tmp[0])
426                         {
427                             dst_tmp[1] = (char)STD_Unicode2SjisArray[index * 2 + 1];
428                             dst_count = dst_tmp[1] ? 2 : 1;
429                         }
430                         break;
431                     }
432                     /* Higher ranges */
433                     else
434                     {
435                         index += table[i][1];
436                     }
437                 }
438             }
439             /* Calls the callback if a character appears which cannot be converted. */
440             if (dst_count == 0)
441             {
442                 if (!callback)
443                 {
444                     retval = STD_RESULT_CONVERSION_FAILED;
445                 }
446                 else
447                 {
448                     src_count = src_max - src_pos;
449                     dst_count = sizeof(dst_tmp) / sizeof(*dst_tmp);
450                     retval = (*callback) (dst_tmp, &dst_count, &src[src_pos], &src_count);
451                 }
452                 if (retval != STD_RESULT_SUCCESS)
453                 {
454                     break;
455                 }
456             }
457             /* Terminate here if either the conversion source or the conversion destination passes their respective ends. */
458             if ((src_pos + src_count > src_max) || (dst_pos + dst_count > dst_max))
459             {
460                 break;
461             }
462             /* Stores the conversion result if the buffer is valid */
463             if (dst)
464             {
465                 int     i;
466                 for (i = 0; i < dst_count; ++i)
467                 {
468                     MI_StoreLE8(&dst[dst_pos + i], (u8)dst_tmp[i]);
469                 }
470             }
471             src_pos += src_count;
472             dst_pos += dst_count;
473         }
474         /* Returns the number of converted characters */
475         if (src_len)
476         {
477             *src_len = src_pos;
478         }
479         if (dst_len)
480         {
481             *dst_len = dst_pos;
482         }
483     }
484     return retval;
485 }
486 #if !defined(STD_UNICODE_STATIC_IMPLEMENTATION)
487 #include <twl/ltdmain_end.h>
488 #endif // !defined(STD_UNICODE_STATIC_IMPLEMENTATION)
489 
490 
491 #endif // defined(SDK_ARM9) || !defined(SDK_NITRO)
492