1 /*---------------------------------------------------------------------------*
2   Project: ENC library
3   File:    enc.h
4 
5   Copyright 2006 Nintendo.  All rights reserved.
6 
7   These coded instructions, statements, and computer programs contain
8   proprietary information of Nintendo of America Inc. and/or Nintendo
9   Company Ltd., and are protected by Federal copyright law. They may
10   not be disclosed to third parties or copied or duplicated in any form,
11   in whole or in part, without the prior written consent of Nintendo.
12 
13   $Log: enc.h,v $
14   Revision 1.9  2008/02/27 13:46:45  wada_jumpei
15   Obsoleted ENC_ERR_NOT_LOADED because other libraries, including NWC24, cannot handle it.
16   It is still defined so that its value is not overwritten by another error.
17 
18   Revision 1.8  2008/02/12 13:14:50  wada_jumpei
19   Supported Korean and Chinese.
20    Korean: UHC (CP949)
21    Chinese: EUC-CN + additional Kanji
22 
23   Revision 1.7  2007/02/05 23:34:45  yoshioka_yasuhiro
24   Added table stripping.
25 
26   Revision 1.6  2006/11/04 08:14:34  yoshioka_yasuhiro
27   Added windows-1252 support.
28 
29   Revision 1.5  2006/10/27 11:10:36  yoshioka_yasuhiro
30   Added some character encodings and automatic converter.
31 
32   Revision 1.4  2006/08/14 04:30:37  yoshioka_yasuhiro
33   Added ENCConvertStringJisToUnicode and ENCConvertStringUnicodeToJis.
34 
35   Revision 1.2  2006/08/09 10:36:55  yoshioka_yasuhiro
36   Specification change.
37   ENCConvertString* functions return a result code,
38   and read/write dstlen and srclen.
39 
40   Revision 1.1  2006/08/07 09:13:42  yoshioka_yasuhiro
41   Initial commit.
42 
43   $NoKeywords: $
44  *---------------------------------------------------------------------------*/
45 
46 #ifndef REVOLUTION_ENC_H__
47 #define REVOLUTION_ENC_H__
48 #ifdef __cplusplus
49 extern "C" {
50 #endif
51 
52 #include <revolution/types.h>
53 
/*
 * Result code returned by the ENC functions declared in this header.
 * ENC_OK is zero; every error code is a distinct negative value.
 */
typedef enum ENCResult {
    ENC_OK                   =  0,
    ENC_ERR_NO_BUF_LEFT      = -1,
    ENC_ERR_NO_MAP_RULE      = -2,
    ENC_ERR_INVALID_PARAM    = -3,
    ENC_ERR_INVALID_FORMAT   = -4,
    ENC_ERR_UNKNOWN_ENCODING = -5,
    ENC_ERR_UNSUPPORTED      = -6,
    /* Marked obsolete in revision 1.9 (see the revision log) but still defined. */
    ENC_ERR_NOT_LOADED       = -7
} ENCResult;
65 
/*
 * Break-type selector held in ENCContext::brtype and passed to
 * ENCSetBreakType().
 * NOTE(review): presumably chooses the line-break sequence used in
 * converted text, with ENC_BR_KEEP leaving breaks untouched -- confirm.
 */
typedef enum ENCBreakType {
    ENC_BR_KEEP = 0,
    ENC_BR_CRLF = 1,
    ENC_BR_CR   = 2,
    ENC_BR_LF   = 3
} ENCBreakType;
73 
74 typedef struct ENCContext
75 {
76     s32 encoding;
77     ENCBreakType brtype;
78     s32 state;
79     u16 nomap;
80     u16 invalid;
81 } ENCContext;
82 
/*
 * Size in bytes of one u16; going by the name, the width of one character
 * of the internal encoding.  Parenthesised so the expansion always behaves
 * as a single operand.
 */
#define ENC_INTERNAL_CHAR_WIDTH   (sizeof(u16))

/* NOTE(review): presumably the number of supported encodings and the
 * buffer size for an encoding name -- confirm against the implementation. */
#define ENC_ENCODING_COUNT        23
#define ENC_ENCODING_NAME_LENGTH  16

/*
 * NOTE(review): presumably the value ENCCheckEncoding() reports through
 * *index when no candidate matches -- confirm.  Parenthesised so the unary
 * minus cannot interact with adjacent operators at the point of use.
 */
#define ENC_CHECK_NOT_FOUND       (-1)

/* NOTE(review): presumably special values for the nomap / invalid arguments
 * of ENCSetAlternativeCharacter() -- confirm. */
#define ENC_ALT_STOP              0x0000
#define ENC_ALT_SKIP              0xFFFF
92 
93 /* Internal from/to External */
94 
95 ENCResult ENCInitContext(ENCContext *context);
96 ENCResult ENCDuplicateContext(ENCContext *duplicate, const ENCContext *original);
97 ENCResult ENCSetExternalEncoding(ENCContext* context, const u8* encoding);
98 ENCResult ENCGetExternalEncoding(const ENCContext* context, u8* encoding);
99 ENCResult ENCSetBreakType(ENCContext* context, ENCBreakType type);
100 ENCResult ENCSetAlternativeCharacter(ENCContext* context, u16 nomap, u16 invalid);
101 ENCResult ENCGetNextCharacterWidth(const ENCContext* context, const u8* src, u32* width);
102 ENCResult ENCGetExternalCharacterWidth(const ENCContext* context, u32* width);
103 ENCResult ENCConvertToInternalEncoding(ENCContext* context, u16* dst, s32* dstlen, const u8* src, s32* srclen);
104 ENCResult ENCConvertFromInternalEncoding(ENCContext* context, u8* dst, s32* dstlen, const u16* src, s32* srclen);
105 
106 
107 /* Check */
108 
109 ENCResult ENCCheckEncoding(s32* index, const u8** encodings, s32 encsize, const u16* src, s32 srclen);
110 
111 
112 ENCResult ENCIs7BitEncoding(BOOL* is7bit, const u8* encoding);
113 
114 
115 /* BOM */
116 
117 ENCResult ENCSetUnicodeBOM(u16* dst, s32 dstlen);
118 ENCResult ENCSetUnicodeBOM32(u32* dst, s32 dstlen);
119 ENCResult ENCSetUnicodeBOM16(u16* dst, s32 dstlen);
120 ENCResult ENCSetUnicodeBOM8(u8* dst, s32 dstlen);
121 
122 /* ASCII */
123 
124 ENCResult ENCConvertStringUnicodeToAscii(u8* dst, s32* dstlen, const u16* src, s32* srclen);
125 ENCResult ENCConvertStringAsciiToUnicode(u16* dst, s32* dstlen, const u8* src, s32* srclen);
126 
127 /* UTF */
128 
129 ENCResult ENCConvertStringUtf32ToUtf16(u16* dst, s32* dstlen, const u32* src, s32* srclen);
130 ENCResult ENCConvertStringUtf16ToUtf32(u32* dst, s32* dstlen, const u16* src, s32* srclen);
131 
132 ENCResult ENCConvertStringUtf32ToUtf8(u8* dst, s32* dstlen, const u32* src, s32* srclen);
133 ENCResult ENCConvertStringUtf8ToUtf32(u32* dst, s32* dstlen, const u8* src, s32* srclen);
134 
135 ENCResult ENCConvertStringUtf16ToUtf8(u8* dst, s32* dstlen, const u16* src, s32* srclen);
136 ENCResult ENCConvertStringUtf8ToUtf16(u16* dst, s32* dstlen, const u8* src, s32* srclen);
137 
138 /* Japanese */
139 
140 ENCResult ENCConvertStringSjisToUnicode(u16* dst, s32* dstlen, const u8* src, s32* srclen);
141 ENCResult ENCConvertStringUnicodeToSjis(u8* dst, s32* dstlen, const u16* src, s32* srclen);
142 
143 ENCResult ENCConvertStringJisToUnicode(u16* dst, s32* dstlen, const u8* src, s32* srclen);
144 ENCResult ENCConvertStringUnicodeToJis(u8* dst, s32* dstlen, const u16* src, s32* srclen);
145 
146 ENCResult ENCConvertStringJisToSjis(u8* dst, s32* dstlen, const u8* src, s32* srclen);
147 ENCResult ENCConvertStringSjisToJis(u8* dst, s32* dstlen, const u8* src, s32* srclen);
148 
149 /* Latin */
150 
151 ENCResult ENCConvertStringLatin1ToUnicode(u16* dst, s32* dstlen, const u8* src, s32* srclen);
152 ENCResult ENCConvertStringUnicodeToLatin1(u8* dst, s32* dstlen, const u16* src, s32* srclen);
153 
154 ENCResult ENCConvertStringLatin2ToUnicode(u16* dst, s32* dstlen, const u8* src, s32* srclen);
155 ENCResult ENCConvertStringUnicodeToLatin2(u8* dst, s32* dstlen, const u16* src, s32* srclen);
156 
157 ENCResult ENCConvertStringLatin3ToUnicode(u16* dst, s32* dstlen, const u8* src, s32* srclen);
158 ENCResult ENCConvertStringUnicodeToLatin3(u8* dst, s32* dstlen, const u16* src, s32* srclen);
159 
160 ENCResult ENCConvertStringGreekToUnicode(u16* dst, s32* dstlen, const u8* src, s32* srclen);
161 ENCResult ENCConvertStringUnicodeToGreek(u8* dst, s32* dstlen, const u16* src, s32* srclen);
162 
163 ENCResult ENCConvertStringLatin6ToUnicode(u16* dst, s32* dstlen, const u8* src, s32* srclen);
164 ENCResult ENCConvertStringUnicodeToLatin6(u8* dst, s32* dstlen, const u16* src, s32* srclen);
165 
166 ENCResult ENCConvertStringLatin9ToUnicode(u16* dst, s32* dstlen, const u8* src, s32* srclen);
167 ENCResult ENCConvertStringUnicodeToLatin9(u8* dst, s32* dstlen, const u16* src, s32* srclen);
168 
169 ENCResult ENCConvertStringWin1252ToUnicode(u16* dst, s32* dstlen, const u8* src, s32* srclen);
170 ENCResult ENCConvertStringUnicodeToWin1252(u8* dst, s32* dstlen, const u16* src, s32* srclen);
171 
172 /* Chinese */
173 
174 ENCResult ENCConvertStringGb2312ToUnicode(u16* dst, s32* dstlen, const u8* src, s32* srclen);
175 ENCResult ENCConvertStringUnicodeToGb2312(u8* dst, s32* dstlen, const u16* src, s32* srclen);
176 
177 /* Korean */
178 
179 ENCResult ENCConvertStringUhcToUnicode(u16* dst, s32* dstlen, const u8* src, s32* srclen);
180 ENCResult ENCConvertStringUnicodeToUhc(u8* dst, s32* dstlen, const u16* src, s32* srclen);
181 
182 /* one-way */
183 
184 ENCResult ENCConvertStringUtf16ToUtf16(u16* dst, s32* dstlen, const u16* src, s32* srclen);
185 ENCResult ENCConvertStringUtf16LEToUtf16BE(u16* dst, s32* dstlen, const u16* src, s32* srclen);
186 ENCResult ENCConvertStringUtf7ToUtf16(u16* dst, s32* dstlen, const u8* src, s32* srclen);
187 
188 ENCResult ENCConvertStringWin1250ToUnicode(u16* dst, s32* dstlen, const u8* src, s32* srclen);
189 ENCResult ENCConvertStringWin1253ToUnicode(u16* dst, s32* dstlen, const u8* src, s32* srclen);
190 
191 ENCResult ENCConvertStringMacromanToUnicode(u16* dst, s32* dstlen, const u8* src, s32* srclen);
192 ENCResult ENCConvertStringMacgreekToUnicode(u16* dst, s32* dstlen, const u8* src, s32* srclen);
193 ENCResult ENCConvertStringMacceToUnicode(u16* dst, s32* dstlen, const u8* src, s32* srclen);
194 
195 ENCResult ENCConvertStringIbm850ToUnicode(u16* dst, s32* dstlen, const u8* src, s32* srclen);
196 ENCResult ENCConvertStringIbm852ToUnicode(u16* dst, s32* dstlen, const u8* src, s32* srclen);
197 
/* strip table */

/*
 * Each ENC_STRIP_TABLE_* macro expands to a run of variable definitions:
 * a BOOL "loaded" flag initialised to FALSE and table pointers initialised
 * to NULL.  Every expansion already ends in ';', so a macro is written on
 * its own without a trailing semicolon.
 * NOTE(review): presumably an application places a macro in one source
 * file to leave that language's conversion tables out of the link (the
 * revision 1.7 "table stripping" feature) -- confirm against the library
 * documentation.
 */

/* Japanese tables. */
#define ENC_STRIP_TABLE_JP                      \
    BOOL enc_tbl_jp_loaded = FALSE;             \
    const u8 *enc_tbl_jp_wctomb = NULL;         \
    const u8 *enc_tbl_jp_mbtowc = NULL;         \
    const u16 *enc_offset_jp = NULL;

/* Chinese tables, including the "cnex" table. */
#define ENC_STRIP_TABLE_CN                      \
    BOOL enc_tbl_cn_loaded = FALSE;             \
    const u8 *enc_tbl_cn_wctomb = NULL;         \
    const u8 *enc_tbl_cn_mbtowc = NULL;         \
    const u16 *enc_offset_cn = NULL;            \
    const u16 *enc_tbl_cnex_mbtowc = NULL;

/* Korean tables, second set ("2" suffix), plus the enc_tbl_kr_loaded flag. */
#define ENC_STRIP_TABLE_KR_KANJI                \
    BOOL enc_tbl_kr_loaded = FALSE;             \
    const u8 *enc_tbl_kr_wctomb2 = NULL;        \
    const u8 *enc_tbl_kr_mbtowc2 = NULL;        \
    const u16 *enc_offset_kr2 = NULL;

/* Korean UHC tables. */
#define ENC_STRIP_TABLE_KR_UHC                  \
    BOOL enc_tbl_uhc_loaded = FALSE;            \
    const u16 *enc_tbl_uhc_mbtowc = NULL;       \
    const u16 *enc_offset_uhc = NULL;

/*
 * All Korean tables: the first set ("1" suffix) followed by the
 * ENC_STRIP_TABLE_KR_KANJI and ENC_STRIP_TABLE_KR_UHC expansions.  Using
 * this together with either of those two macros in the same scope would
 * define the same variables twice.
 */
#define ENC_STRIP_TABLE_KR                      \
    const u8 *enc_tbl_kr_wctomb1 = NULL;        \
    const u8 *enc_tbl_kr_mbtowc1 = NULL;        \
    const u16 *enc_offset_kr1 = NULL;           \
    ENC_STRIP_TABLE_KR_KANJI                    \
    ENC_STRIP_TABLE_KR_UHC
231 
232 #ifdef __cplusplus
233 } /* extern "C" */
234 #endif
235 #endif /* REVOLUTION_ENC_H__ */
236