1 /*---------------------------------------------------------------------------*
2 Project: Compress/uncompress library
3 File: CXCompression.h
4 Programmer: Makoto Takano
5
6 Copyright 2006 Nintendo. All rights reserved.
7
8 These coded instructions, statements, and computer programs contain
9 proprietary information of Nintendo of America Inc. and/or Nintendo
10 Company Ltd., and are protected by Federal copyright law. They may
11 not be disclosed to third parties or copied or duplicated in any form,
12 in whole or in part, without the prior written consent of Nintendo.
13
14 $Log: CXCompression.c,v $
15 Revision 1.19 2007/11/02 04:48:26 takano_makoto
16 Reduced LZEX compression time in the worst-case scenario.
17
18 Revision 1.18 2007/10/31 12:51:28 takano_makoto
19 Appended the LH and LRC formats.
20
21 Revision 1.16 2007/05/30 04:17:40 takano_makoto
22 Strict security check.
23 Added expanded LZ77 format.
24
25 Revision 1.11 2006/09/12 11:34:17 takano_makoto
26 Expanded max file size.
27
28 Revision 1.10 2006/09/05 04:39:31 takano_makoto
29 Changed <revolution/os.h> to <dolphin/os.h>
30
31 Revision 1.9 2006/07/05 13:06:42 takano_makoto
32 Revised indents
33
34 Revision 1.8 2006/07/05 10:38:39 takano_makoto
35 Fixed a bug when a 1-byte over-access could occur when compression fails
36
37 Revision 1.7 2006/07/05 09:31:05 takano_makoto
38 Separated HuffConvertData() into another function
39 Added asserts.
40
41 Revision 1.6 2006/07/05 08:12:20 takano_makoto
42 Minor comment revisions
43
44 Revision 1.5 2006/07/05 05:54:29 takano_makoto
45 Fixed a bug for the scope of Huffman table initialization
46
47 Revision 1.4 2006/07/04 13:18:55 takano_makoto
48 Changed LZ compression to the high-speed version using work buffer
49 Revised to avoid use of static variables in Huffman compression
50
51 $NoKeywords: $
52 *---------------------------------------------------------------------------*/
53
54
55 #include <dolphin/types.h>
56 #include <dolphin/os.h>
57 #include <revolution/cx/CXUncompression.h>
58 #include <revolution/cx/CXCompression.h>
59 #include "CXUtil.h"
60
61
62 //===========================================================================
63 // LZ Encoding
64 //===========================================================================
65
66 // Temporary information for LZ high-speed encoding
67 typedef struct
68 {
69 u16 windowPos; // Top position of the history window
70 u16 windowLen; // Length of the history window
71
72 s16 *LZOffsetTable; // Offset buffer of the history window
73 s16 *LZByteTable; // Pointer to the most recent character history
74 s16 *LZEndTable; // Pointer to the oldest character history
75 }
76 LZCompressInfo;
77
78 static void LZInitTable( LZCompressInfo * info, void *work );
79 static u32 SearchLZ ( LZCompressInfo * info, const u8 *nextp, u32 remainSize, u16 *offset, u32 maxLength );
80 static void SlideByte ( LZCompressInfo * info, const u8 *srcp );
81 static inline void LZSlide ( LZCompressInfo * info, const u8 *srcp, u32 n );
82
83
84 /*---------------------------------------------------------------------------*
85 Name: CXCompressLZ
86
87 Description: Function that performs LZ77 compression
88
89 Arguments: srcp: Pointer to compression source data
90 size: Size of compression source data
91 dstp: Pointer to destination for compressed data
92 The buffer must be larger than the size of the compression source data.
93 work: Temporary buffer for compression
94 Requires a region of at least size CX_LZ_FAST_COMPRESS_WORK_SIZE.
95
96 Returns: The data size after compression.
97 If compressed data is larger than original data, compression is terminated and 0 is returned.
98 *---------------------------------------------------------------------------*/
CXCompressLZImpl(const u8 * srcp,u32 size,u8 * dstp,void * work,BOOL exFormat)99 u32 CXCompressLZImpl(const u8 *srcp, u32 size, u8 *dstp, void *work, BOOL exFormat)
100 {
101 u32 LZDstCount; // Number of bytes of compressed data
102 u8 LZCompFlags; // Flag series indicating whether there is compression
103 u8 *LZCompFlagsp; // Points to memory region storing LZCompFlags
104 u16 lastOffset; // Offset to matching data (the longest matching data at the time)
105 u32 lastLength; // Length of matching data (the longest matching data at the time)
106 u8 i;
107 u32 dstMax;
108 LZCompressInfo info; // Temporary LZ compression information
109 const u32 MAX_LENGTH = (exFormat)? (0xFFFF + 0xFF + 0xF + 3U) : (0xF + 3U);
110
111 ASSERT( ((u32)srcp & 0x1) == 0 );
112 ASSERT( work != NULL );
113 ASSERT( size > 4 );
114
115 if ( size < (1 << 24) )
116 {
117 *(u32 *)dstp = CXiConvertEndian_( size << 8 | CX_COMPRESSION_LZ | (exFormat? 1 : 0) ); // Data header
118 dstp += 4;
119 LZDstCount = 4;
120 }
121 else
122 // Use extended header if the size is larger than 24 bits
123 {
124 *(u32 *)dstp = CXiConvertEndian_( CX_COMPRESSION_LZ | (exFormat? 1U : 0U) ); // Data header
125 dstp += 4;
126 *(u32 *)dstp = CXiConvertEndian_( size ); // Size extended header
127 dstp += 4;
128 LZDstCount = 8;
129 }
130 dstMax = size;
131 LZInitTable(&info, work);
132
133 while ( size > 0 )
134 {
135 LZCompFlags = 0;
136 LZCompFlagsp = dstp++; // Destination for storing flag series
137 LZDstCount++;
138
139 // Since flag series is stored as 8-bit data, loop eight times
140 for ( i = 0; i < 8; i++ )
141 {
142 LZCompFlags <<= 1; // The first time (i=0) has no real meaning
143 if (size <= 0)
144 {
145 // When the end terminator is reached, quit after shifting flag through to the last
146 continue;
147 }
148
149 if ( (lastLength = SearchLZ( &info, srcp, size, &lastOffset, MAX_LENGTH)) != 0 )
150 {
151 u32 length;
152 // Enable flag if compression is possible
153 LZCompFlags |= 0x1;
154
155 if (LZDstCount + 2 >= dstMax) // Quit on error if size becomes larger than source
156 {
157 return 0;
158 }
159
160 if ( exFormat )
161 {
162 if ( lastLength >= 0xFF + 0xF + 3 )
163 {
164 length = (u32)( lastLength - 0xFF - 0xF - 3 );
165 *dstp++ = (u8)( 0x10 | (length >> 12) );
166 *dstp++ = (u8)( length >> 4 );
167 LZDstCount += 2;
168 }
169 else if ( lastLength >= 0xF + 2 )
170 {
171 length = (u32)( lastLength - 0xF - 2 );
172 *dstp++ = (u8)( length >> 4 );
173 LZDstCount += 1;
174 }
175 else
176 {
177 length = (u32)( lastLength - 1 );
178 }
179 }
180 else
181 {
182 length = (u32)( lastLength - 3 );
183 }
184
185 // Divide offset into upper 4 bits and lower 8 bits and store
186 *dstp++ = (u8)( length << 4 | (lastOffset - 1) >> 8 );
187 *dstp++ = (u8)( (lastOffset - 1) & 0xff);
188 LZDstCount += 2;
189 LZSlide(&info, srcp, lastLength);
190 srcp += lastLength;
191 size -= lastLength;
192 }
193 else
194 {
195 // No compression
196 if (LZDstCount + 1 >= dstMax) // Quit on error if size becomes larger than source
197 {
198 return 0;
199 }
200 LZSlide(&info, srcp, 1);
201 *dstp++ = *srcp++;
202 size--;
203 LZDstCount++;
204 }
205 } // Completed eight loops
206 *LZCompFlagsp = LZCompFlags; // Store flag series
207 }
208
209 // 4-byte boundary alignment
210 // Data size does not include Data0, used for alignment
211 i = 0;
212 while ( (LZDstCount + i) & 0x3 )
213 {
214 *dstp++ = 0;
215 i++;
216 }
217
218 return LZDstCount;
219 }
220
221 //--------------------------------------------------------
222 // With LZ77 compression, searches for the longest matching string in the slide window.
223 // Arguments: startp: Pointer to starting position of data
224 // nextp: Pointer to data where search will start
225 // remainSize: Size of remaining data
226 // offset: Pointer to region storing matched offset
227 // Return : TRUE if matching string is found
228 // FALSE if not found
229 //--------------------------------------------------------
SearchLZ(LZCompressInfo * info,const u8 * nextp,u32 remainSize,u16 * offset,u32 maxLength)230 static u32 SearchLZ( LZCompressInfo * info, const u8 *nextp, u32 remainSize, u16 *offset, u32 maxLength )
231 {
232 const u8 *searchp;
233 const u8 *headp, *searchHeadp;
234 u16 maxOffset;
235 u32 currLength = 2;
236 u32 tmpLength;
237 s32 w_offset;
238 s16 *const LZOffsetTable = info->LZOffsetTable;
239 const u16 windowPos = info->windowPos;
240 const u16 windowLen = info->windowLen;
241
242 if (remainSize < 3)
243 {
244 return 0;
245 }
246
247 w_offset = info->LZByteTable[*nextp];
248
249 while (w_offset != -1)
250 {
251 if (w_offset < windowPos)
252 {
253 searchp = nextp - windowPos + w_offset;
254 }
255 else
256 {
257 searchp = nextp - windowLen - windowPos + w_offset;
258 }
259
260 /* This isn't needed, but it seems to make it a little faster */
261 if (*(searchp + 1) != *(nextp + 1) || *(searchp + 2) != *(nextp + 2))
262 {
263 w_offset = LZOffsetTable[w_offset];
264 continue;
265 }
266
267 if (nextp - searchp < 2)
268 {
269 // VRAM is accessed in 2-byte units (since data is sometimes read from VRAM), so the data to search must start 2 bytes prior to the start location actually desired.
270 //
271 //
272 // Since the offset is stored in 12 bits, the value is 4096 or less
273 break;
274 }
275 tmpLength = 3;
276 searchHeadp = searchp + 3;
277 headp = nextp + 3;
278
279 // Increments the compression size until a data terminator or different data is encountered.
280 while (((u32)(headp - nextp) < remainSize) && (*headp == *searchHeadp))
281 {
282 headp++;
283 searchHeadp++;
284 tmpLength++;
285
286 // Since the data length is stored in 4 bits, the value is 18 or less (3 is added)
287 if (tmpLength == maxLength)
288 {
289 break;
290 }
291 }
292
293 if (tmpLength > currLength)
294 {
295 // Update the maximum-length offset
296 currLength = tmpLength;
297 maxOffset = (u16)(nextp - searchp);
298 if (currLength == maxLength || currLength == remainSize)
299 {
300 // This is the longest matching length, so end search.
301 break;
302 }
303 }
304 w_offset = LZOffsetTable[w_offset];
305 }
306
307 if (currLength < 3)
308 {
309 return 0;
310 }
311 *offset = maxOffset;
312 return currLength;
313 }
314
315 //--------------------------------------------------------
316 // Initialize the dictionary index
317 //--------------------------------------------------------
LZInitTable(LZCompressInfo * info,void * work)318 static void LZInitTable(LZCompressInfo * info, void *work)
319 {
320 u16 i;
321
322 info->LZOffsetTable = (s16*)work;
323 info->LZByteTable = (s16*)( (u32)work + 4096 * sizeof(s16) );
324 info->LZEndTable = (s16*)( (u32)work + (4096 + 256) * sizeof(s16) );
325
326 for ( i = 0; i < 256; i++ )
327 {
328 info->LZByteTable[i] = -1;
329 info->LZEndTable [i] = -1;
330 }
331 info->windowPos = 0;
332 info->windowLen = 0;
333 }
334
335 //--------------------------------------------------------
336 // Slide the dictionary 1 byte
337 //--------------------------------------------------------
SlideByte(LZCompressInfo * info,const u8 * srcp)338 static void SlideByte(LZCompressInfo * info, const u8 *srcp)
339 {
340 s16 offset;
341 u8 in_data = *srcp;
342 u16 insert_offset;
343
344 s16 *const LZByteTable = info->LZByteTable;
345 s16 *const LZOffsetTable = info->LZOffsetTable;
346 s16 *const LZEndTable = info->LZEndTable;
347 const u16 windowPos = info->windowPos;
348 const u16 windowLen = info->windowLen;
349
350 if (windowLen == 4096)
351 {
352 u8 out_data = *(srcp - 4096);
353 if ((LZByteTable[out_data] = LZOffsetTable[LZByteTable[out_data]]) == -1)
354 {
355 LZEndTable[out_data] = -1;
356 }
357 insert_offset = windowPos;
358 }
359 else
360 {
361 insert_offset = windowLen;
362 }
363
364 offset = LZEndTable[in_data];
365 if (offset == -1)
366 {
367 LZByteTable[in_data] = (s16)insert_offset;
368 }
369 else
370 {
371 LZOffsetTable[offset] = (s16)insert_offset;
372 }
373 LZEndTable[in_data] = (s16)insert_offset;
374 LZOffsetTable[insert_offset] = -1;
375
376 if (windowLen == 4096)
377 {
378 info->windowPos = (u16)((windowPos + 1) % 0x1000);
379 }
380 else
381 {
382 info->windowLen++;
383 }
384 }
385
386 //--------------------------------------------------------
387 // Slide the dictionary n bytes
388 //--------------------------------------------------------
LZSlide(LZCompressInfo * info,const u8 * srcp,u32 n)389 static inline void LZSlide(LZCompressInfo * info, const u8 *srcp, u32 n)
390 {
391 u32 i;
392
393 for (i = 0; i < n; i++)
394 {
395 SlideByte(info, srcp++);
396 }
397 }
398
399
400 //===========================================================================
401 // Run-Length Encoding
402 //===========================================================================
403
404 /*---------------------------------------------------------------------------*
405 Name: CXCompressRL
406
407 Description: Function that performs run-length compression
408
409 Arguments: srcp: Pointer to compression source data
410 size: Size of compression source data
411 dstp: Pointer to destination for compressed data
412 The buffer must be larger than the size of the compression source data.
413
414 Returns: The data size after compression.
415 If compressed data is larger than original data, compression is terminated and 0 is returned.
416 *---------------------------------------------------------------------------*/
CXCompressRL(const u8 * srcp,u32 size,u8 * dstp)417 u32 CXCompressRL(const u8 *srcp, u32 size, u8 *dstp)
418 {
419 u32 RLDstCount; // Number of bytes of compressed data
420 u32 RLSrcCount; // Processed data volume of the compression target data (in bytes)
421 u8 RLCompFlag; // 1 if performing run-length encoding
422 u8 runLength; // Run length
423 u8 rawDataLength; // Length of non-run data
424 u32 i;
425
426 const u8 *startp; // Point to the start of compression target data for each process loop
427
428 ASSERT( srcp != NULL );
429 ASSERT( dstp != NULL );
430 ASSERT( size > 4 );
431
432 // Data header (For the size after decompression)
433 // To create the same output data as Nitro, work on on the endian.
434 if ( size < (1 << 24) )
435 {
436 *(u32 *)dstp = CXiConvertEndian_(size << 8 | CX_COMPRESSION_RL); // Data header
437 RLDstCount = 4;
438 }
439 else
440 // Use extended header if the size is larger than 24 bits
441 {
442 *(u32 *)dstp = CXiConvertEndian_(CX_COMPRESSION_RL); // Data header
443 *(u32 *)(dstp + 4) = CXiConvertEndian_(size); // Extend header size
444 RLDstCount = 8;
445 }
446 RLSrcCount = 0;
447 rawDataLength = 0;
448 RLCompFlag = 0;
449
450 while (RLSrcCount < size)
451 {
452 startp = &srcp[RLSrcCount]; // Set compression target data
453
454 for (i = 0; i < 128; i++) // Data volume that can be expressed in 7 bits is 0 to 127
455 {
456 // Reach the end of the compression target data
457 if (RLSrcCount + rawDataLength >= size)
458 {
459 rawDataLength = (u8)(size - RLSrcCount);
460 break;
461 }
462
463 if (RLSrcCount + rawDataLength + 2 < size)
464 {
465 if (startp[i] == startp[i + 1] && startp[i] == startp[i + 2])
466 {
467 RLCompFlag = 1;
468 break;
469 }
470 }
471 rawDataLength++;
472 }
473
474 // Store data that will not be encoded
475 // If the 8th bit of the data length storage byte is 0, this is a data sequence that is not encoded.
476 // The data length is x - 1, so 0-127 becomes 1-128.
477 if (rawDataLength)
478 {
479 if (RLDstCount + rawDataLength + 1 >= size) // Quit on error if size becomes larger than source
480 {
481 return 0;
482 }
483 dstp[RLDstCount++] = (u8)(rawDataLength - 1); // Store "data length - 1" (7 bits)
484 for (i = 0; i < rawDataLength; i++)
485 {
486 dstp[RLDstCount++] = srcp[RLSrcCount++];
487 }
488 rawDataLength = 0;
489 }
490
491 // Run-Length Encoding
492 if (RLCompFlag)
493 {
494 runLength = 3;
495 for (i = 3; i < 128 + 2; i++)
496 {
497 // Reach the end of the data for compression
498 if (RLSrcCount + runLength >= size)
499 {
500 runLength = (u8)(size - RLSrcCount);
501 break;
502 }
503
504 // If run was interrupted
505 if (srcp[RLSrcCount] != srcp[RLSrcCount + runLength])
506 {
507 break;
508 }
509 // Run continues
510 runLength++;
511 }
512
513 // If the 8th bit of the data length storage byte is 1, this is an encoded data sequence
514 if (RLDstCount + 2 >= size) // Quit on error if size becomes larger than source
515 {
516 return 0;
517 }
518 dstp[RLDstCount++] = (u8)(0x80 | (runLength - 3)); // Add 3, and store from 3 to 130
519 dstp[RLDstCount++] = srcp[RLSrcCount];
520 RLSrcCount += runLength;
521 RLCompFlag = 0;
522 }
523 }
524
525 // 4-byte boundary alignment
526 // Data size does not include Data0, used for alignment
527 i = 0;
528 while ((RLDstCount + i) & 0x3)
529 {
530 dstp[RLDstCount + i] = 0;
531 i++;
532 }
533 return RLDstCount;
534 }
535
536
537 //===========================================================================
538 // Huffman encoding
539 //===========================================================================
540 #define HUFF_END_L 0x80
541 #define HUFF_END_R 0x40
542
543 typedef struct
544 {
545 u32 Freq; // Frequency of occurrence
546 u16 No; // Data number
547 s16 PaNo; // Parent number
548 s16 ChNo[2]; // Child Number (0: left side, 1: right side)
549 u16 PaDepth; // Parent node depth
550 u16 LeafDepth; // Depth to leaf
551 u32 HuffCode; // Huffman code
552 u8 Bit; // Node's bit data
553 u8 _padding;
554 u16 HWord; // For each intermediate node, the amount of memory needed to store in HuffTree the subtree that has that node as its root
555 }
556 HuffData; // Total of 24 bytes
557
558 typedef struct
559 {
560 u8 leftOffsetNeed; // 1 if offset to left child node is required
561 u8 rightOffsetNeed; // 1 if an offset to the right child node is required
562 u16 leftNodeNo; // The left child node's number
563 u16 rightNodeNo; // Right child node's number
564 }
565 HuffTreeCtrlData; // Total of 6 bytes
566
567 // Structure of the Huffman work buffer
568 typedef struct
569 {
570 HuffData *huffTable; // huffTable[ 512 ]; 12288B
571 u8 *huffTree; // huffTree[ 256 * 2 ]; 512B
572 HuffTreeCtrlData *huffTreeCtrl; // huffTreeCtrl[ 256 ]; 1536B
573 u8 huffTreeTop; //
574 u8 padding_[3]; //
575 }
576 HuffCompressionInfo; // Total is 14340B
577
578 static void HuffInitTable( HuffCompressionInfo* info, void* work, u16 dataNum );
579 static void HuffCountData( HuffData* table, const u8 *srcp, u32 size, u8 bitSize );
580 static u16 HuffConstructTree( HuffData* table, u32 dataNum );
581 static u32 HuffConvertData( const HuffData *table, const u8* srcp, u8* dstp, u32 srcSize, u32 maxSize, u8 bitSize );
582
583 static void HuffAddParentDepthToTable( HuffData *table, u16 leftNo, u16 rightNo );
584 static void HuffAddCodeToTable ( HuffData *table, u16 nodeNo, u32 paHuffCode );
585 static u8 HuffAddCountHWordToTable ( HuffData *table, u16 nodeNo );
586
587 static void HuffMakeHuffTree ( HuffCompressionInfo* info, u16 rootNo );
588 static void HuffMakeSubsetHuffTree ( HuffCompressionInfo* info, u16 huffTreeNo, u8 rightNodeFlag );
589 static u8 HuffRemainingNodeCanSetOffset( HuffCompressionInfo* info, u8 costHWord );
590 static void HuffSetOneNodeOffset ( HuffCompressionInfo* info, u16 huffTreeNo, u8 rightNodeFlag );
591
592
593 /*---------------------------------------------------------------------------*
594 Name: CXCompressHuffman
595
596 Description: Function that performs Huffman compression
597
598 Arguments: srcp: Pointer to compression source data
599 size: Size of compression source data
600 dstp: Pointer to destination for compressed data
601 The buffer must be larger than the size of the compression source data.
602 huffBitSize: The number of bits to encode.
603 work: Work buffer for Huffman compression
604
605 Returns: The data size after compression.
606 If compressed data is larger than original data, compression is terminated and 0 is returned.
607 *---------------------------------------------------------------------------*/
CXCompressHuffman(const u8 * srcp,u32 size,u8 * dstp,u8 huffBitSize,void * work)608 u32 CXCompressHuffman( const u8 *srcp, u32 size, u8 *dstp, u8 huffBitSize, void *work )
609 {
610 u32 huffDstCount; // Number of bytes of compressed data
611 s32 i;
612 u16 rootNo; // Binary tree's root number
613 u16 huffDataNum = (u16)(1 << huffBitSize); // 8->256, 4->16
614 u32 tableOffset;
615 HuffCompressionInfo info;
616
617 ASSERT( srcp != NULL );
618 ASSERT( dstp != NULL );
619 ASSERT( huffBitSize == 4 || huffBitSize == 8 );
620 ASSERT( work != NULL );
621 ASSERT( ((u32)work & 0x3) == 0 );
622 ASSERT( size > 4 );
623
624 // Initialize table
625 HuffInitTable( &info, work, huffDataNum );
626
627 // Check frequency of occurrence
628 HuffCountData( info.huffTable, srcp, size, huffBitSize );
629
630 // Create tree table
631 rootNo = HuffConstructTree( info.huffTable, huffDataNum );
632
633 // Create HuffTree
634 HuffMakeHuffTree( &info, rootNo );
635 info.huffTree[0] = --info.huffTreeTop;
636
637 // Data header
638 // To create the same compression data as Nitro, work on the endian.
639 if ( size < (1 << 24) )
640 {
641 *(u32 *)dstp = CXiConvertEndian_(size << 8 | CX_COMPRESSION_HUFFMAN | huffBitSize);
642 tableOffset = 4;
643 }
644 else
645 // Use extended header if the size is larger than 24 bits
646 {
647 *(u32 *)dstp = CXiConvertEndian_( (u32)(CX_COMPRESSION_HUFFMAN | huffBitSize) );
648 *(u32 *)(dstp + 4) = CXiConvertEndian_( size );
649 tableOffset = 8;
650 }
651 huffDstCount = tableOffset;
652
653 if ( huffDstCount + (info.huffTreeTop + 1) * 2 >= size ) // Quit on error if size becomes larger than source
654 {
655 return 0;
656 }
657
658 for ( i = 0; i < (u16)( (info.huffTreeTop + 1) * 2 ); i++ ) // Tree table
659 {
660 dstp[ huffDstCount++ ] = ((u8*)info.huffTree)[ i ];
661 }
662
663 // 4-byte boundary alignment
664 // Data0 used for alignment is included in data size (as per the decoder algorithm)
665 while ( huffDstCount & 0x3 )
666 {
667 if ( huffDstCount & 0x1 )
668 {
669 info.huffTreeTop++;
670 dstp[ tableOffset ]++;
671 }
672 dstp[ huffDstCount++ ] = 0;
673 }
674
675 // Data conversion via the Huffman table
676 {
677 u32 convSize = HuffConvertData( info.huffTable, srcp, &dstp[ huffDstCount ], size, size - huffDstCount, huffBitSize );
678 if ( convSize == 0 )
679 {
680 // Compression fails because the compressed data is larger than the source
681 return 0;
682 }
683 huffDstCount += convSize;
684 }
685
686 return huffDstCount;
687 }
688
689
690 /*---------------------------------------------------------------------------*
691 Name: HuffInitTable
692 Description: Initializes the Huffman table.
693 Arguments: table
694 size
695 Returns: None.
696 *---------------------------------------------------------------------------*/
HuffInitTable(HuffCompressionInfo * info,void * work,u16 dataNum)697 static void HuffInitTable( HuffCompressionInfo* info, void* work, u16 dataNum )
698 {
699 u32 i;
700 info->huffTable = (HuffData*)(work);
701 info->huffTree = (u8*)( (u32)work + sizeof(HuffData) * 512 );
702 info->huffTreeCtrl = (HuffTreeCtrlData*)( (u32)info->huffTree + sizeof(u8) * 512 );
703 info->huffTreeTop = 1;
704
705 // Initialize huffTable
706 {
707 HuffData* table = info->huffTable;
708
709 const HuffData HUFF_TABLE_INIT_DATA = { 0, 0, 0, {-1, -1}, 0, 0, 0, 0, 0 };
710 for ( i = 0; i < dataNum * 2U; i++ )
711 {
712 table[ i ] = HUFF_TABLE_INIT_DATA;
713 table[ i ].No = (u16)i;
714 }
715 }
716
717 // Initialize huffTree and huffTreeCtrl
718 {
719 const HuffTreeCtrlData HUFF_TREE_CTRL_INIT_DATA = { 1, 1, 0, 0 };
720 u8* huffTree = info->huffTree;
721 HuffTreeCtrlData* huffTreeCtrl = info->huffTreeCtrl;
722
723 for ( i = 0; i < 256; i++ )
724 {
725 huffTree[ i * 2 ] = 0;
726 huffTree[ i * 2 + 1 ] = 0;
727 huffTreeCtrl[ i ] = HUFF_TREE_CTRL_INIT_DATA;
728 }
729 }
730 }
731
732
733 /*---------------------------------------------------------------------------*
734 Name: HuffCountData
735 Description: Count of frequency of appearance.
736 Arguments: table
737 *srcp
738 size
739 bitSize
740 Returns: None.
741 *---------------------------------------------------------------------------*/
HuffCountData(HuffData * table,const u8 * srcp,u32 size,u8 bitSize)742 static void HuffCountData( HuffData* table, const u8 *srcp, u32 size, u8 bitSize )
743 {
744 u32 i;
745 u8 tmp;
746
747 if ( bitSize == 8 )
748 {
749 for ( i = 0; i < size; i++ )
750 {
751 table[ srcp[ i ] ].Freq++; // 8-bit encoding
752 }
753 }
754 else
755 {
756 for ( i = 0; i < size; i++ ) // 4-bit encoding
757 {
758 tmp = (u8)( (srcp[ i ] & 0xf0) >> 4 );
759 table[ tmp ].Freq++; // Store from upper 4 bits first // Either is OK
760 tmp = (u8)( srcp[ i ] & 0x0f );
761 table[ tmp ].Freq++; // The problem is the encoding
762 }
763 }
764 }
765
766
767 /*---------------------------------------------------------------------------*
768 Name: HuffConstructTree
769 Description: Constructs a Huffman tree
770 Arguments: *table
771 dataNum
772 Returns: None.
773 *---------------------------------------------------------------------------*/
HuffConstructTree(HuffData * table,u32 dataNum)774 static u16 HuffConstructTree( HuffData *table, u32 dataNum )
775 {
776 s32 i;
777 s32 leftNo, rightNo; // Node's numbers at time of binary tree's creation
778 u16 tableTop = (u16)dataNum; // The table top number at time of table's creation
779 u16 rootNo; // Binary tree's root number
780 u16 nodeNum; // Number of valid nodes
781
782 leftNo = -1;
783 rightNo = -1;
784 while ( 1 )
785 {
786 // Search for two subtree vertices with low Freq value. At least one should be found.
787 // Search child vertices (left)
788 for ( i = 0; i < tableTop; i++ )
789 {
790 if ( ( table[i].Freq == 0 ) ||
791 ( table[i].PaNo != 0 ) )
792 {
793 continue;
794 }
795
796 if ( leftNo < 0 )
797 {
798 leftNo = i;
799 }
800 else if ( table[i].Freq < table[ leftNo ].Freq )
801 {
802 leftNo = i;
803 }
804 }
805
806 // Search child vertices (right)
807 for ( i = 0; i < tableTop; i++ )
808 {
809 if ( ( table[i].Freq == 0 ) ||
810 ( table[i].PaNo != 0 ) ||
811 ( i == leftNo ) )
812 {
813 continue;
814 }
815
816 if ( rightNo < 0 )
817 {
818 rightNo = i;
819 }
820 else if ( table[i].Freq < table[rightNo].Freq )
821 {
822 rightNo = i;
823 }
824 }
825
826 // If only one, then end table creation
827 if ( rightNo < 0 )
828 {
829 // When only one type of value exists, then create one node that gives the same value for both 0 and 1.
830 if ( tableTop == dataNum )
831 {
832 table[ tableTop ].Freq = table[ leftNo ].Freq;
833 table[ tableTop ].ChNo[0] = (s16)leftNo;
834 table[ tableTop ].ChNo[1] = (s16)leftNo;
835 table[ tableTop ].LeafDepth = 1;
836 table[ leftNo ].PaNo = (s16)tableTop;
837 table[ leftNo ].Bit = 0;
838 table[ leftNo ].PaDepth = 1;
839 }
840 else
841 {
842 tableTop--;
843 }
844 rootNo = tableTop;
845 nodeNum = (u16)( (rootNo - dataNum + 1) * 2 + 1 );
846 break;
847 }
848
849 // Create vertex that combines left subtree and right subtree
850 table[ tableTop ].Freq = table[ leftNo ].Freq + table[ rightNo ].Freq;
851 table[ tableTop ].ChNo[0] = (s16)leftNo;
852 table[ tableTop ].ChNo[1] = (s16)rightNo;
853 if ( table[ leftNo ].LeafDepth > table[ rightNo ].LeafDepth )
854 {
855 table[ tableTop ].LeafDepth = (u16)( table[ leftNo ].LeafDepth + 1 );
856 }
857 else
858 {
859 table[ tableTop ].LeafDepth = (u16)( table[ rightNo ].LeafDepth + 1 );
860 }
861
862 table[ leftNo ].PaNo = table[ rightNo ].PaNo = (s16)(tableTop);
863 table[ leftNo ].Bit = 0;
864 table[ rightNo ].Bit = 1;
865
866 HuffAddParentDepthToTable( table, (u16)leftNo, (u16)rightNo );
867
868 tableTop++;
869 leftNo = rightNo = -1;
870 }
871
872 // Generate Huffman code (In table[i].HuffCode)
873 HuffAddCodeToTable( table, rootNo, 0x00 ); // The Huffman code is the code with HuffCode's lower bits masked for PaDepth bits
874
875 // For each intermediate node, calculate the amount of memory needed to store as a huffTree the subtree that has that node as the root.
876 HuffAddCountHWordToTable( table, rootNo );
877
878 return rootNo;
879 }
880
881 //-----------------------------------------------------------------------
882 // When creating binary tree and when combining subtrees, add 1 to the depth of every node in the subtree.
883 //-----------------------------------------------------------------------
HuffAddParentDepthToTable(HuffData * table,u16 leftNo,u16 rightNo)884 static void HuffAddParentDepthToTable( HuffData *table, u16 leftNo, u16 rightNo )
885 {
886 table[ leftNo ].PaDepth++;
887 table[ rightNo ].PaDepth++;
888
889 if ( table[ leftNo ].LeafDepth != 0 )
890 {
891 HuffAddParentDepthToTable( table, (u16)table[ leftNo ].ChNo[0], (u16)table[ leftNo ].ChNo[1] );
892 }
893 if ( table[ rightNo ].LeafDepth != 0 )
894 {
895 HuffAddParentDepthToTable( table, (u16)table[ rightNo ].ChNo[0], (u16)table[ rightNo ].ChNo[1] );
896 }
897 }
898
899 //-----------------------------------------------------------------------
900 // Create Huffman code
901 //-----------------------------------------------------------------------
HuffAddCodeToTable(HuffData * table,u16 nodeNo,u32 paHuffCode)902 static void HuffAddCodeToTable( HuffData* table, u16 nodeNo, u32 paHuffCode )
903 {
904 table[ nodeNo ].HuffCode = (paHuffCode << 1) | table[ nodeNo ].Bit;
905
906 if ( table[ nodeNo ].LeafDepth != 0 )
907 {
908 HuffAddCodeToTable( table, (u16)table[ nodeNo ].ChNo[0], table[ nodeNo ].HuffCode );
909 HuffAddCodeToTable( table, (u16)table[ nodeNo ].ChNo[1], table[ nodeNo ].HuffCode );
910 }
911 }
912
913
914 //-----------------------------------------------------------------------
915 // Data volume required by intermediate nodes to create huffTree
916 //-----------------------------------------------------------------------
HuffAddCountHWordToTable(HuffData * table,u16 nodeNo)917 static u8 HuffAddCountHWordToTable( HuffData *table, u16 nodeNo)
918 {
919 u8 leftHWord, rightHWord;
920
921 switch ( table[ nodeNo ].LeafDepth )
922 {
923 case 0:
924 return 0;
925 case 1:
926 leftHWord = rightHWord = 0;
927 break;
928 default:
929 leftHWord = HuffAddCountHWordToTable( table, (u16)table[nodeNo].ChNo[0] );
930 rightHWord = HuffAddCountHWordToTable( table, (u16)table[nodeNo].ChNo[1] );
931 break;
932 }
933
934 table[ nodeNo ].HWord = (u16)( leftHWord + rightHWord + 1 );
935 return (u8)( leftHWord + rightHWord + 1 );
936 }
937
938
939 //-----------------------------------------------------------------------
940 // Create Huffman code table
941 //-----------------------------------------------------------------------
HuffMakeHuffTree(HuffCompressionInfo * info,u16 rootNo)942 static void HuffMakeHuffTree( HuffCompressionInfo* info, u16 rootNo )
943 {
944 s16 i;
945 s16 costHWord, tmpCostHWord; // Make subtree code table for most-costly node when subtree code table has not been created.
946 s16 costOffsetNeed, tmpCostOffsetNeed;
947 s16 costMaxKey, costMaxRightFlag; // Information for singling out the least costly node from HuffTree
948 u8 offsetNeedNum;
949 u8 tmpKey, tmpRightFlag;
950
951 info->huffTreeTop = 1;
952 costOffsetNeed = 0;
953
954 info->huffTreeCtrl[0].leftOffsetNeed = 0; // Do not use (used as table size)
955 info->huffTreeCtrl[0].rightNodeNo = rootNo;
956
957 while ( 1 ) // Until return
958 {
959 // Calculate the number of nodes whose offset needs setting
960 offsetNeedNum = 0;
961 for ( i = 0; i < info->huffTreeTop; i++ )
962 {
963 if ( info->huffTreeCtrl[ i ].leftOffsetNeed )
964 {
965 offsetNeedNum++;
966 }
967 if ( info->huffTreeCtrl[ i ].rightOffsetNeed )
968 {
969 offsetNeedNum++;
970 }
971 }
972
973 // Search for node with greatest cost
974 costHWord = -1;
975 costMaxKey = -1;
976 tmpKey = 0;
977 tmpRightFlag = 0;
978
979 for ( i = 0; i < info->huffTreeTop; i++ )
980 {
981 tmpCostOffsetNeed = (u8)( info->huffTreeTop - i );
982
983 // Evaluate cost of left child node
984 if (info->huffTreeCtrl[i].leftOffsetNeed)
985 {
986 tmpCostHWord = (s16)info->huffTable[ info->huffTreeCtrl[i].leftNodeNo ].HWord;
987
988 if ( (tmpCostHWord + offsetNeedNum) > 64 )
989 {
990 goto leftCostEvaluationEnd;
991 }
992 if ( ! HuffRemainingNodeCanSetOffset( info, (u8)tmpCostHWord ) )
993 {
994 goto leftCostEvaluationEnd;
995 }
996 if ( tmpCostHWord > costHWord )
997 {
998 costMaxKey = i;
999 costMaxRightFlag = 0;
1000 }
1001 else if ( (tmpCostHWord == costHWord) && (tmpCostOffsetNeed > costOffsetNeed) )
1002 {
1003 costMaxKey = i;
1004 costMaxRightFlag = 0;
1005 }
1006 }
1007 leftCostEvaluationEnd:{}
1008
1009 // Evaluate cost of right child node
1010 if ( info->huffTreeCtrl[i].rightOffsetNeed)
1011 {
1012 tmpCostHWord = (s16)info->huffTable[info->huffTreeCtrl[i].rightNodeNo].HWord;
1013
1014 if ( (tmpCostHWord + offsetNeedNum) > 64 )
1015 {
1016 goto rightCostEvaluationEnd;
1017 }
1018 if ( ! HuffRemainingNodeCanSetOffset( info, (u8)tmpCostHWord ) )
1019 {
1020 goto rightCostEvaluationEnd;
1021 }
1022 if ( tmpCostHWord > costHWord )
1023 {
1024 costMaxKey = i;
1025 costMaxRightFlag = 1;
1026 }
1027 else if ( (tmpCostHWord == costHWord) && (tmpCostOffsetNeed > costOffsetNeed) )
1028 {
1029 costMaxKey = i;
1030 costMaxRightFlag = 1;
1031 }
1032 }
1033 rightCostEvaluationEnd:{}
1034 }
1035
1036 // Store entire subtree in huffTree
1037 if ( costMaxKey >= 0 )
1038 {
1039 HuffMakeSubsetHuffTree( info, (u8)costMaxKey, (u8)costMaxRightFlag);
1040 goto nextTreeMaking;
1041 }
1042 else
1043 {
1044 // Search for node with largest required offset
1045 for ( i = 0; i < info->huffTreeTop; i++ )
1046 {
1047 u16 tmp = 0;
1048 tmpRightFlag = 0;
1049 if (info->huffTreeCtrl[i].leftOffsetNeed)
1050 {
1051 tmp = info->huffTable[ info->huffTreeCtrl[i].leftNodeNo ].HWord;
1052 }
1053 if (info->huffTreeCtrl[i].rightOffsetNeed)
1054 {
1055 if ( info->huffTable[info->huffTreeCtrl[i].rightNodeNo ].HWord > tmp )
1056 {
1057 tmpRightFlag = 1;
1058 }
1059 }
1060 if ( (tmp != 0) || (tmpRightFlag) )
1061 {
1062 HuffSetOneNodeOffset( info, (u8)i, tmpRightFlag);
1063 goto nextTreeMaking;
1064 }
1065 }
1066 }
1067 return;
1068 nextTreeMaking:{}
1069 }
1070 }
1071
1072 //-----------------------------------------------------------------------
1073 // Store entire subtree in huffTree
1074 //-----------------------------------------------------------------------
HuffMakeSubsetHuffTree(HuffCompressionInfo * info,u16 huffTreeNo,u8 rightNodeFlag)1075 static void HuffMakeSubsetHuffTree( HuffCompressionInfo* info, u16 huffTreeNo, u8 rightNodeFlag )
1076 {
1077 u8 i;
1078
1079 i = info->huffTreeTop;
1080 HuffSetOneNodeOffset( info, huffTreeNo, rightNodeFlag);
1081
1082 if ( rightNodeFlag )
1083 {
1084 info->huffTreeCtrl[ huffTreeNo ].rightOffsetNeed = 0;
1085 }
1086 else
1087 {
1088 info->huffTreeCtrl[ huffTreeNo ].leftOffsetNeed = 0;
1089 }
1090
1091 while ( i < info->huffTreeTop )
1092 {
1093 if ( info->huffTreeCtrl[ i ].leftOffsetNeed )
1094 {
1095 HuffSetOneNodeOffset( info, i, 0);
1096 info->huffTreeCtrl[ i ].leftOffsetNeed = 0;
1097 }
1098 if ( info->huffTreeCtrl[ i ].rightOffsetNeed )
1099 {
1100 HuffSetOneNodeOffset( info, i, 1);
1101 info->huffTreeCtrl[ i ].rightOffsetNeed = 0;
1102 }
1103 i++;
1104 }
1105 }
1106
1107 //-----------------------------------------------------------------------
1108 // Check if there is any problems with huffTree construction if subtree of obtained data size is decompressed.
1109 //-----------------------------------------------------------------------
HuffRemainingNodeCanSetOffset(HuffCompressionInfo * info,u8 costHWord)1110 static u8 HuffRemainingNodeCanSetOffset( HuffCompressionInfo* info, u8 costHWord )
1111 {
1112 u8 i;
1113 s16 capacity;
1114
1115 capacity = (s16)( 64 - costHWord );
1116
1117 // The offset value is larger for smaller values of i, so you should calculate without sorting, with i=0 -> huffTreeTop
1118 for ( i = 0; i < info->huffTreeTop; i++ )
1119 {
1120 if ( info->huffTreeCtrl[i].leftOffsetNeed )
1121 {
1122 if ( (info->huffTreeTop - i) <= capacity )
1123 {
1124 capacity--;
1125 }
1126 else
1127 {
1128 return 0;
1129 }
1130 }
1131 if ( info->huffTreeCtrl[i].rightOffsetNeed )
1132 {
1133 if ( (info->huffTreeTop - i) <= capacity )
1134 {
1135 capacity--;
1136 }
1137 else
1138 {
1139 return 0;
1140 }
1141 }
1142 }
1143
1144 return 1;
1145 }
1146
1147 /*---------------------------------------------------------------------------*
1148 Name: HuffSetOneNodeOffset
1149 Description: Create Huffman code table for one node
1150 Arguments: *info : Pointer to data for constructing a Huffman tree
1151 huffTreeNo
1152 rightNodeFlag : Flag for whether node is at right
1153 Returns: None.
1154 *---------------------------------------------------------------------------*/
HuffSetOneNodeOffset(HuffCompressionInfo * info,u16 huffTreeNo,u8 rightNodeFlag)1155 static void HuffSetOneNodeOffset( HuffCompressionInfo* info, u16 huffTreeNo, u8 rightNodeFlag )
1156 {
1157 u16 nodeNo;
1158 u8 offsetData = 0;
1159
1160 HuffData* huffTable = info->huffTable;
1161 u8* huffTree = info->huffTree;
1162 HuffTreeCtrlData* huffTreeCtrl = info->huffTreeCtrl;
1163 u8 huffTreeTop = info->huffTreeTop;
1164
1165 if (rightNodeFlag)
1166 {
1167 nodeNo = huffTreeCtrl[ huffTreeNo ].rightNodeNo;
1168 huffTreeCtrl[ huffTreeNo ].rightOffsetNeed = 0;
1169 }
1170 else
1171 {
1172 nodeNo = huffTreeCtrl[ huffTreeNo ].leftNodeNo;
1173 huffTreeCtrl [huffTreeNo ].leftOffsetNeed = 0;
1174 }
1175
1176 // Left child node
1177 if ( huffTable[ huffTable[nodeNo].ChNo[0] ].LeafDepth == 0)
1178 {
1179 offsetData |= 0x80;
1180 huffTree[ huffTreeTop * 2 + 0 ] = (u8)huffTable[ nodeNo ].ChNo[0];
1181 huffTreeCtrl[ huffTreeTop ].leftNodeNo = (u8)huffTable[ nodeNo ].ChNo[0];
1182 huffTreeCtrl[ huffTreeTop ].leftOffsetNeed = 0; // Offset no longer required
1183 }
1184 else
1185 {
1186 huffTreeCtrl[ huffTreeTop ].leftNodeNo = (u16)huffTable[ nodeNo ].ChNo[0]; // Offset is required
1187 }
1188
1189 // Right child node
1190 if ( huffTable[ huffTable[ nodeNo ].ChNo[1] ].LeafDepth == 0 )
1191 {
1192 offsetData |= 0x40;
1193 huffTree[ huffTreeTop * 2 + 1 ] = (u8)huffTable[nodeNo].ChNo[1];
1194 huffTreeCtrl[ huffTreeTop ].rightNodeNo = (u8)huffTable[ nodeNo ].ChNo[1];
1195 huffTreeCtrl[ huffTreeTop ].rightOffsetNeed = 0; // Offset no longer required
1196 }
1197 else
1198 {
1199 huffTreeCtrl[ huffTreeTop ].rightNodeNo = (u16)huffTable[ nodeNo ].ChNo[1]; // Offset is required
1200 }
1201
1202 offsetData |= (u8)( huffTreeTop - huffTreeNo - 1 );
1203 huffTree[ huffTreeNo * 2 + rightNodeFlag ] = offsetData;
1204
1205 info->huffTreeTop++;
1206 }
1207
1208
1209 /*---------------------------------------------------------------------------*
1210 Name: HuffConvertData
1211 Description: Data conversion based on Huffman table.
1212 Arguments: *table
1213 srcp
1214 dstp
1215 srcSize
1216 bitSize
1217 Returns: None.
1218 *---------------------------------------------------------------------------*/
HuffConvertData(const HuffData * table,const u8 * srcp,u8 * dstp,u32 srcSize,u32 maxSize,u8 bitSize)1219 static u32 HuffConvertData( const HuffData *table, const u8* srcp, u8* dstp, u32 srcSize, u32 maxSize, u8 bitSize )
1220 {
1221 u32 i, ii, iii;
1222 u8 srcTmp;
1223 u32 bitStream = 0;
1224 u32 streamLength = 0;
1225 u32 dstSize = 0;
1226
1227 // Huffman encoding
1228 for ( i = 0; i < srcSize; i++ )
1229 { // Data compression
1230 u8 val = srcp[ i ];
1231 if ( bitSize == 8 )
1232 { // 8-bit Huffman
1233 bitStream = (bitStream << table[ val ].PaDepth) | table[ val ].HuffCode;
1234 streamLength += table[ val ].PaDepth;
1235
1236 if ( dstSize + (streamLength / 8) >= maxSize )
1237 {
1238 // Error if size becomes larger than source
1239 return 0;
1240 }
1241
1242 for ( ii = 0; ii < streamLength / 8; ii++ )
1243 {
1244 dstp[ dstSize++ ] = (u8)(bitStream >> (streamLength - (ii + 1) * 8));
1245 }
1246 streamLength %= 8;
1247 }
1248 else // 4-bit Huffman
1249 {
1250 for ( ii = 0; ii < 2; ii++ )
1251 {
1252 if ( ii )
1253 {
1254 srcTmp = (u8)( val >> 4 ); // First four bits come later
1255 }
1256 else
1257 {
1258 srcTmp = (u8)( val & 0x0F ); // Last four bits come first (because the decoder accesses in a Little-Endian method)
1259 }
1260 bitStream = (bitStream << table[ srcTmp ].PaDepth) | table[ srcTmp ].HuffCode;
1261 streamLength += table[ srcTmp ].PaDepth;
1262 if ( dstSize + (streamLength / 8) >= maxSize )
1263 {
1264 // Error if size becomes larger than source
1265 return 0;
1266 }
1267 for ( iii = 0; iii < streamLength / 8; iii++ )
1268 {
1269 dstp[ dstSize++ ] = (u8)(bitStream >> (streamLength - (iii + 1) * 8));
1270 }
1271 streamLength %= 8;
1272 }
1273 }
1274 }
1275
1276 if ( streamLength != 0 )
1277 {
1278 if ( dstSize + 1 >= maxSize )
1279 {
1280 // Error if size becomes larger than source
1281 return 0;
1282 }
1283 dstp[ dstSize++ ] = (u8)( bitStream << (8 - streamLength) );
1284 }
1285
1286 // 4-byte boundary alignment
1287 // Data0 for alignment is included in data size
1288 // This is special to Huffman encoding! Data subsequent to the alignment-boundary data is stored because little-endian conversion is used.
1289 while ( dstSize & 0x3 )
1290 {
1291 dstp[ dstSize++ ] = 0;
1292 }
1293
1294 // Little endian conversion
1295 for ( i = 0; i < dstSize / 4; i++ )
1296 {
1297 u8 tmp;
1298 tmp = dstp[i * 4 + 0];
1299 dstp[i * 4 + 0] = dstp[i * 4 + 3];
1300 dstp[i * 4 + 3] = tmp; // Swap
1301 tmp = dstp[i * 4 + 1];
1302 dstp[i * 4 + 1] = dstp[i * 4 + 2];
1303 dstp[i * 4 + 2] = tmp; // Swap
1304 }
1305 return dstSize;
1306 }
1307
1308