1 /*---------------------------------------------------------------------------*
2 Project: TwlSDK - MI -
3 File: mi_compress.c
4
5 Copyright 2003-2008 Nintendo. All rights reserved.
6
7 These coded instructions, statements, and computer programs contain
8 proprietary information of Nintendo of America Inc. and/or Nintendo
9 Company Ltd., and are protected by Federal copyright law. They may
10 not be disclosed to third parties or copied or duplicated in any form,
11 in whole or in part, without the prior written consent of Nintendo.
12
13 $Date:: 2008-09-18#$
14 $Rev: 8573 $
15 $Author: okubata_ryoma $
16 *---------------------------------------------------------------------------*/
17
18 #include <nitro/types.h>
19 #include <nitro/mi/uncompress.h>
20 #include <nitro/mi/compress.h>
21
22 static u32 SearchLZ(const u8 *startp, const u8 *nextp, u32 remainSize, u16 *offset, u32 maxLength );
23
24 //===========================================================================
25 // LZ Encoding
26 //===========================================================================
27
28 /*---------------------------------------------------------------------------*
29 Name: MI_CompressLZ
30
31 Description: Performs LZ77 compression.
32
33 Arguments: srcp: Pointer to compression source data
34 size: Size of compression source data
35 dstp Pointer to compressed data
36 The buffer must be larger than the size of the compression source data.
37
38 Returns: The data size after compression.
39 If compressed data is larger than original data, compression is terminated and 0 is returned.
40 *---------------------------------------------------------------------------*/
MI_CompressLZImpl(const u8 * srcp,u32 size,u8 * dstp,BOOL exFormat)41 u32 MI_CompressLZImpl(const u8 *srcp, u32 size, u8 *dstp, BOOL exFormat)
42 {
43 u32 LZDstCount; // Number of bytes of compressed data
44 u32 LZSrcCount; // Processed data volume of the compression target data (in bytes)
45 const u8 *srcStartp;
46 u8 LZCompFlags; // Flag sequence indicating whether there is a compression
47 u8 *LZCompFlagsp; // Point to memory regions storing LZCompFlags
48 u16 lastOffset; // Offset to matching data (the longest matching data at the time)
49 u32 lastLength; // Length of matching data (the longest matching data at the time)
50 u8 i;
51 u32 dstMax;
52 const u32 MAX_LENGTH = (exFormat)? (0xFFFF + 0xFF + 0xF + 3U) : (0xF + 3U);
53
54 SDK_ALIGN2_ASSERT(srcp);
55
56 *(u32 *)dstp = size << 8 | MI_COMPRESSION_LZ | (exFormat? 1 : 0); // Data header
57 dstp += 4;
58 LZDstCount = 4;
59 LZSrcCount = 0;
60 srcStartp = srcp;
61 dstMax = size;
62
63 while (size > 0)
64 {
65 LZCompFlags = 0;
66 LZCompFlagsp = dstp++; // Destination for storing flag sequence
67 LZDstCount++;
68
69 // Since flag sequence is stored as 8-bit data, loop eight times
70 for (i = 0; i < 8; i++)
71 {
72 LZCompFlags <<= 1; // No meaning for the first time (i=0)
73 if (size <= 0)
74 {
75 // When reached the end, quit after shifting flag to the end
76 continue;
77 }
78
79 if ((lastLength = SearchLZ(srcStartp, srcp, size, &lastOffset, MAX_LENGTH)) != 0)
80 {
81 u32 length;
82 // Enable flag if compression is possible
83 LZCompFlags |= 0x1;
84
85 if (LZDstCount + 2 >= dstMax) // Quit on error if size becomes larger than source
86 {
87 return 0;
88 }
89
90 if ( exFormat )
91 {
92 if ( lastLength >= 0xFF + 0xF + 3 )
93 {
94 length = (u32)( lastLength - 0xFF - 0xF - 3 );
95 *dstp++ = (u8)( 0x10 | (length >> 12) );
96 *dstp++ = (u8)( length >> 4 );
97 LZDstCount += 2;
98 }
99 else if ( lastLength >= 0xF + 2 )
100 {
101 length = (u32)( lastLength - 0xF - 2 );
102 *dstp++ = (u8)( length >> 4 );
103 LZDstCount += 1;
104 }
105 else
106 {
107 length = (u32)( lastLength - 1 );
108 }
109 }
110 else
111 {
112 length = (u32)( lastLength - 3 );
113 }
114
115 // Divide offset into upper 4 bits and lower 8 bits and store
116 *dstp++ = (u8)( length << 4 | (lastOffset - 1) >> 8 );
117 *dstp++ = (u8)((lastOffset - 1) & 0xff);
118 LZDstCount += 2;
119 srcp += lastLength;
120 size -= lastLength;
121 }
122 else
123 {
124 // No compression
125 if (LZDstCount + 1 >= dstMax) // Quit on error if size becomes larger than source
126 {
127 return 0;
128 }
129 *dstp++ = *srcp++;
130 size--;
131 LZDstCount++;
132 }
133 } // Completed eight loops
134 *LZCompFlagsp = LZCompFlags; // Store flag series
135 }
136
137 // 4-byte boundary alignment
138 // Does not include Data0 used for alignment in data size
139 i = 0;
140 while ((LZDstCount + i) & 0x3)
141 {
142 *dstp++ = 0;
143 i++;
144 }
145
146 return LZDstCount;
147 }
148
149 //--------------------------------------------------------
150 // With LZ77 Compression, searches for the longest matching string from the slide window.
151 // Arguments: startp Pointer to starting position of data
152 // nextp Pointer to data where search will start
153 // remainSize Size of remaining data
154 // offset Pointer to region storing matched offset
155 // Return: TRUE if matching string is found
156 // FALSE if not found.
157 //--------------------------------------------------------
SearchLZ(const u8 * startp,const u8 * nextp,u32 remainSize,u16 * offset,u32 maxLength)158 static u32 SearchLZ(const u8 *startp, const u8 *nextp, u32 remainSize, u16 *offset, u32 maxLength )
159 {
160 const u8 *searchp;
161 const u8 *headp, *searchHeadp;
162 u16 maxOffset;
163 u32 currLength = 2;
164 u32 tmpLength;
165
166 if (remainSize < 3)
167 {
168 return 0;
169 }
170
171 // VRAM is accessed in units of 2 bytes (becuase sometimes data is read from VRAM), so the search must start 2 bytes prior to the search target
172 //
173 //
174 // Because the offset is stored in 12 bits, the value is 4096 or less
175 searchp = nextp - 4096;
176 if (searchp < startp)
177 {
178 searchp = startp;
179 }
180
181 while (nextp - searchp >= 2)
182 {
183 headp = nextp;
184 searchHeadp = searchp;
185 tmpLength = 0;
186
187 while ((*headp != *searchp) || (*(headp + 1) != *(searchp + 1))
188 || (*(headp + 2) != *(searchp + 2)))
189 {
190 searchp++;
191 if (nextp - searchp < 2)
192 {
193 goto search_end;
194 }
195 }
196 searchHeadp = searchp + 3;
197 headp += 3;
198 tmpLength = 3;
199
200 // Increments the compression size until the data ends or different data is encountered
201 while (((u32)(headp - nextp) < remainSize) && (*headp == *searchHeadp))
202 {
203 headp++;
204 searchHeadp++;
205 tmpLength++;
206
207 // Because the data length is stored in 4 bits, the value is 18 or less (3 is added)
208 if (tmpLength == maxLength)
209 {
210 break;
211 }
212 }
213
214 if (tmpLength > currLength)
215 {
216 // Update the maximum-length offset
217 currLength = tmpLength;
218 maxOffset = (u16)(nextp - searchp);
219 if (currLength == maxLength || currLength == remainSize)
220 {
221 // This is the longest matching length, so end search
222 break;
223 }
224 }
225
226 searchp++;
227 }
228
229 search_end:
230 if (currLength < 3)
231 {
232 return 0;
233 }
234 *offset = maxOffset;
235 return currLength;
236 }
237
238
239 //===========================================================================
240 // LZ encoding (high-speed version)
241 //===========================================================================
242
243 // Temporary information for LZ high-speed encoding
244 typedef struct
245 {
246 u16 windowPos; // Initial position of the history window
247 u16 windowLen; // Length of the history window
248
249 s16 *LZOffsetTable; // Offset buffer of the history window
250 s16 *LZByteTable; // Pointer to the most recent character history
251 s16 *LZEndTable; // Pointer to the oldest character history
252 }
253 LZCompressInfo;
254
255 static void LZInitTable(LZCompressInfo * info, u8 *work);
256 static void SlideByte(LZCompressInfo * info, const u8 *srcp);
257 static inline void LZSlide(LZCompressInfo * info, const u8 *srcp, u32 n);
258 static u32 SearchLZFast(LZCompressInfo * info, const u8 *nextp, u32 remainSize, u16 *offset, u32 maxLength );
259
260
261 //--------------------------------------------------------
262 // Initialize the dictionary index
263 //--------------------------------------------------------
LZInitTable(LZCompressInfo * info,u8 * work)264 static void LZInitTable(LZCompressInfo * info, u8 *work)
265 {
266 u16 i;
267
268 info->LZOffsetTable = (s16 *)work;
269 info->LZByteTable = (s16 *)work + 4096;
270 info->LZEndTable = (s16 *)work + 4096 + 256;
271
272 for (i = 0; i < 256; i++)
273 {
274 info->LZByteTable[i] = -1;
275 info->LZEndTable[i] = -1;
276 }
277 info->windowPos = 0;
278 info->windowLen = 0;
279 }
280
281 //--------------------------------------------------------
282 // Slide the dictionary 1 byte
283 //--------------------------------------------------------
SlideByte(LZCompressInfo * info,const u8 * srcp)284 static void SlideByte(LZCompressInfo * info, const u8 *srcp)
285 {
286 s16 offset;
287 u8 in_data = *srcp;
288 u16 insert_offset;
289
290 s16 *const LZByteTable = info->LZByteTable;
291 s16 *const LZOffsetTable = info->LZOffsetTable;
292 s16 *const LZEndTable = info->LZEndTable;
293 const u16 windowPos = info->windowPos;
294 const u16 windowLen = info->windowLen;
295
296 if (windowLen == 4096)
297 {
298 u8 out_data = *(srcp - 4096);
299 if ((LZByteTable[out_data] = LZOffsetTable[LZByteTable[out_data]]) == -1)
300 {
301 LZEndTable[out_data] = -1;
302 }
303 insert_offset = windowPos;
304 }
305 else
306 {
307 insert_offset = windowLen;
308 }
309
310 offset = LZEndTable[in_data];
311 if (offset == -1)
312 {
313 LZByteTable[in_data] = (s16)insert_offset;
314 }
315 else
316 {
317 LZOffsetTable[offset] = (s16)insert_offset;
318 }
319 LZEndTable[in_data] = (s16)insert_offset;
320 LZOffsetTable[insert_offset] = -1;
321
322 if (windowLen == 4096)
323 {
324 info->windowPos = (u16)((windowPos + 1) % 0x1000);
325 }
326 else
327 {
328 info->windowLen++;
329 }
330 }
331
332 //--------------------------------------------------------
333 // Slide the dictionary n bytes
334 //--------------------------------------------------------
LZSlide(LZCompressInfo * info,const u8 * srcp,u32 n)335 static inline void LZSlide(LZCompressInfo * info, const u8 *srcp, u32 n)
336 {
337 u32 i;
338
339 for (i = 0; i < n; i++)
340 {
341 SlideByte(info, srcp++);
342 }
343 }
344
345
346 /*---------------------------------------------------------------------------*
347 Name: MI_CompressLZFastImpl
348
349 Description: Performs LZ77 compression.
350
351 Arguments: srcp: Pointer to compression source data
352 size: Size of compression source data
353 dstp Pointer to compressed data
354 The buffer must be larger than the size of the compression source data.
355 work Temporary buffer for compression
356 An area of size MI_LZ_FAST_COMPRESS_WORK_SIZE is necessary.
357
358 Returns: The data size after compression.
359 If compressed data is larger than original data, compression is terminated and 0 is returned.
360 *---------------------------------------------------------------------------*/
MI_CompressLZFastImpl(const u8 * srcp,u32 size,u8 * dstp,u8 * work,BOOL exFormat)361 u32 MI_CompressLZFastImpl(const u8 *srcp, u32 size, u8 *dstp, u8 *work, BOOL exFormat)
362 {
363 u32 LZDstCount; // Number of bytes of compressed data
364 u8 LZCompFlags; // Flag sequence indicating whether there is a compression
365 u8 *LZCompFlagsp; // Point to memory regions storing LZCompFlags
366 u16 lastOffset; // Offset to matching data (the longest matching data at the time)
367 u32 lastLength; // Length of matching data (the longest matching data at the time)
368 u8 i;
369 u32 dstMax;
370 LZCompressInfo info; // Temporary LZ compression information
371 const u32 MAX_LENGTH = (exFormat)? (0xFFFF + 0xFF + 0xF + 3U) : (0xF + 3U);
372
373 SDK_ALIGN2_ASSERT(srcp);
374 SDK_NULL_ASSERT(work);
375
376 *(u32 *)dstp = size << 8 | MI_COMPRESSION_LZ | (exFormat? 1 : 0); // Data header
377 dstp += 4;
378 LZDstCount = 4;
379 dstMax = size;
380 LZInitTable(&info, work);
381
382 while (size > 0)
383 {
384 LZCompFlags = 0;
385 LZCompFlagsp = dstp++; // Destination for storing flag sequence
386 LZDstCount++;
387
388 // Because flag sequence is stored as 8-bit data, loop eight times
389 for (i = 0; i < 8; i++)
390 {
391 LZCompFlags <<= 1; // No meaning for the first time (i=0)
392 if (size <= 0)
393 {
394 // When reached the end, quit after shifting flag to the end
395 continue;
396 }
397
398 if ( (lastLength = SearchLZFast(&info, srcp, size, &lastOffset, MAX_LENGTH)) != 0 )
399 {
400 u32 length;
401 // Enable flag if compression is possible
402 LZCompFlags |= 0x1;
403
404 if (LZDstCount + 2 >= dstMax) // Quit on error if size becomes larger than source
405 {
406 return 0;
407 }
408
409 if ( exFormat )
410 {
411 if ( lastLength >= 0xFF + 0xF + 3 )
412 {
413 length = (u32)( lastLength - 0xFF - 0xF - 3 );
414 *dstp++ = (u8)( 0x10 | (length >> 12) );
415 *dstp++ = (u8)( length >> 4 );
416 LZDstCount += 2;
417 }
418 else if ( lastLength >= 0xF + 2 )
419 {
420 length = (u32)( lastLength - 0xF - 2 );
421 *dstp++ = (u8)( length >> 4 );
422 LZDstCount += 1;
423 }
424 else
425 {
426 length = (u32)( lastLength - 1 );
427 }
428 }
429 else
430 {
431 length = (u32)( lastLength - 3 );
432 }
433
434 // Divide offset into upper 4 bits and lower 8 bits and store
435 *dstp++ = (u8)(length << 4 | (lastOffset - 1) >> 8);
436 *dstp++ = (u8)((lastOffset - 1) & 0xff);
437 LZDstCount += 2;
438 LZSlide(&info, srcp, lastLength);
439 srcp += lastLength;
440 size -= lastLength;
441 }
442 else
443 {
444 // No compression
445 if (LZDstCount + 1 >= dstMax) // Quit on error if size becomes larger than source
446 {
447 return 0;
448 }
449 LZSlide(&info, srcp, 1);
450 *dstp++ = *srcp++;
451 size--;
452 LZDstCount++;
453 }
454 } // Completed eight loops
455 *LZCompFlagsp = LZCompFlags; // Store flag series
456 }
457
458 // 4-byte boundary alignment
459 // Does not include Data0 used for alignment in data size
460 i = 0;
461 while ((LZDstCount + i) & 0x3)
462 {
463 *dstp++ = 0;
464 i++;
465 }
466
467 return LZDstCount;
468 }
469
470 //--------------------------------------------------------
471 // With LZ77 Compression, searches for the longest matching string from the slide window.
472 // Arguments: startp Pointer to starting position of data
473 // nextp Pointer to data where search will start
474 // remainSize Size of remaining data
475 // offset Pointer to region storing matched offset
476 // Return: TRUE if matching string is found
477 // FALSE if not found.
478 //--------------------------------------------------------
SearchLZFast(LZCompressInfo * info,const u8 * nextp,u32 remainSize,u16 * offset,u32 maxLength)479 static u32 SearchLZFast(LZCompressInfo * info, const u8 *nextp, u32 remainSize, u16 *offset, u32 maxLength )
480 {
481 const u8 *searchp;
482 const u8 *headp, *searchHeadp;
483 u16 maxOffset;
484 u32 currLength = 2;
485 u32 tmpLength;
486 s32 w_offset;
487 s16 *const LZOffsetTable = info->LZOffsetTable;
488 const u16 windowPos = info->windowPos;
489 const u16 windowLen = info->windowLen;
490
491 if (remainSize < 3)
492 {
493 return 0;
494 }
495
496 w_offset = info->LZByteTable[*nextp];
497
498 while (w_offset != -1)
499 {
500 if (w_offset < windowPos)
501 {
502 searchp = nextp - windowPos + w_offset;
503 }
504 else
505 {
506 searchp = nextp - windowLen - windowPos + w_offset;
507 }
508
509 /* This isn't needed, but it seems to make it a little faster */
510 if (*(searchp + 1) != *(nextp + 1) || *(searchp + 2) != *(nextp + 2))
511 {
512 w_offset = LZOffsetTable[w_offset];
513 continue;
514 }
515
516 if (nextp - searchp < 2)
517 {
518 // VRAM is accessed in units of 2 bytes (because sometimes data is read from VRAM), so the search must start 2 bytes prior to the search target
519 //
520 //
521 // Because the offset is stored in 12 bits, the value is 4096 or less
522 break;
523 }
524 tmpLength = 3;
525 searchHeadp = searchp + 3;
526 headp = nextp + 3;
527
528 // Increments the compression size until the data ends or different data is encountered
529 while (((u32)(headp - nextp) < remainSize) && (*headp == *searchHeadp))
530 {
531 headp++;
532 searchHeadp++;
533 tmpLength++;
534
535 // Because the data length is stored in 4 bits, the value is 18 or less (3 is added)
536 if (tmpLength == maxLength)
537 {
538 break;
539 }
540 }
541
542 if (tmpLength > currLength)
543 {
544 // Update the maximum-length offset
545 currLength = tmpLength;
546 maxOffset = (u16)(nextp - searchp);
547 if (currLength == maxLength || currLength == remainSize)
548 {
549 // This is the longest matching length, so end search
550 break;
551 }
552 }
553 w_offset = LZOffsetTable[w_offset];
554 }
555
556 if (currLength < 3)
557 {
558 return 0;
559 }
560 *offset = maxOffset;
561 return currLength;
562 }
563
564
565 //===========================================================================
566 // Run-Length Encoding
567 //===========================================================================
568
569 /*---------------------------------------------------------------------------*
570 Name: MI_CompressRL
571
572 Description: Performs run-length compression.
573
574 Arguments: srcp: Pointer to compression source data
575 size: Size of compression source data
576 dstp Pointer to compressed data
577 The buffer must be larger than the size of the compression source data.
578
579 Returns: The data size after compression.
580 If compressed data is larger than original data, compression is terminated and 0 is returned.
581 *---------------------------------------------------------------------------*/
MI_CompressRL(const u8 * srcp,u32 size,u8 * dstp)582 u32 MI_CompressRL(const u8 *srcp, u32 size, u8 *dstp)
583 {
584 u32 RLDstCount; // Number of bytes of compressed data
585 u32 RLSrcCount; // Processed data volume of the compression target data (in bytes)
586 u8 RLCompFlag; // 1 if performing run-length encoding
587 u8 runLength; // Run length
588 u8 rawDataLength; // Length of data not run
589 u32 i;
590
591 const u8 *startp; // Point to the start of compression target data for each process loop
592
593 // Data header (The size after decompression)
594 *(u32 *)dstp = size << 8 | MI_COMPRESSION_RL; // Data header
595 RLDstCount = 4;
596
597 RLSrcCount = 0;
598 rawDataLength = 0;
599 RLCompFlag = 0;
600
601 while (RLSrcCount < size)
602 {
603 startp = &srcp[RLSrcCount]; // Set compression target data
604
605 for (i = 0; i < 128; i++) // Data volume that can be expressed in 7 bits is 0 to 127
606 {
607 // Reach the end of the compression target data
608 if (RLSrcCount + rawDataLength >= size)
609 {
610 rawDataLength = (u8)(size - RLSrcCount);
611 break;
612 }
613
614 if (RLSrcCount + rawDataLength + 2 < size)
615 {
616 if (startp[i] == startp[i + 1] && startp[i] == startp[i + 2])
617 {
618 RLCompFlag = 1;
619 break;
620 }
621 }
622 rawDataLength++;
623 }
624
625 // Store data that will not be encoded
626 // If the 8th bit of the data length storage byte is 0, the data sequence that is not encoded.
627 // The data length is x - 1, so 0-127 becomes 1-128.
628 if (rawDataLength)
629 {
630 if (RLDstCount + rawDataLength + 1 >= size) // Quit on error if size becomes larger than source
631 {
632 return 0;
633 }
634 dstp[RLDstCount++] = (u8)(rawDataLength - 1); // Store "data length - 1" (7 bits)
635 for (i = 0; i < rawDataLength; i++)
636 {
637 dstp[RLDstCount++] = srcp[RLSrcCount++];
638 }
639 rawDataLength = 0;
640 }
641
642 // Run-Length Encoding
643 if (RLCompFlag)
644 {
645 runLength = 3;
646 for (i = 3; i < 128 + 2; i++)
647 {
648 // Reach the end of the data for compression
649 if (RLSrcCount + runLength >= size)
650 {
651 runLength = (u8)(size - RLSrcCount);
652 break;
653 }
654
655 // If run is interrupted
656 if (srcp[RLSrcCount] != srcp[RLSrcCount + runLength])
657 {
658 break;
659 }
660 // Run continues
661 runLength++;
662 }
663
664 // If the 8th bit of the data length storage byte is 1, the data sequence that is encoded
665 if (RLDstCount + 2 >= size) // Quit on error if size becomes larger than source
666 {
667 return 0;
668 }
669 dstp[RLDstCount++] = (u8)(0x80 | (runLength - 3)); // Add 3, and store from 3 to 130
670 dstp[RLDstCount++] = srcp[RLSrcCount];
671 RLSrcCount += runLength;
672 RLCompFlag = 0;
673 }
674 }
675
676 // 4-byte boundary alignment
677 // Does not include Data0 used for alignment in data size
678 i = 0;
679 while ((RLDstCount + i) & 0x3)
680 {
681 dstp[RLDstCount + i] = 0;
682 i++;
683 }
684 return RLDstCount;
685 }
686
687
688 //===========================================================================
689 // Huffman encoding
690 //===========================================================================
691 #define HUFF_END_L 0x80
692 #define HUFF_END_R 0x40
693
694 static void HuffUpdateParentDepth(u16 leftNo, u16 rightNo);
695 static void HuffMakeCode(u16 nodeNo, u32 paHuffCode);
696 static u8 HuffCountHWord(u16 nodeNo);
697 static void HuffMakeHuffTree(u16 rootNo);
698 static void HuffMakeSubsetHuffTree(u16 huffTreeNo, u8 rightNodeFlag);
699 static u8 HuffRemainingNodeCanSetOffset(u8 costHWord);
700 static void HuffSetOneNodeOffset(u16 huffTreeNo, u8 rightNodeFlag);
701
702 typedef struct
703 {
704 u32 Freq; // Frequency of occurrence
705 u16 No; // Data number
706 s16 PaNo; // Parent number
707 s16 ChNo[2]; // Child Number (0: left side, 1: right side)
708 u16 PaDepth; // Parent node depth
709 u16 LeafDepth; // Depth to leaf
710 u32 HuffCode; // Huffman code
711 u8 Bit; // Node's bit data
712 u8 _padding;
713 u16 HWord; // For each intermediate node, the amount of memory needed to store in HuffTree the subtree that has that node as its root
714 }
715 HuffData; // Total of 24 bytes
716
717 static HuffData *HuffTable; // [512] 12288B
718 static const HuffData HuffTableInitData = { 0, 0, 0, {-1, -1}, 0, 0, 0, 0, 0 };
719
720 static u8 HuffTreeTop; // The number for HuffTreeTop
721 static u8 *HuffTree; // [256][2] 512B
722
723 typedef struct
724 {
725 u8 leftOffsetNeed; // 1 if offset to left child node is required
726 u8 rightOffsetNeed; // 1 if an offset to the right child node is required
727 u16 leftNodeNo; // The left child node number
728 u16 rightNodeNo; // Right child node number
729 }
730 HuffTreeCtrlData; // Total of 6 bytes
731 static HuffTreeCtrlData *HuffTreeCtrl; // 1536 Byte [ 256 ]
732 static const HuffTreeCtrlData HuffTreeCtrlInitData = { 1, 1, 0, 0 };
733
734 static u16 sHuffDataNum; // The data type to encode; this is 16 for 4-bit encoding and 256 for 8-bit encoding
735
736 // Structure of the Huffman work buffer
737 typedef struct
738 {
739 HuffData HuffTable[512]; // 12288B
740 u8 HuffTree[256 * 2]; // 512B
741 HuffTreeCtrlData HuffTreeCtrl[256]; // 1536B
742 }
743 HuffWork; // Total is 14336B
744
745 /*---------------------------------------------------------------------------*
746 Name: MI_CompressHuffman
747
748 Description: Performs Huffman compression.
749
750 Arguments: srcp: Pointer to compression source data
751 size: Size of compression source data
752 dstp Pointer to compressed data
753 The buffer must be larger than the size of the compression source data.
754 huffBitSize The number of bits to encode
755 work Work buffer for Huffman compression
756
757 Returns: The data size after compression.
758 If compressed data is larger than original data, compression is terminated and 0 is returned.
759 *---------------------------------------------------------------------------*/
MI_CompressHuffman(const u8 * srcp,u32 size,u8 * dstp,u8 huffBitSize,u8 * work)760 u32 MI_CompressHuffman(const u8 *srcp, u32 size, u8 *dstp, u8 huffBitSize, u8 *work)
761 {
762 u32 HuffDstCount; // Number of bytes of compressed data
763 u8 tmp;
764 u16 nodeNum; // Number of valid nodes
765 u16 tableTop; // When creating table, the table top number
766 s32 leftNo, rightNo; // Node number for creating binary tree
767 s32 i, ii, iii;
768 u8 srcTmp;
769 u32 bitStream = 0;
770 u32 streamLength = 0;
771 u16 rootNo; // Binary tree's root number
772
773
774 SDK_NULL_ASSERT(srcp);
775 SDK_NULL_ASSERT(dstp);
776 SDK_ASSERT(huffBitSize == 4 || huffBitSize == 8);
777 SDK_NULL_ASSERT(work);
778 SDK_ALIGN4_ASSERT(work);
779
780 HuffTable = ((HuffWork *) work)->HuffTable;
781 HuffTree = ((HuffWork *) work)->HuffTree;
782 HuffTreeCtrl = ((HuffWork *) work)->HuffTreeCtrl;
783
784 sHuffDataNum = (u16)(1 << huffBitSize); // 8->256, 4->16
785 tableTop = sHuffDataNum;
786
787 // Initialize table
788 // Subscript: 0 ~ 15(255) : Information for the data to encode (8-bit encoding)
789 // 16 ~ 31(511) : Information for creating binary tree
790 for (i = 0; i < (u16)(sHuffDataNum * 2); i++)
791 {
792 HuffTable[i] = HuffTableInitData;
793 HuffTable[i].No = (u16)i;
794 }
795
796 // Check frequency of occurrence
797 if (huffBitSize == 8)
798 {
799 for (i = 0; i < size; i++)
800 {
801 HuffTable[srcp[i]].Freq++; // 8-bit encoding
802 }
803 }
804 else
805 {
806 for (i = 0; i < size; i++)
807 { // 4-bit encoding
808 tmp = (u8)((srcp[i] & 0xf0) >> 4); // Store from upper 4 bits first
809 HuffTable[tmp].Freq++; // The problem is the encoding
810 tmp = (u8)(srcp[i] & 0x0f);
811 HuffTable[tmp].Freq++;
812 }
813 }
814
815 // Create tree table
816 leftNo = rightNo = -1;
817 while (1)
818 {
819 // Search for two subtree vertices with low Freq value. At least one should be found.
820 // Search child node (left)
821 for (i = 0; i < tableTop; i++)
822 {
823 if ((HuffTable[i].Freq != 0) && (HuffTable[i].PaNo == 0))
824 {
825 leftNo = i;
826 break;
827 }
828 }
829 for (i = (u16)leftNo; i < tableTop; i++)
830 {
831 if ((HuffTable[i].Freq != 0) &&
832 (HuffTable[i].PaNo == 0) && (HuffTable[i].Freq < HuffTable[leftNo].Freq))
833 {
834 leftNo = i;
835 }
836 }
837 // Search child node (right)
838 for (i = 0; i < tableTop; i++)
839 {
840 if ((HuffTable[i].Freq != 0) && (HuffTable[i].PaNo == 0) && (i != leftNo))
841 {
842 rightNo = i;
843 break;
844 }
845 }
846 for (i = (u16)rightNo; i < tableTop; i++)
847 {
848 if ((HuffTable[i].Freq != 0) &&
849 (HuffTable[i].PaNo == 0) &&
850 (HuffTable[i].Freq < HuffTable[rightNo].Freq) && (i != leftNo))
851 {
852 rightNo = i;
853 }
854 }
855 // If only one, then end table creation
856 if (rightNo < 0)
857 {
858 if (tableTop == sHuffDataNum)
859 // When only one type of value exists or doesn't exist, then create one node that takes the same value for both 0 and 1
860 {
861 HuffTable[tableTop].Freq = HuffTable[leftNo].Freq;
862 HuffTable[tableTop].ChNo[0] = (s16)leftNo;
863 HuffTable[tableTop].ChNo[1] = (s16)leftNo;
864 HuffTable[tableTop].LeafDepth = 1;
865 HuffTable[leftNo].PaNo = (s16)tableTop;
866 HuffTable[leftNo].Bit = 0;
867 HuffTable[leftNo].PaDepth = 1;
868 }
869 else
870 {
871 tableTop--;
872 }
873 rootNo = tableTop;
874 nodeNum = (u16)((rootNo - sHuffDataNum + 1) * 2 + 1);
875 break;
876 }
877
878 // Create vertex that combines left subtree and right subtree
879 HuffTable[tableTop].Freq = HuffTable[leftNo].Freq + HuffTable[rightNo].Freq;
880 HuffTable[tableTop].ChNo[0] = (s16)leftNo;
881 HuffTable[tableTop].ChNo[1] = (s16)rightNo;
882 if (HuffTable[leftNo].LeafDepth > HuffTable[rightNo].LeafDepth)
883 {
884 HuffTable[tableTop].LeafDepth = (u16)(HuffTable[leftNo].LeafDepth + 1);
885 }
886 else
887 {
888 HuffTable[tableTop].LeafDepth = (u16)(HuffTable[rightNo].LeafDepth + 1);
889 }
890
891 HuffTable[leftNo].PaNo = HuffTable[rightNo].PaNo = (s16)tableTop;
892 HuffTable[leftNo].Bit = 0;
893 HuffTable[rightNo].Bit = 1;
894
895 HuffUpdateParentDepth((u16)leftNo, (u16)rightNo);
896
897 tableTop++;
898 leftNo = rightNo = -1;
899 }
900
901 // Generate Huffman code (In HuffTable[i].HuffCode)
902 HuffMakeCode(rootNo, 0x00); // The Huffman code is the code with HuffCode's lower bit masked for PaDepth bits
903
904 // For each intermediate node, calculate the amount of memory needed to store, in HuffTree, the subtree that has this node as its root
905 (void)HuffCountHWord(rootNo);
906
907 // Create HuffTree
908 HuffMakeHuffTree(rootNo);
909 HuffTree[0] = --HuffTreeTop;
910
911 // Data header
912 *(u32 *)dstp = size << 8 | MI_COMPRESSION_HUFFMAN | huffBitSize;
913 HuffDstCount = 4;
914
915 if (HuffDstCount + (HuffTreeTop + 1) * 2 >= size) // Quit on error if size becomes larger than source
916 {
917 return 0;
918 }
919
920 for (i = 0; i < (u16)((HuffTreeTop + 1) * 2); i++) // Tree table
921 {
922 dstp[HuffDstCount++] = ((u8 *)HuffTree)[i];
923 }
924
925 // 4-byte boundary alignment
926 // Data0 used for alignment is included in data size (as per the decoder algorithm)
927 while (HuffDstCount & 0x3)
928 {
929 if (HuffDstCount & 0x1)
930 {
931 HuffTreeTop++;
932 dstp[4]++;
933 }
934 dstp[HuffDstCount++] = 0;
935 }
936
937 // Huffman encoding
938 for (i = 0; i < size; i++) // Data compression
939 {
940 u8 val = srcp[i];
941 if (huffBitSize == 8) // 8-bit Huffman
942 {
943 bitStream = (bitStream << HuffTable[ val ].PaDepth) | HuffTable[ val ].HuffCode;
944 streamLength += HuffTable[ val ].PaDepth;
945
946 if (HuffDstCount + (streamLength / 8) >= size)
947 {
948 // Quit on error if size becomes larger than source
949 return 0;
950 }
951 for (ii = 0; ii < streamLength / 8; ii++)
952 {
953 dstp[HuffDstCount++] = (u8)(bitStream >> (streamLength - (ii + 1) * 8));
954 }
955 streamLength %= 8;
956 }
957 else // 4-bit Huffman
958 {
959 for (ii = 0; ii < 2; ii++)
960 {
961 if (ii)
962 {
963 srcTmp = (u8)( val >> 4 ); // Upper 4 bits
964 }
965 else
966 {
967 srcTmp = (u8)( val & 0x0F ); // Lower 4 bits
968 }
969 bitStream = (bitStream << HuffTable[srcTmp].PaDepth) | HuffTable[srcTmp].HuffCode;
970 streamLength += HuffTable[srcTmp].PaDepth;
971 if (HuffDstCount + (streamLength / 8) >= size)
972 {
973 // Quit on error if size becomes larger than source
974 return 0;
975 }
976 for (iii = 0; iii < streamLength / 8; iii++)
977 {
978 dstp[HuffDstCount++] = (u8)(bitStream >> (streamLength - (iii + 1) * 8));
979 }
980 streamLength %= 8;
981 }
982 }
983 }
984
985 if (streamLength != 0)
986 {
987 if (HuffDstCount + 1 >= size)
988 {
989 // Quit on error if size becomes larger than source
990 return 0;
991 }
992 dstp[HuffDstCount++] = (u8)(bitStream << (8 - streamLength));
993 }
994
995 // 4-byte boundary alignment
996 // Data0 for alignment is included in data size
997 // This is special to Huffman encoding! Data is stored after the alignment-boundary data because little endian conversion is used.
998 while (HuffDstCount & 0x3)
999 {
1000 dstp[HuffDstCount++] = 0;
1001 }
1002
1003 for (i = 1 + (HuffTreeTop + 1) * 2 / 4; i < (HuffDstCount / 4) + 1; i++) // Little endian conversion
1004 {
1005 tmp = dstp[i * 4 + 0];
1006 dstp[i * 4 + 0] = dstp[i * 4 + 3];
1007 dstp[i * 4 + 3] = tmp; // Swap
1008 tmp = dstp[i * 4 + 1];
1009 dstp[i * 4 + 1] = dstp[i * 4 + 2];
1010 dstp[i * 4 + 2] = tmp; // Swap
1011 }
1012
1013 return HuffDstCount;
1014 }
1015
1016
1017
1018 //-----------------------------------------------------------------------
1019 // Create Huffman code table
1020 //-----------------------------------------------------------------------
HuffMakeHuffTree(u16 rootNo)1021 static void HuffMakeHuffTree(u16 rootNo)
1022 {
1023 s16 i;
1024 u16 tmp;
1025 s16 costHWord, tmpCostHWord; // Make subtree code table for most-costly node when subtree code table has not been created.
1026 s16 costOffsetNeed, tmpCostOffsetNeed;
1027 s16 costMaxKey, costMaxRightFlag; // Information for singling out the least costly node from HuffTree
1028 u8 offsetNeedNum;
1029 u8 tmpKey, tmpRightFlag;
1030
1031 // Initialize HuffTreeCtrl
1032 for (i = 0; i < 256; i++)
1033 {
1034 HuffTree[i * 2] = HuffTree[i * 2 + 1] = 0;
1035 }
1036 for (i = 0; i < 256; i++)
1037 {
1038 HuffTreeCtrl[i] = HuffTreeCtrlInitData;
1039 }
1040 HuffTreeTop = 1;
1041 costOffsetNeed = 0;
1042
1043 HuffTreeCtrl[0].leftOffsetNeed = 0; // Do not use (used as table size)
1044 HuffTreeCtrl[0].rightNodeNo = rootNo;
1045
1046 while (1) // Until return
1047 {
1048 // Calculate the number of nodes required to set offset
1049 offsetNeedNum = 0;
1050 for (i = 0; i < HuffTreeTop; i++)
1051 {
1052 if (HuffTreeCtrl[i].leftOffsetNeed)
1053 {
1054 offsetNeedNum++;
1055 }
1056 if (HuffTreeCtrl[i].rightOffsetNeed)
1057 {
1058 offsetNeedNum++;
1059 }
1060 }
1061
1062 // Search for node with greatest cost
1063 costHWord = -1;
1064 costMaxKey = -1;
1065 tmpKey = 0;
1066 tmpRightFlag = 0;
1067
1068 for (i = 0; i < HuffTreeTop; i++)
1069 {
1070
1071 tmpCostOffsetNeed = (u8)(HuffTreeTop - i);
1072
1073 // Evaluate cost of left child node
1074 if (HuffTreeCtrl[i].leftOffsetNeed)
1075 {
1076 tmpCostHWord = (s16)HuffTable[HuffTreeCtrl[i].leftNodeNo].HWord;
1077
1078 if ((tmpCostHWord + offsetNeedNum) > 64)
1079 {
1080 goto leftCostEvaluationEnd;
1081 }
1082 if (!HuffRemainingNodeCanSetOffset((u8)tmpCostHWord))
1083 {
1084 goto leftCostEvaluationEnd;
1085 }
1086 if (tmpCostHWord > costHWord)
1087 {
1088 costMaxKey = i;
1089 costMaxRightFlag = 0;
1090 }
1091 else if ((tmpCostHWord == costHWord) && (tmpCostOffsetNeed > costOffsetNeed))
1092 {
1093 costMaxKey = i;
1094 costMaxRightFlag = 0;
1095 }
1096 }
1097 leftCostEvaluationEnd:{
1098 }
1099
1100 // Evaluate cost of right child node
1101 if (HuffTreeCtrl[i].rightOffsetNeed)
1102 {
1103 tmpCostHWord = (s16)HuffTable[HuffTreeCtrl[i].rightNodeNo].HWord;
1104
1105 if ((tmpCostHWord + offsetNeedNum) > 64)
1106 {
1107 goto rightCostEvaluationEnd;
1108 }
1109 if (!(HuffRemainingNodeCanSetOffset((u8)tmpCostHWord)))
1110 {
1111 goto rightCostEvaluationEnd;
1112 }
1113 if (tmpCostHWord > costHWord)
1114 {
1115 costMaxKey = i;
1116 costMaxRightFlag = 1;
1117 }
1118 else if ((tmpCostHWord == costHWord) && (tmpCostOffsetNeed > costOffsetNeed))
1119 {
1120 costMaxKey = i;
1121 costMaxRightFlag = 1;
1122 }
1123 }
1124 rightCostEvaluationEnd:{
1125 }
1126 }
1127
1128 // Store entire subtree in HuffTree
1129 if (costMaxKey >= 0)
1130 {
1131 HuffMakeSubsetHuffTree((u8)costMaxKey, (u8)costMaxRightFlag);
1132 goto nextTreeMaking;
1133 }
1134 else
1135 {
1136 // Search for node with largest required offset
1137 for (i = 0; i < HuffTreeTop; i++)
1138 {
1139 tmp = 0;
1140 tmpRightFlag = 0;
1141 if (HuffTreeCtrl[i].leftOffsetNeed)
1142 {
1143 tmp = HuffTable[HuffTreeCtrl[i].leftNodeNo].HWord;
1144 }
1145 if (HuffTreeCtrl[i].rightOffsetNeed)
1146 {
1147 if (HuffTable[HuffTreeCtrl[i].rightNodeNo].HWord > tmp)
1148 {
1149 tmpRightFlag = 1;
1150 }
1151 }
1152 if ((tmp != 0) || (tmpRightFlag))
1153 {
1154 HuffSetOneNodeOffset((u8)i, tmpRightFlag);
1155 goto nextTreeMaking;
1156 }
1157 }
1158 }
1159 return;
1160 nextTreeMaking:{
1161 }
1162 }
1163 }
1164
1165 //-----------------------------------------------------------------------
1166 // Store entire subtree in HuffTree
1167 //-----------------------------------------------------------------------
HuffMakeSubsetHuffTree(u16 huffTreeNo,u8 rightNodeFlag)1168 static void HuffMakeSubsetHuffTree(u16 huffTreeNo, u8 rightNodeFlag)
1169 {
1170 u8 i;
1171
1172 i = HuffTreeTop;
1173 HuffSetOneNodeOffset(huffTreeNo, rightNodeFlag);
1174
1175 if (rightNodeFlag)
1176 {
1177 HuffTreeCtrl[huffTreeNo].rightOffsetNeed = 0;
1178 }
1179 else
1180 {
1181 HuffTreeCtrl[huffTreeNo].leftOffsetNeed = 0;
1182 }
1183
1184 while (i < HuffTreeTop)
1185 {
1186 if (HuffTreeCtrl[i].leftOffsetNeed)
1187 {
1188 HuffSetOneNodeOffset(i, 0);
1189 HuffTreeCtrl[i].leftOffsetNeed = 0;
1190 }
1191 if (HuffTreeCtrl[i].rightOffsetNeed)
1192 {
1193 HuffSetOneNodeOffset(i, 1);
1194 HuffTreeCtrl[i].rightOffsetNeed = 0;
1195 }
1196 i++;
1197 }
1198 }
1199
1200 //-----------------------------------------------------------------------
1201 // Check if there are any problems with HuffTree construction even after subtree of the given data size is decompressed.
1202 //-----------------------------------------------------------------------
HuffRemainingNodeCanSetOffset(u8 costHWord)1203 static u8 HuffRemainingNodeCanSetOffset(u8 costHWord)
1204 {
1205 u8 i;
1206 s16 capacity;
1207
1208 capacity = (s16)(64 - costHWord);
1209
1210 // The offset number is larger for smaller values of i, so you should calculate without sorting, with i=0 -> HuffTreeTop
1211 for (i = 0; i < HuffTreeTop; i++)
1212 {
1213 if (HuffTreeCtrl[i].leftOffsetNeed)
1214 {
1215 if ((HuffTreeTop - i) <= capacity)
1216 {
1217 capacity--;
1218 }
1219 else
1220 {
1221 return 0;
1222 }
1223 }
1224 if (HuffTreeCtrl[i].rightOffsetNeed)
1225 {
1226 if ((HuffTreeTop - i) <= capacity)
1227 {
1228 capacity--;
1229 }
1230 else
1231 {
1232 return 0;
1233 }
1234 }
1235 }
1236
1237 return 1;
1238 }
1239
1240 //-----------------------------------------------------------------------
1241 // Create Huffman code table for one node
1242 //-----------------------------------------------------------------------
HuffSetOneNodeOffset(u16 huffTreeNo,u8 rightNodeFlag)1243 static void HuffSetOneNodeOffset(u16 huffTreeNo, u8 rightNodeFlag)
1244 {
1245 u16 nodeNo;
1246 u8 offsetData = 0;
1247
1248 if (rightNodeFlag)
1249 {
1250 nodeNo = HuffTreeCtrl[huffTreeNo].rightNodeNo;
1251 HuffTreeCtrl[huffTreeNo].rightOffsetNeed = 0;
1252 }
1253 else
1254 {
1255 nodeNo = HuffTreeCtrl[huffTreeNo].leftNodeNo;
1256 HuffTreeCtrl[huffTreeNo].leftOffsetNeed = 0;
1257 }
1258
1259 // Left child node
1260 if (HuffTable[HuffTable[nodeNo].ChNo[0]].LeafDepth == 0)
1261 {
1262 offsetData |= 0x80;
1263 HuffTree[HuffTreeTop * 2 + 0] = (u8)HuffTable[nodeNo].ChNo[0];
1264 HuffTreeCtrl[HuffTreeTop].leftNodeNo = (u8)HuffTable[nodeNo].ChNo[0];
1265 HuffTreeCtrl[HuffTreeTop].leftOffsetNeed = 0; // Offset no longer required
1266 }
1267 else
1268 {
1269 HuffTreeCtrl[HuffTreeTop].leftNodeNo = (u16)HuffTable[nodeNo].ChNo[0]; // Offset is required
1270 }
1271
1272 // Right child node
1273 if (HuffTable[HuffTable[nodeNo].ChNo[1]].LeafDepth == 0)
1274 {
1275 offsetData |= 0x40;
1276 HuffTree[HuffTreeTop * 2 + 1] = (u8)HuffTable[nodeNo].ChNo[1];
1277 HuffTreeCtrl[HuffTreeTop].rightNodeNo = (u8)HuffTable[nodeNo].ChNo[1];
1278 HuffTreeCtrl[HuffTreeTop].rightOffsetNeed = 0; // Offset no longer required
1279 }
1280 else
1281 {
1282 HuffTreeCtrl[HuffTreeTop].rightNodeNo = (u16)HuffTable[nodeNo].ChNo[1]; // Offset is required
1283 }
1284
1285 offsetData |= (u8)(HuffTreeTop - huffTreeNo - 1);
1286 HuffTree[huffTreeNo * 2 + rightNodeFlag] = offsetData;
1287
1288 HuffTreeTop++;
1289 }
1290
1291
1292 //-----------------------------------------------------------------------
1293 // When creating binary tree and when combining subtrees, add 1 to the depth of every node in the subtree
1294 //-----------------------------------------------------------------------
HuffUpdateParentDepth(u16 leftNo,u16 rightNo)1295 static void HuffUpdateParentDepth(u16 leftNo, u16 rightNo)
1296 {
1297 HuffTable[leftNo].PaDepth++;
1298 HuffTable[rightNo].PaDepth++;
1299
1300 if (HuffTable[leftNo].LeafDepth != 0)
1301 {
1302 HuffUpdateParentDepth((u16)HuffTable[leftNo].ChNo[0], (u16)HuffTable[leftNo].ChNo[1]);
1303 }
1304 if (HuffTable[rightNo].LeafDepth != 0)
1305 {
1306 HuffUpdateParentDepth((u16)HuffTable[rightNo].ChNo[0], (u16)HuffTable[rightNo].ChNo[1]);
1307 }
1308 }
1309
1310 //-----------------------------------------------------------------------
1311 // Create Huffman code
1312 //-----------------------------------------------------------------------
HuffMakeCode(u16 nodeNo,u32 paHuffCode)1313 static void HuffMakeCode(u16 nodeNo, u32 paHuffCode)
1314 {
1315 HuffTable[nodeNo].HuffCode = (paHuffCode << 1) | HuffTable[nodeNo].Bit;
1316
1317 if (HuffTable[nodeNo].LeafDepth != 0)
1318 {
1319 HuffMakeCode((u16)HuffTable[nodeNo].ChNo[0], HuffTable[nodeNo].HuffCode);
1320 HuffMakeCode((u16)HuffTable[nodeNo].ChNo[1], HuffTable[nodeNo].HuffCode);
1321 }
1322 }
1323
1324 //-----------------------------------------------------------------------
1325 // Data volume required by intermediate node to create HuffTree
1326 //-----------------------------------------------------------------------
HuffCountHWord(u16 nodeNo)1327 static u8 HuffCountHWord(u16 nodeNo)
1328 {
1329 u8 leftHWord, rightHWord;
1330
1331 switch (HuffTable[nodeNo].LeafDepth)
1332 {
1333 case 0:
1334 return 0;
1335 case 1:
1336 leftHWord = rightHWord = 0;
1337 break;
1338 default:
1339 leftHWord = HuffCountHWord((u16)HuffTable[nodeNo].ChNo[0]);
1340 rightHWord = HuffCountHWord((u16)HuffTable[nodeNo].ChNo[1]);
1341 break;
1342 }
1343
1344 HuffTable[nodeNo].HWord = (u16)(leftHWord + rightHWord + 1);
1345 return (u8)(leftHWord + rightHWord + 1);
1346 }
1347