using System; using ICSharpCode.SharpZipLib.Checksum; namespace ICSharpCode.SharpZipLib.Zip.Compression { /// /// Strategies for deflater /// public enum DeflateStrategy { /// /// The default strategy /// Default = 0, /// /// This strategy will only allow longer string repetitions. It is /// useful for random data with a small character set. /// Filtered = 1, /// /// This strategy will not look for string repetitions at all. It /// only encodes with Huffman trees (which means, that more common /// characters get a smaller encoding. /// HuffmanOnly = 2 } // DEFLATE ALGORITHM: // // The uncompressed stream is inserted into the window array. When // the window array is full the first half is thrown away and the // second half is copied to the beginning. // // The head array is a hash table. Three characters build a hash value // and they the value points to the corresponding index in window of // the last string with this hash. The prev array implements a // linked list of matches with the same hash: prev[index & WMASK] points // to the previous index with the same hash. // /// /// Low level compression engine for deflate algorithm which uses a 32K sliding window /// with secondary compression from Huffman/Shannon-Fano codes. /// public class DeflaterEngine { #region Constants const int TooFar = 4096; #endregion #region Constructors /// /// Construct instance with pending buffer /// /// /// Pending buffer to use /// > public DeflaterEngine(DeflaterPending pending) { this.pending = pending; huffman = new DeflaterHuffman(pending); adler = new Adler32(); window = new byte[2 * DeflaterConstants.WSIZE]; head = new short[DeflaterConstants.HASH_SIZE]; prev = new short[DeflaterConstants.WSIZE]; // We start at index 1, to avoid an implementation deficiency, that // we cannot build a repeat pattern at index 0. blockStart = strstart = 1; } #endregion /// /// Deflate drives actual compression of data /// /// True to flush input buffers /// Finish deflation with the current input. /// Returns true if progress has been made. public bool Deflate(bool flush, bool finish) { bool progress; do { FillWindow(); bool canFlush = flush && (inputOff == inputEnd); #if DebugDeflation if (DeflaterConstants.DEBUGGING) { Console.WriteLine("window: [" + blockStart + "," + strstart + "," + lookahead + "], " + compressionFunction + "," + canFlush); } #endif switch (compressionFunction) { case DeflaterConstants.DEFLATE_STORED: progress = DeflateStored(canFlush, finish); break; case DeflaterConstants.DEFLATE_FAST: progress = DeflateFast(canFlush, finish); break; case DeflaterConstants.DEFLATE_SLOW: progress = DeflateSlow(canFlush, finish); break; default: throw new InvalidOperationException("unknown compressionFunction"); } } while (pending.IsFlushed && progress); // repeat while we have no pending output and progress was made return progress; } /// /// Sets input data to be deflated. Should only be called when NeedsInput() /// returns true /// /// The buffer containing input data. /// The offset of the first byte of data. /// The number of bytes of data to use as input. public void SetInput(byte[] buffer, int offset, int count) { if (buffer == null) { throw new ArgumentNullException("nameof(buffer)"); } if (offset < 0) { throw new ArgumentOutOfRangeException("nameof(offset)"); } if (count < 0) { throw new ArgumentOutOfRangeException("nameof(count)"); } if (inputOff < inputEnd) { throw new InvalidOperationException("Old input was not completely processed"); } int end = offset + count; /* We want to throw an ArrayIndexOutOfBoundsException early. The * check is very tricky: it also handles integer wrap around. */ if ((offset > end) || (end > buffer.Length)) { throw new ArgumentOutOfRangeException("nameof(count)"); } inputBuf = buffer; inputOff = offset; inputEnd = end; } /// /// Determines if more input is needed. /// /// Return true if input is needed via SetInput public bool NeedsInput() { return (inputEnd == inputOff); } /// /// Set compression dictionary /// /// The buffer containing the dictionary data /// The offset in the buffer for the first byte of data /// The length of the dictionary data. public void SetDictionary(byte[] buffer, int offset, int length) { #if DebugDeflation if (DeflaterConstants.DEBUGGING && (strstart != 1) ) { throw new InvalidOperationException("strstart not 1"); } #endif adler.Update(buffer, offset, length); if (length < DeflaterConstants.MIN_MATCH) { return; } if (length > DeflaterConstants.MAX_DIST) { offset += length - DeflaterConstants.MAX_DIST; length = DeflaterConstants.MAX_DIST; } System.Array.Copy(buffer, offset, window, strstart, length); UpdateHash(); --length; while (--length > 0) { InsertString(); strstart++; } strstart += 2; blockStart = strstart; } /// /// Reset internal state /// public void Reset() { huffman.Reset(); adler.Reset(); blockStart = strstart = 1; lookahead = 0; totalIn = 0; prevAvailable = false; matchLen = DeflaterConstants.MIN_MATCH - 1; for (int i = 0; i < DeflaterConstants.HASH_SIZE; i++) { head[i] = 0; } for (int i = 0; i < DeflaterConstants.WSIZE; i++) { prev[i] = 0; } } /// /// Reset Adler checksum /// public void ResetAdler() { adler.Reset(); } /// /// Get current value of Adler checksum /// public int Adler { get { return unchecked((int)adler.Value); } } /// /// Total data processed /// public long TotalIn { get { return totalIn; } } /// /// Get/set the deflate strategy /// public DeflateStrategy Strategy { get { return strategy; } set { strategy = value; } } /// /// Set the deflate level (0-9) /// /// The value to set the level to. public void SetLevel(int level) { if ((level < 0) || (level > 9)) { throw new ArgumentOutOfRangeException("nameof(level)"); } goodLength = DeflaterConstants.GOOD_LENGTH[level]; max_lazy = DeflaterConstants.MAX_LAZY[level]; niceLength = DeflaterConstants.NICE_LENGTH[level]; max_chain = DeflaterConstants.MAX_CHAIN[level]; if (DeflaterConstants.COMPR_FUNC[level] != compressionFunction) { #if DebugDeflation if (DeflaterConstants.DEBUGGING) { Console.WriteLine("Change from " + compressionFunction + " to " + DeflaterConstants.COMPR_FUNC[level]); } #endif switch (compressionFunction) { case DeflaterConstants.DEFLATE_STORED: if (strstart > blockStart) { huffman.FlushStoredBlock(window, blockStart, strstart - blockStart, false); blockStart = strstart; } UpdateHash(); break; case DeflaterConstants.DEFLATE_FAST: if (strstart > blockStart) { huffman.FlushBlock(window, blockStart, strstart - blockStart, false); blockStart = strstart; } break; case DeflaterConstants.DEFLATE_SLOW: if (prevAvailable) { huffman.TallyLit(window[strstart - 1] & 0xff); } if (strstart > blockStart) { huffman.FlushBlock(window, blockStart, strstart - blockStart, false); blockStart = strstart; } prevAvailable = false; matchLen = DeflaterConstants.MIN_MATCH - 1; break; } compressionFunction = DeflaterConstants.COMPR_FUNC[level]; } } /// /// Fill the window /// public void FillWindow() { /* If the window is almost full and there is insufficient lookahead, * move the upper half to the lower one to make room in the upper half. */ if (strstart >= DeflaterConstants.WSIZE + DeflaterConstants.MAX_DIST) { SlideWindow(); } /* If there is not enough lookahead, but still some input left, * read in the input */ if (lookahead < DeflaterConstants.MIN_LOOKAHEAD && inputOff < inputEnd) { int more = 2 * DeflaterConstants.WSIZE - lookahead - strstart; if (more > inputEnd - inputOff) { more = inputEnd - inputOff; } System.Array.Copy(inputBuf, inputOff, window, strstart + lookahead, more); adler.Update(inputBuf, inputOff, more); inputOff += more; totalIn += more; lookahead += more; } if (lookahead >= DeflaterConstants.MIN_MATCH) { UpdateHash(); } } void UpdateHash() { /* if (DEBUGGING) { Console.WriteLine("updateHash: "+strstart); } */ ins_h = (window[strstart] << DeflaterConstants.HASH_SHIFT) ^ window[strstart + 1]; } /// /// Inserts the current string in the head hash and returns the previous /// value for this hash. /// /// The previous hash value int InsertString() { short match; int hash = ((ins_h << DeflaterConstants.HASH_SHIFT) ^ window[strstart + (DeflaterConstants.MIN_MATCH - 1)]) & DeflaterConstants.HASH_MASK; #if DebugDeflation if (DeflaterConstants.DEBUGGING) { if (hash != (((window[strstart] << (2*HASH_SHIFT)) ^ (window[strstart + 1] << HASH_SHIFT) ^ (window[strstart + 2])) & HASH_MASK)) { throw new SharpZipBaseException("hash inconsistent: " + hash + "/" +window[strstart] + "," +window[strstart + 1] + "," +window[strstart + 2] + "," + HASH_SHIFT); } } #endif prev[strstart & DeflaterConstants.WMASK] = match = head[hash]; head[hash] = unchecked((short)strstart); ins_h = hash; return match & 0xffff; } void SlideWindow() { Array.Copy(window, DeflaterConstants.WSIZE, window, 0, DeflaterConstants.WSIZE); matchStart -= DeflaterConstants.WSIZE; strstart -= DeflaterConstants.WSIZE; blockStart -= DeflaterConstants.WSIZE; // Slide the hash table (could be avoided with 32 bit values // at the expense of memory usage). for (int i = 0; i < DeflaterConstants.HASH_SIZE; ++i) { int m = head[i] & 0xffff; head[i] = (short)(m >= DeflaterConstants.WSIZE ? (m - DeflaterConstants.WSIZE) : 0); } // Slide the prev table. for (int i = 0; i < DeflaterConstants.WSIZE; i++) { int m = prev[i] & 0xffff; prev[i] = (short)(m >= DeflaterConstants.WSIZE ? (m - DeflaterConstants.WSIZE) : 0); } } /// /// Find the best (longest) string in the window matching the /// string starting at strstart. /// /// Preconditions: /// /// strstart + DeflaterConstants.MAX_MATCH <= window.length. /// /// /// True if a match greater than the minimum length is found bool FindLongestMatch( int curMatch ) { int match; int scan = strstart; // scanMax is the highest position that we can look at int scanMax = scan + Math.Min( DeflaterConstants.MAX_MATCH, lookahead ) - 1; int limit = Math.Max( scan - DeflaterConstants.MAX_DIST, 0 ); byte[] window = this.window; short[] prev = this.prev; int chainLength = this.max_chain; int niceLength = Math.Min( this.niceLength, lookahead ); matchLen = Math.Max( matchLen, DeflaterConstants.MIN_MATCH - 1 ); if (scan + matchLen > scanMax) return false; byte scan_end1 = window[scan + matchLen - 1]; byte scan_end = window[scan + matchLen]; // Do not waste too much time if we already have a good match: if (matchLen >= this.goodLength) chainLength >>= 2; do { match = curMatch; scan = strstart; if (window[match + matchLen] != scan_end || window[match + matchLen - 1] != scan_end1 || window[match] != window[scan] || window[++match] != window[++scan]) { continue; } // scan is set to strstart+1 and the comparison passed, so // scanMax - scan is the maximum number of bytes we can compare. // below we compare 8 bytes at a time, so first we compare // (scanMax - scan) % 8 bytes, so the remainder is a multiple of 8 switch( (scanMax - scan) % 8 ) { case 1: if (window[++scan] == window[++match]) break; break; case 2: if (window[++scan] == window[++match] && window[++scan] == window[++match]) break; break; case 3: if (window[++scan] == window[++match] && window[++scan] == window[++match] && window[++scan] == window[++match]) break; break; case 4: if (window[++scan] == window[++match] && window[++scan] == window[++match] && window[++scan] == window[++match] && window[++scan] == window[++match]) break; break; case 5: if (window[++scan] == window[++match] && window[++scan] == window[++match] && window[++scan] == window[++match] && window[++scan] == window[++match] && window[++scan] == window[++match]) break; break; case 6: if (window[++scan] == window[++match] && window[++scan] == window[++match] && window[++scan] == window[++match] && window[++scan] == window[++match] && window[++scan] == window[++match] && window[++scan] == window[++match]) break; break; case 7: if (window[++scan] == window[++match] && window[++scan] == window[++match] && window[++scan] == window[++match] && window[++scan] == window[++match] && window[++scan] == window[++match] && window[++scan] == window[++match] && window[++scan] == window[++match]) break; break; } if (window[scan] == window[match]) { /* We check for insufficient lookahead only every 8th comparison; * the 256th check will be made at strstart + 258 unless lookahead is * exhausted first. */ do { if (scan == scanMax) { ++scan; // advance to first position not matched ++match; break; } } while (window[++scan] == window[++match] && window[++scan] == window[++match] && window[++scan] == window[++match] && window[++scan] == window[++match] && window[++scan] == window[++match] && window[++scan] == window[++match] && window[++scan] == window[++match] && window[++scan] == window[++match]); } if (scan - strstart > matchLen) { #if DebugDeflation if (DeflaterConstants.DEBUGGING && (ins_h == 0) ) Console.Error.WriteLine("Found match: " + curMatch + "-" + (scan - strstart)); #endif matchStart = curMatch; matchLen = scan - strstart; if (matchLen >= niceLength) break; scan_end1 = window[scan - 1]; scan_end = window[scan]; } } while ((curMatch = (prev[curMatch & DeflaterConstants.WMASK] & 0xffff)) > limit && 0 != --chainLength ); return matchLen >= DeflaterConstants.MIN_MATCH; } bool DeflateStored(bool flush, bool finish) { if (!flush && (lookahead == 0)) { return false; } strstart += lookahead; lookahead = 0; int storedLength = strstart - blockStart; if ((storedLength >= DeflaterConstants.MAX_BLOCK_SIZE) || // Block is full (blockStart < DeflaterConstants.WSIZE && storedLength >= DeflaterConstants.MAX_DIST) || // Block may move out of window flush) { bool lastBlock = finish; if (storedLength > DeflaterConstants.MAX_BLOCK_SIZE) { storedLength = DeflaterConstants.MAX_BLOCK_SIZE; lastBlock = false; } #if DebugDeflation if (DeflaterConstants.DEBUGGING) { Console.WriteLine("storedBlock[" + storedLength + "," + lastBlock + "]"); } #endif huffman.FlushStoredBlock(window, blockStart, storedLength, lastBlock); blockStart += storedLength; return !lastBlock; } return true; } bool DeflateFast(bool flush, bool finish) { if (lookahead < DeflaterConstants.MIN_LOOKAHEAD && !flush) { return false; } while (lookahead >= DeflaterConstants.MIN_LOOKAHEAD || flush) { if (lookahead == 0) { // We are flushing everything huffman.FlushBlock(window, blockStart, strstart - blockStart, finish); blockStart = strstart; return false; } if (strstart > 2 * DeflaterConstants.WSIZE - DeflaterConstants.MIN_LOOKAHEAD) { /* slide window, as FindLongestMatch needs this. * This should only happen when flushing and the window * is almost full. */ SlideWindow(); } int hashHead; if (lookahead >= DeflaterConstants.MIN_MATCH && (hashHead = InsertString()) != 0 && strategy != DeflateStrategy.HuffmanOnly && strstart - hashHead <= DeflaterConstants.MAX_DIST && FindLongestMatch(hashHead)) { // longestMatch sets matchStart and matchLen #if DebugDeflation if (DeflaterConstants.DEBUGGING) { for (int i = 0 ; i < matchLen; i++) { if (window[strstart + i] != window[matchStart + i]) { throw new SharpZipBaseException("Match failure"); } } } #endif bool full = huffman.TallyDist(strstart - matchStart, matchLen); lookahead -= matchLen; if (matchLen <= max_lazy && lookahead >= DeflaterConstants.MIN_MATCH) { while (--matchLen > 0) { ++strstart; InsertString(); } ++strstart; } else { strstart += matchLen; if (lookahead >= DeflaterConstants.MIN_MATCH - 1) { UpdateHash(); } } matchLen = DeflaterConstants.MIN_MATCH - 1; if (!full) { continue; } } else { // No match found huffman.TallyLit(window[strstart] & 0xff); ++strstart; --lookahead; } if (huffman.IsFull()) { bool lastBlock = finish && (lookahead == 0); huffman.FlushBlock(window, blockStart, strstart - blockStart, lastBlock); blockStart = strstart; return !lastBlock; } } return true; } bool DeflateSlow(bool flush, bool finish) { if (lookahead < DeflaterConstants.MIN_LOOKAHEAD && !flush) { return false; } while (lookahead >= DeflaterConstants.MIN_LOOKAHEAD || flush) { if (lookahead == 0) { if (prevAvailable) { huffman.TallyLit(window[strstart - 1] & 0xff); } prevAvailable = false; // We are flushing everything #if DebugDeflation if (DeflaterConstants.DEBUGGING && !flush) { throw new SharpZipBaseException("Not flushing, but no lookahead"); } #endif huffman.FlushBlock(window, blockStart, strstart - blockStart, finish); blockStart = strstart; return false; } if (strstart >= 2 * DeflaterConstants.WSIZE - DeflaterConstants.MIN_LOOKAHEAD) { /* slide window, as FindLongestMatch needs this. * This should only happen when flushing and the window * is almost full. */ SlideWindow(); } int prevMatch = matchStart; int prevLen = matchLen; if (lookahead >= DeflaterConstants.MIN_MATCH) { int hashHead = InsertString(); if (strategy != DeflateStrategy.HuffmanOnly && hashHead != 0 && strstart - hashHead <= DeflaterConstants.MAX_DIST && FindLongestMatch(hashHead)) { // longestMatch sets matchStart and matchLen // Discard match if too small and too far away if (matchLen <= 5 && (strategy == DeflateStrategy.Filtered || (matchLen == DeflaterConstants.MIN_MATCH && strstart - matchStart > TooFar))) { matchLen = DeflaterConstants.MIN_MATCH - 1; } } } // previous match was better if ((prevLen >= DeflaterConstants.MIN_MATCH) && (matchLen <= prevLen)) { #if DebugDeflation if (DeflaterConstants.DEBUGGING) { for (int i = 0 ; i < matchLen; i++) { if (window[strstart-1+i] != window[prevMatch + i]) throw new SharpZipBaseException(); } } #endif huffman.TallyDist(strstart - 1 - prevMatch, prevLen); prevLen -= 2; do { strstart++; lookahead--; if (lookahead >= DeflaterConstants.MIN_MATCH) { InsertString(); } } while (--prevLen > 0); strstart++; lookahead--; prevAvailable = false; matchLen = DeflaterConstants.MIN_MATCH - 1; } else { if (prevAvailable) { huffman.TallyLit(window[strstart - 1] & 0xff); } prevAvailable = true; strstart++; lookahead--; } if (huffman.IsFull()) { int len = strstart - blockStart; if (prevAvailable) { len--; } bool lastBlock = (finish && (lookahead == 0) && !prevAvailable); huffman.FlushBlock(window, blockStart, len, lastBlock); blockStart += len; return !lastBlock; } } return true; } #region Instance Fields // Hash index of string to be inserted int ins_h; /// /// Hashtable, hashing three characters to an index for window, so /// that window[index]..window[index+2] have this hash code. /// Note that the array should really be unsigned short, so you need /// to and the values with 0xffff. /// short[] head; /// /// prev[index & WMASK] points to the previous index that has the /// same hash code as the string starting at index. This way /// entries with the same hash code are in a linked list. /// Note that the array should really be unsigned short, so you need /// to and the values with 0xffff. /// short[] prev; int matchStart; // Length of best match int matchLen; // Set if previous match exists bool prevAvailable; int blockStart; /// /// Points to the current character in the window. /// int strstart; /// /// lookahead is the number of characters starting at strstart in /// window that are valid. /// So window[strstart] until window[strstart+lookahead-1] are valid /// characters. /// int lookahead; /// /// This array contains the part of the uncompressed stream that /// is of relevance. The current character is indexed by strstart. /// byte[] window; DeflateStrategy strategy; int max_chain, max_lazy, niceLength, goodLength; /// /// The current compression function. /// int compressionFunction; /// /// The input data for compression. /// byte[] inputBuf; /// /// The total bytes of input read. /// long totalIn; /// /// The offset into inputBuf, where input data starts. /// int inputOff; /// /// The end offset of the input data. /// int inputEnd; DeflaterPending pending; DeflaterHuffman huffman; /// /// The adler checksum /// Adler32 adler; #endregion } }