LzwInputStream.cs 14 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538
  1. using System;
  2. using System.IO;
  3. namespace ICSharpCode.SharpZipLib.Lzw
  4. {
  5. /// <summary>
  6. /// This filter stream is used to decompress a LZW format stream.
  7. /// Specifically, a stream that uses the LZC compression method.
  8. /// This file format is usually associated with the .Z file extension.
  9. ///
  10. /// See http://en.wikipedia.org/wiki/Compress
  11. /// See http://wiki.wxwidgets.org/Development:_Z_File_Format
  12. ///
  13. /// The file header consists of 3 (or optionally 4) bytes. The first two bytes
  14. /// contain the magic marker "0x1f 0x9d", followed by a byte of flags.
  15. ///
  16. /// Based on Java code by Ronald Tschalar, which in turn was based on the unlzw.c
  17. /// code in the gzip package.
  18. /// </summary>
  19. /// <example> This sample shows how to unzip a compressed file
  20. /// <code>
  21. /// using System;
  22. /// using System.IO;
  23. ///
  24. /// using ICSharpCode.SharpZipLib.Core;
  25. /// using ICSharpCode.SharpZipLib.Lzw;
  26. ///
  27. /// class MainClass
  28. /// {
  29. /// public static void Main(string[] args)
  30. /// {
  31. /// using (Stream inStream = new LzwInputStream(File.OpenRead(args[0])))
  32. /// using (FileStream outStream = File.Create(Path.GetFileNameWithoutExtension(args[0]))) {
  33. /// byte[] buffer = new byte[4096];
  34. /// StreamUtils.Copy(inStream, outStream, buffer);
  35. /// // OR
  36. /// inStream.Read(buffer, 0, buffer.Length);
  37. /// // now do something with the buffer
  38. /// }
  39. /// }
  40. /// }
  41. /// </code>
  42. /// </example>
  public class LzwInputStream : Stream
  {
      /// <summary>
      /// Gets or sets a flag indicating ownership of underlying stream.
      /// When the flag is true <see cref="Stream.Dispose()" /> will close the underlying stream also.
      /// </summary>
      /// <remarks>The default value is true.</remarks>
      public bool IsStreamOwner
      {
          get { return isStreamOwner; }
          set { isStreamOwner = value; }
      }

      /// <summary>
      /// Creates a LzwInputStream
      /// </summary>
      /// <param name="baseInputStream">
      /// The stream to read compressed (LZW format) data from
      /// </param>
      /// <exception cref="ArgumentNullException">
      /// <paramref name="baseInputStream"/> is null
      /// </exception>
      public LzwInputStream(Stream baseInputStream)
      {
          if (baseInputStream == null) {
              throw new ArgumentNullException("baseInputStream");
          }

          this.baseInputStream = baseInputStream;
      }

      /// <summary>
      /// See <see cref="System.IO.Stream.ReadByte"/>
      /// </summary>
      /// <returns>The decompressed byte as an int in the range 0-255, or -1 at the end of the stream</returns>
      public override int ReadByte()
      {
          int b = Read(one, 0, 1);
          if (b == 1) {
              return (one[0] & 0xff);
          }

          return -1;
      }

      /// <summary>
      /// Reads decompressed data into the provided buffer byte array
      /// </summary>
      /// <param name ="buffer">
      /// The array to read and decompress data into
      /// </param>
      /// <param name ="offset">
      /// The offset indicating where the data should be placed
      /// </param>
      /// <param name ="count">
      /// The number of bytes to decompress
      /// </param>
      /// <returns>The number of bytes read. Zero signals the end of stream</returns>
      /// <exception cref="ArgumentNullException"><paramref name="buffer"/> is null</exception>
      /// <exception cref="ArgumentOutOfRangeException">
      /// <paramref name="offset"/> or <paramref name="count"/> is negative
      /// </exception>
      /// <exception cref="ArgumentException">
      /// <paramref name="offset"/> plus <paramref name="count"/> exceeds the buffer length
      /// </exception>
      /// <exception cref="LzwException">The header or the compressed data is invalid</exception>
      public override int Read(byte[] buffer, int offset, int count)
      {
          if (buffer == null) {
              throw new ArgumentNullException("buffer");
          }

          if (offset < 0) {
              throw new ArgumentOutOfRangeException("offset", "Cannot be negative");
          }

          if (count < 0) {
              throw new ArgumentOutOfRangeException("count", "Cannot be negative");
          }

          if (buffer.Length - offset < count) {
              throw new ArgumentException("Offset and count exceed the buffer length");
          }

          if (!headerParsed) {
              ParseHeader();
          }

          if (eof) {
              return 0;
          }

          int start = offset;

          /* Using local copies of various variables speeds things up by as
           * much as 30% in Java! Performance not tested in C#.
           */
          int[] lTabPrefix = tabPrefix;
          byte[] lTabSuffix = tabSuffix;
          byte[] lStack = stack;
          int lNBits = nBits;
          int lMaxCode = maxCode;
          int lMaxMaxCode = maxMaxCode;
          int lBitMask = bitMask;
          int lOldCode = oldCode;
          byte lFinChar = finChar;
          int lStackP = stackP;
          int lFreeEnt = freeEnt;
          byte[] lData = data;
          int lBitPos = bitPos;

          // Hand out bytes that an earlier call decoded but could not deliver.
          int sSize = lStack.Length - lStackP;
          if (sSize > 0) {
              int num = (sSize >= count) ? count : sSize;
              Array.Copy(lStack, lStackP, buffer, offset, num);
              offset += num;
              count -= num;
              lStackP += num;
          }

          if (count == 0) {
              stackP = lStackP;
              return offset - start;
          }

          // loop, filling local buffer until enough data has been decompressed
          MainLoop:
          do {
              if (end < EXTRA) {
                  Fill();
              }

              // Number of buffered bits that may be consumed in this pass.
              // While the last underlying read returned data, only whole groups
              // of lNBits bytes are used; once it returned nothing, every
              // position that can still start a complete code is used.
              int bitIn = (got > 0) ? (end - end % lNBits) << 3 :
                                      (end << 3) - (lNBits - 1);

              while (lBitPos < bitIn) {
                  // handle 1-byte reads correctly
                  if (count == 0) {
                      StoreState(lNBits, lMaxCode, lBitMask, lOldCode, lFinChar,
                                 lStackP, lFreeEnt, lBitPos);
                      return offset - start;
                  }

                  // check for code-width expansion
                  if (lFreeEnt > lMaxCode) {
                      // Round the bit position up to the next multiple of
                      // (code width * 8) bits before widening the codes.
                      int nBytes = lNBits << 3;
                      lBitPos = (lBitPos - 1) +
                                nBytes - (lBitPos - 1 + nBytes) % nBytes;

                      lNBits++;
                      lMaxCode = (lNBits == maxBits) ? lMaxMaxCode :
                                                       (1 << lNBits) - 1;
                      lBitMask = (1 << lNBits) - 1;
                      lBitPos = ResetBuf(lBitPos);
                      goto MainLoop;
                  }

                  // read next code: combine three bytes (lowest byte first),
                  // shift out the bits already consumed and mask to the code width
                  int pos = lBitPos >> 3;
                  int code = (((lData[pos] & 0xFF) |
                               ((lData[pos + 1] & 0xFF) << 8) |
                               ((lData[pos + 2] & 0xFF) << 16)) >>
                              (lBitPos & 0x7)) & lBitMask;
                  lBitPos += lNBits;

                  // handle first iteration: the first code must be a literal byte
                  if (lOldCode == -1) {
                      if (code >= 256) {
                          throw new LzwException("corrupt input: " + code + " > 255");
                      }

                      lFinChar = (byte)(lOldCode = code);
                      buffer[offset++] = lFinChar;
                      count--;
                      continue;
                  }

                  // handle CLEAR code
                  if (code == TBL_CLEAR && blockMode) {
                      // Zeroes the first zeros.Length entries of the prefix table.
                      Array.Copy(zeros, 0, lTabPrefix, 0, zeros.Length);
                      lFreeEnt = TBL_FIRST - 1;

                      // Same boundary rounding as for code-width expansion.
                      int nBytes = lNBits << 3;
                      lBitPos = (lBitPos - 1) + nBytes - (lBitPos - 1 + nBytes) % nBytes;
                      lNBits = LzwConstants.INIT_BITS;
                      lMaxCode = (1 << lNBits) - 1;
                      lBitMask = lMaxCode;

                      // Code tables reset
                      lBitPos = ResetBuf(lBitPos);
                      goto MainLoop;
                  }

                  // setup
                  int inCode = code;
                  lStackP = lStack.Length;

                  // Handle KwK case
                  if (code >= lFreeEnt) {
                      if (code > lFreeEnt) {
                          throw new LzwException("corrupt input: code=" + code +
                                                 ", freeEnt=" + lFreeEnt);
                      }

                      lStack[--lStackP] = lFinChar;
                      code = lOldCode;
                  }

                  // Generate output characters in reverse order
                  while (code >= 256) {
                      lStack[--lStackP] = lTabSuffix[code];
                      code = lTabPrefix[code];
                  }

                  lFinChar = lTabSuffix[code];
                  buffer[offset++] = lFinChar;
                  count--;

                  // And put them out in forward order
                  sSize = lStack.Length - lStackP;
                  int num = (sSize >= count) ? count : sSize;
                  Array.Copy(lStack, lStackP, buffer, offset, num);
                  offset += num;
                  count -= num;
                  lStackP += num;

                  // generate new entry in table
                  if (lFreeEnt < lMaxMaxCode) {
                      lTabPrefix[lFreeEnt] = lOldCode;
                      lTabSuffix[lFreeEnt] = lFinChar;
                      lFreeEnt++;
                  }

                  // Remember previous code
                  lOldCode = inCode;

                  // if output buffer full, then return
                  if (count == 0) {
                      StoreState(lNBits, lMaxCode, lBitMask, lOldCode, lFinChar,
                                 lStackP, lFreeEnt, lBitPos);
                      return offset - start;
                  }
              } // while

              lBitPos = ResetBuf(lBitPos);
          } while (got > 0); // do..while

          StoreState(lNBits, lMaxCode, lBitMask, lOldCode, lFinChar,
                     lStackP, lFreeEnt, lBitPos);
          eof = true;
          return offset - start;
      }

      /// <summary>
      /// Writes the local working copies used by <see cref="Read"/> back to
      /// the instance fields so the next call resumes where this one stopped.
      /// </summary>
      private void StoreState(int currentNBits, int currentMaxCode, int currentBitMask,
                              int currentOldCode, byte currentFinChar, int currentStackP,
                              int currentFreeEnt, int currentBitPos)
      {
          nBits = currentNBits;
          maxCode = currentMaxCode;
          bitMask = currentBitMask;
          oldCode = currentOldCode;
          finChar = currentFinChar;
          stackP = currentStackP;
          freeEnt = currentFreeEnt;
          bitPos = currentBitPos;
      }

      /// <summary>
      /// Moves the unread data in the buffer to the beginning and resets
      /// the pointers.
      /// </summary>
      /// <param name="bitPosition">Current read position in the buffer, in bits</param>
      /// <returns>The new bit position, which is always zero</returns>
      private int ResetBuf(int bitPosition)
      {
          int pos = bitPosition >> 3;
          Array.Copy(data, pos, data, 0, end - pos);
          end -= pos;
          return 0;
      }

      /// <summary>
      /// Reads more compressed bytes from the underlying stream into the input
      /// buffer after the data already held, and records the count in <c>got</c>.
      /// </summary>
      private void Fill()
      {
          got = baseInputStream.Read(data, end, data.Length - 1 - end);
          if (got > 0) {
              end += got;
          }
      }

      /// <summary>
      /// Reads and validates the stream header, then initialises the decoder state.
      /// </summary>
      /// <exception cref="LzwException">
      /// The header is truncated, has the wrong magic bytes, declares an
      /// unsupported bit count or has reserved flag bits set
      /// </exception>
      private void ParseHeader()
      {
          headerParsed = true;

          byte[] hdr = new byte[LzwConstants.HDR_SIZE];

          // Stream.Read may return fewer bytes than requested and returns 0
          // (never a negative value) at the end of the stream, so keep reading
          // until the header is complete or the stream ends.
          int filled = 0;
          while (filled < hdr.Length) {
              int result = baseInputStream.Read(hdr, filled, hdr.Length - filled);
              if (result <= 0) {
                  break;
              }

              filled += result;
          }

          if (filled < hdr.Length) {
              throw new LzwException("Failed to read LZW header");
          }

          // Check the magic marker
          if (hdr[0] != (LzwConstants.MAGIC >> 8) || hdr[1] != (LzwConstants.MAGIC & 0xff)) {
              throw new LzwException(String.Format(
                  "Wrong LZW header. Magic bytes don't match. 0x{0:x2} 0x{1:x2}",
                  hdr[0], hdr[1]));
          }

          // Check the 3rd header byte
          blockMode = (hdr[2] & LzwConstants.BLOCK_MODE_MASK) > 0;
          maxBits = hdr[2] & LzwConstants.BIT_MASK;

          if (maxBits > LzwConstants.MAX_BITS) {
              throw new LzwException("Stream compressed with " + maxBits +
                                     " bits, but decompression can only handle " +
                                     LzwConstants.MAX_BITS + " bits.");
          }

          // The tables below are sized 1 << maxBits and must hold the 256
          // literal entries; decoding also starts at INIT_BITS wide codes.
          if (maxBits < LzwConstants.INIT_BITS) {
              throw new LzwException("Stream header specifies " + maxBits +
                                     " bits, but at least " +
                                     LzwConstants.INIT_BITS + " bits are required.");
          }

          if ((hdr[2] & LzwConstants.RESERVED_MASK) > 0) {
              throw new LzwException("Unsupported bits set in the header.");
          }

          // Initialize variables
          maxMaxCode = 1 << maxBits;
          nBits = LzwConstants.INIT_BITS;
          maxCode = (1 << nBits) - 1;
          bitMask = maxCode;
          oldCode = -1;
          finChar = 0;
          freeEnt = blockMode ? TBL_FIRST : 256;

          tabPrefix = new int[1 << maxBits];
          tabSuffix = new byte[1 << maxBits];
          stack = new byte[1 << maxBits];
          stackP = stack.Length;

          // Every literal code maps to its own byte value.
          for (int idx = 255; idx >= 0; idx--) {
              tabSuffix[idx] = (byte)idx;
          }
      }

      #region Stream Overrides

      /// <summary>
      /// Gets a value indicating whether the current stream supports reading
      /// </summary>
      public override bool CanRead
      {
          get { return baseInputStream.CanRead; }
      }

      /// <summary>
      /// Gets a value of false indicating seeking is not supported for this stream.
      /// </summary>
      public override bool CanSeek
      {
          get { return false; }
      }

      /// <summary>
      /// Gets a value of false indicating that this stream is not writeable.
      /// </summary>
      public override bool CanWrite
      {
          get { return false; }
      }

      /// <summary>
      /// Gets the number of bytes returned by the most recent read from the
      /// underlying stream.
      /// </summary>
      /// <remarks>
      /// NOTE(review): this is not the length of the decompressed data; the
      /// value is kept as-is so existing callers are unaffected.
      /// </remarks>
      public override long Length
      {
          get { return got; }
      }

      /// <summary>
      /// Gets the current position of the underlying (compressed) stream.
      /// Throws a NotSupportedException when attempting to set the position
      /// </summary>
      /// <exception cref="NotSupportedException">Attempting to set the position</exception>
      public override long Position
      {
          get { return baseInputStream.Position; }
          set { throw new NotSupportedException("LzwInputStream Position not supported"); }
      }

      /// <summary>
      /// Flushes the baseInputStream
      /// </summary>
      public override void Flush()
      {
          baseInputStream.Flush();
      }

      /// <summary>
      /// Sets the position within the current stream
      /// Always throws a NotSupportedException
      /// </summary>
      /// <param name="offset">The relative offset to seek to.</param>
      /// <param name="origin">The <see cref="SeekOrigin"/> defining where to seek from.</param>
      /// <returns>The new position in the stream.</returns>
      /// <exception cref="NotSupportedException">Any access</exception>
      public override long Seek(long offset, SeekOrigin origin)
      {
          throw new NotSupportedException("Seek not supported");
      }

      /// <summary>
      /// Set the length of the current stream
      /// Always throws a NotSupportedException
      /// </summary>
      /// <param name="value">The new length value for the stream.</param>
      /// <exception cref="NotSupportedException">Any access</exception>
      public override void SetLength(long value)
      {
          throw new NotSupportedException("LzwInputStream SetLength not supported");
      }

      /// <summary>
      /// Writes a sequence of bytes to stream and advances the current position
      /// This method always throws a NotSupportedException
      /// </summary>
      /// <param name="buffer">The buffer containing data to write.</param>
      /// <param name="offset">The offset of the first byte to write.</param>
      /// <param name="count">The number of bytes to write.</param>
      /// <exception cref="NotSupportedException">Any access</exception>
      public override void Write(byte[] buffer, int offset, int count)
      {
          throw new NotSupportedException("LzwInputStream Write not supported");
      }

      /// <summary>
      /// Writes one byte to the current stream and advances the current position
      /// Always throws a NotSupportedException
      /// </summary>
      /// <param name="value">The byte to write.</param>
      /// <exception cref="NotSupportedException">Any access</exception>
      public override void WriteByte(byte value)
      {
          throw new NotSupportedException("LzwInputStream WriteByte not supported");
      }

      /// <summary>
      /// Closes the input stream. When <see cref="IsStreamOwner"></see>
      /// is true the underlying stream is also closed.
      /// </summary>
      /// <param name="disposing">
      /// true when called from Dispose/Close; false when called from a finalizer,
      /// in which case the underlying stream is not touched.
      /// </param>
      protected override void Dispose(bool disposing)
      {
          if (!isClosed) {
              isClosed = true;
              if (disposing && IsStreamOwner) {
                  baseInputStream.Dispose();
              }
          }

          base.Dispose(disposing);
      }

      #endregion

      #region Instance Fields

      private readonly Stream baseInputStream;

      // Backing field for IsStreamOwner.
      private bool isStreamOwner = true;

      /// <summary>
      /// Flag indicating whether this instance has been closed or not.
      /// </summary>
      private bool isClosed;

      // Scratch buffer used by ReadByte.
      private readonly byte[] one = new byte[1];
      private bool headerParsed;

      // string table stuff
      private const int TBL_CLEAR = 0x100;
      private const int TBL_FIRST = TBL_CLEAR + 1;

      private int[] tabPrefix;
      private byte[] tabSuffix;
      private readonly int[] zeros = new int[256];
      private byte[] stack;

      // various state
      private bool blockMode;
      private int nBits;
      private int maxBits;
      private int maxMaxCode;
      private int maxCode;
      private int bitMask;
      private int oldCode;
      private byte finChar;
      private int stackP;
      private int freeEnt;

      // input buffer
      private readonly byte[] data = new byte[1024 * 8];
      private int bitPos;
      private int end;
      private int got;
      private bool eof;
      private const int EXTRA = 64;

      #endregion
  }
  463. }