// GzipInputStream.cs
  1. using System;
  2. using System.IO;
  3. using ICSharpCode.SharpZipLib.Checksum;
  4. using ICSharpCode.SharpZipLib.Zip.Compression;
  5. using ICSharpCode.SharpZipLib.Zip.Compression.Streams;
  6. namespace ICSharpCode.SharpZipLib.GZip
  7. {
  /// <summary>
  /// This filter stream is used to decompress a "GZIP" format stream.
  /// The "GZIP" format is described in RFC 1952.
  ///
  /// author of the original java version : John Leuner
  /// </summary>
  /// <example> This sample shows how to unzip a gzipped file
  /// <code>
  /// using System;
  /// using System.IO;
  ///
  /// using ICSharpCode.SharpZipLib.Core;
  /// using ICSharpCode.SharpZipLib.GZip;
  ///
  /// class MainClass
  /// {
  ///   public static void Main(string[] args)
  ///   {
  ///     using (Stream inStream = new GZipInputStream(File.OpenRead(args[0])))
  ///     using (FileStream outStream = File.Create(Path.GetFileNameWithoutExtension(args[0]))) {
  ///       byte[] buffer = new byte[4096];
  ///       StreamUtils.Copy(inStream, outStream, buffer);
  ///     }
  ///   }
  /// }
  /// </code>
  /// </example>
  public class GZipInputStream : InflaterInputStream
  {
    #region Instance Fields

    /// <summary>
    /// CRC-32 value for uncompressed data of the current member.
    /// Re-created every time a member header is read.
    /// </summary>
    protected Crc32 crc;

    /// <summary>
    /// Flag to indicate if we've read the GZIP header yet for the current member (block of compressed data).
    /// This is tracked per-block as the file is parsed.
    /// </summary>
    private bool readGZIPHeader;

    /// <summary>
    /// Flag to indicate if at least one block in a stream with concatenated blocks was read successfully.
    /// This allows us to exit gracefully if downstream data is not in gzip format.
    /// </summary>
    private bool completedLastBlock;

    #endregion

    #region Constructors

    /// <summary>
    /// Creates a GZipInputStream with the default buffer size (4096 bytes)
    /// </summary>
    /// <param name="baseInputStream">
    /// The stream to read compressed data from (in GZIP format)
    /// </param>
    public GZipInputStream(Stream baseInputStream)
      : this(baseInputStream, 4096)
    {
    }

    /// <summary>
    /// Creates a GZipInputStream with the specified buffer size
    /// </summary>
    /// <param name="baseInputStream">
    /// The stream to read compressed data from (in GZIP format)
    /// </param>
    /// <param name="size">
    /// Size of the buffer to use
    /// </param>
    public GZipInputStream(Stream baseInputStream, int size)
      : base(baseInputStream, new Inflater(true), size)
    {
    }

    #endregion

    #region Stream overrides

    /// <summary>
    /// Reads uncompressed data into an array of bytes
    /// </summary>
    /// <param name="buffer">
    /// The buffer to read uncompressed data into
    /// </param>
    /// <param name="offset">
    /// The offset indicating where the data should be placed
    /// </param>
    /// <param name="count">
    /// The number of uncompressed bytes to be read
    /// </param>
    /// <returns>
    /// Returns the number of bytes actually read. Zero is returned at the end of the
    /// stream, or when <paramref name="count"/> is zero.
    /// </returns>
    /// <exception cref="GZipException">
    /// A member header or footer is invalid (and no earlier member completed successfully,
    /// in the case of a header).
    /// </exception>
    /// <exception cref="EndOfStreamException">
    /// The stream ended in the middle of a member header or footer.
    /// </exception>
    public override int Read(byte[] buffer, int offset, int count)
    {
      // A GZIP file can contain multiple blocks of compressed data, although this is quite rare.
      // A compressed block could potentially be empty, so we need to loop until we reach EOF or
      // we find data.
      while (true)
      {
        // If we haven't read the header for this block, read it
        if (!readGZIPHeader)
        {
          // Try to read header. If there is no header (0 bytes available), this is EOF. If there is
          // an incomplete header, this will throw an exception.
          try
          {
            if (!ReadHeader())
            {
              return 0;
            }
          }
          catch (Exception ex)
          {
            if (completedLastBlock && (ex is GZipException || ex is EndOfStreamException))
            {
              // If we completed the last block (i.e. we're in a stream that has multiple blocks
              // concatenated) we want to return gracefully from any header parsing exceptions,
              // since sometimes there may be trailing garbage on a stream.
              return 0;
            }

            throw;
          }
        }

        // Try to read compressed data
        int bytesRead = base.Read(buffer, offset, count);
        if (bytesRead > 0)
        {
          crc.Update(buffer, offset, bytesRead);
        }

        // If this is the end of the member, read the footer
        if (inf.IsFinished)
        {
          ReadFooter();
        }

        // A request for zero bytes can never produce any data, so return instead of
        // looping forever waiting for bytesRead to become positive.
        if (bytesRead > 0 || count == 0)
        {
          return bytesRead;
        }
      }
    }

    #endregion

    #region Support routines

    /// <summary>
    /// Reads and validates the header of the next GZIP member.
    /// </summary>
    /// <returns>
    /// <c>true</c> if a header was read; <c>false</c> if no more input is available (EOF).
    /// </returns>
    /// <exception cref="GZipException">The header is malformed or its CRC does not match.</exception>
    /// <exception cref="EndOfStreamException">The input ended inside the header.</exception>
    private bool ReadHeader()
    {
      // Initialize CRC for this block
      crc = new Crc32();

      // Make sure there is data in file. We can't rely on ReadLeByte() to fill the buffer, as this could be EOF,
      // which is fine, but ReadLeByte() throws an exception if it doesn't find data, so we do this part ourselves.
      if (inputBuffer.Available <= 0)
      {
        inputBuffer.Fill();
        if (inputBuffer.Available <= 0)
        {
          // No header, EOF.
          return false;
        }
      }

      // Running CRC over every header byte, checked against the optional FHCRC field below.
      var headCRC = new Crc32();

      // 1. Check the two magic bytes
      if (ReadHeaderByte(headCRC) != (GZipConstants.GZIP_MAGIC >> 8))
      {
        throw new GZipException("Error GZIP header, first magic byte doesn't match");
      }

      if (ReadHeaderByte(headCRC) != (GZipConstants.GZIP_MAGIC & 0xFF))
      {
        throw new GZipException("Error GZIP header, second magic byte doesn't match");
      }

      // 2. Check the compression type (must be 8)
      if (ReadHeaderByte(headCRC) != 8)
      {
        throw new GZipException("Error GZIP header, data not in deflate format");
      }

      // 3. Check the flags
      /*  This flag byte is divided into individual bits as follows:
          bit 0   FTEXT
          bit 1   FHCRC
          bit 2   FEXTRA
          bit 3   FNAME
          bit 4   FCOMMENT
          bit 5   reserved
          bit 6   reserved
          bit 7   reserved
      */
      int flags = ReadHeaderByte(headCRC);

      // 3.1 Check the reserved bits are zero
      if ((flags & 0xE0) != 0)
      {
        throw new GZipException("Reserved flag bits in GZIP header != 0");
      }

      // 4.-6. Skip the modification time, extra flags, and OS type
      SkipHeaderBytes(headCRC, 6);

      // 7. Read extra field
      if ((flags & GZipConstants.FEXTRA) != 0)
      {
        // XLEN is total length of extra subfields, we will skip them all
        int lengthLow = ReadHeaderByte(headCRC);
        int lengthHigh = ReadHeaderByte(headCRC);
        SkipHeaderBytes(headCRC, (lengthHigh << 8) | lengthLow); // gzip is LSB first
      }

      // 8. Read file name
      if ((flags & GZipConstants.FNAME) != 0)
      {
        SkipZeroTerminatedField(headCRC);
      }

      // 9. Read comment
      if ((flags & GZipConstants.FCOMMENT) != 0)
      {
        SkipZeroTerminatedField(headCRC);
      }

      // 10. Read header CRC
      if ((flags & GZipConstants.FHCRC) != 0)
      {
        // The CRC16 is the low 16 bits of the CRC-32 of all preceding header bytes; it is
        // captured before the CRC16 bytes themselves are read and is not fed into headCRC.
        int expectedCrc = (int)(headCRC.Value & 0xffff);

        // Like every multi-byte field in gzip (RFC 1952, section 2.1), the CRC16 is stored
        // least-significant byte first.
        int crcLow = ReadRawHeaderByte();
        int crcHigh = ReadRawHeaderByte();
        int storedCrc = (crcHigh << 8) | crcLow;

        if (storedCrc != expectedCrc)
        {
          throw new GZipException("Header CRC value mismatch");
        }
      }

      readGZIPHeader = true;
      return true;
    }

    /// <summary>
    /// Reads one header byte from the input buffer without touching any CRC.
    /// </summary>
    /// <returns>The byte read.</returns>
    /// <exception cref="EndOfStreamException">The input buffer reported a negative value.</exception>
    private int ReadRawHeaderByte()
    {
      int value = inputBuffer.ReadLeByte();
      if (value < 0)
      {
        throw new EndOfStreamException("EOS reading GZIP header");
      }

      return value;
    }

    /// <summary>
    /// Reads one header byte and folds it into the running header CRC.
    /// </summary>
    /// <param name="headCRC">The CRC accumulating all header bytes.</param>
    /// <returns>The byte read.</returns>
    private int ReadHeaderByte(Crc32 headCRC)
    {
      int value = ReadRawHeaderByte();
      headCRC.Update(value);
      return value;
    }

    /// <summary>
    /// Skips a fixed number of header bytes, still including them in the header CRC.
    /// </summary>
    /// <param name="headCRC">The CRC accumulating all header bytes.</param>
    /// <param name="length">The number of bytes to skip.</param>
    private void SkipHeaderBytes(Crc32 headCRC, int length)
    {
      for (int i = 0; i < length; i++)
      {
        ReadHeaderByte(headCRC);
      }
    }

    /// <summary>
    /// Skips a zero-terminated header field (file name or comment). Every byte, including
    /// the terminator, is included in the header CRC.
    /// </summary>
    /// <param name="headCRC">The CRC accumulating all header bytes.</param>
    private void SkipZeroTerminatedField(Crc32 headCRC)
    {
      while (ReadHeaderByte(headCRC) != 0)
      {
        // Keep consuming until the terminating zero byte has been read.
      }
    }

    /// <summary>
    /// Reads the 8-byte member footer, verifies the CRC-32 and the uncompressed length
    /// against what was actually decompressed, and prepares for a possible following member.
    /// </summary>
    /// <exception cref="GZipException">The CRC or the byte count in the footer does not match.</exception>
    /// <exception cref="EndOfStreamException">The input ended inside the footer.</exception>
    private void ReadFooter()
    {
      byte[] footer = new byte[8];

      // End of stream; reclaim all bytes from inf, read the final byte count, and reset the inflator.
      // The count must be captured before the inflater is reset.
      long bytesRead = inf.TotalOut & 0xffffffff;
      inputBuffer.Available += inf.RemainingInput;
      inf.Reset();

      // Read footer from inputBuffer; it may arrive in several pieces
      int needed = footer.Length;
      while (needed > 0)
      {
        int count = inputBuffer.ReadClearTextBuffer(footer, footer.Length - needed, needed);
        if (count <= 0)
        {
          throw new EndOfStreamException("EOS reading GZIP footer");
        }

        needed -= count;
      }

      // Check the CRC-32 of the uncompressed data (first four footer bytes, LSB first)
      int crcval = unchecked((int)ReadLeUInt32(footer, 0));
      int computedCrc = unchecked((int)crc.Value);
      if (crcval != computedCrc)
      {
        throw new GZipException("GZIP crc sum mismatch, theirs \"" + crcval + "\" and ours \"" + computedCrc + "\"");
      }

      // NOTE The total here is the original total modulo 2 ^ 32.
      uint total = ReadLeUInt32(footer, 4);
      if (bytesRead != total)
      {
        throw new GZipException("Number of bytes mismatch in footer");
      }

      // Mark header read as false so if another header exists, we'll continue reading through the file
      readGZIPHeader = false;

      // Indicate that we succeeded on at least one block so we can exit gracefully if there is trailing garbage downstream
      completedLastBlock = true;
    }

    /// <summary>
    /// Decodes a 32-bit unsigned value stored least-significant byte first.
    /// </summary>
    /// <param name="data">The buffer holding the value.</param>
    /// <param name="offset">The index of the least-significant byte.</param>
    /// <returns>The decoded value.</returns>
    private static uint ReadLeUInt32(byte[] data, int offset)
    {
      return (uint)data[offset]
        | ((uint)data[offset + 1] << 8)
        | ((uint)data[offset + 2] << 16)
        | ((uint)data[offset + 3] << 24);
    }

    #endregion
  }
  304. }