GzipInputStream.cs 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374
  1. // GzipInputStream.cs
  2. //
  3. // Copyright (C) 2001 Mike Krueger
  4. //
  5. // This file was translated from java, it was part of the GNU Classpath
  6. // Copyright (C) 2001 Free Software Foundation, Inc.
  7. //
  8. // This program is free software; you can redistribute it and/or
  9. // modify it under the terms of the GNU General Public License
  10. // as published by the Free Software Foundation; either version 2
  11. // of the License, or (at your option) any later version.
  12. //
  13. // This program is distributed in the hope that it will be useful,
  14. // but WITHOUT ANY WARRANTY; without even the implied warranty of
  15. // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  16. // GNU General Public License for more details.
  17. //
  18. // You should have received a copy of the GNU General Public License
  19. // along with this program; if not, write to the Free Software
  20. // Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
  21. //
  22. // Linking this library statically or dynamically with other modules is
  23. // making a combined work based on this library. Thus, the terms and
  24. // conditions of the GNU General Public License cover the whole
  25. // combination.
  26. //
  27. // As a special exception, the copyright holders of this library give you
  28. // permission to link this library with independent modules to produce an
  29. // executable, regardless of the license terms of these independent
  30. // modules, and to copy and distribute the resulting executable under
  31. // terms of your choice, provided that you also meet, for each linked
  32. // independent module, the terms and conditions of the license of that
  33. // module. An independent module is a module which is not derived from
  34. // or based on this library. If you modify this library, you may extend
  35. // this exception to your version of the library, but you are not
  36. // obligated to do so. If you do not wish to do so, delete this
  37. // exception statement from your version.
  38. // HISTORY
  39. // 2009-08-11 T9121 Geoff Hart Added Multi-member gzip support
  40. // 2012-06-03 Z-1802 Incorrect endianness and subfield in FEXTRA handling.
  41. using System;
  42. using System.IO;
  43. using CommonMPQ.SharpZipLib.Checksums;
  44. using CommonMPQ.SharpZipLib.Zip.Compression;
  45. using CommonMPQ.SharpZipLib.Zip.Compression.Streams;
  46. namespace CommonMPQ.SharpZipLib.GZip
  47. {
  /// <summary>
  /// This filter stream is used to decompress a "GZIP" format stream.
  /// The "GZIP" format is described in RFC 1952.
  ///
  /// author of the original java version : John Leuner
  /// </summary>
  /// <remarks>
  /// A GZIP stream may consist of several members (header, deflate data, footer)
  /// written back to back; this class reads them one after another and presents
  /// the concatenated uncompressed data.
  /// </remarks>
  /// <example> This sample shows how to unzip a gzipped file
  /// <code>
  /// using System;
  /// using System.IO;
  ///
  /// using CommonMPQ.SharpZipLib.Core;
  /// using CommonMPQ.SharpZipLib.GZip;
  ///
  /// class MainClass
  /// {
  ///   public static void Main(string[] args)
  ///   {
  ///     using (Stream inStream = new GZipInputStream(File.OpenRead(args[0])))
  ///     using (FileStream outStream = File.Create(Path.GetFileNameWithoutExtension(args[0]))) {
  ///       byte[] buffer = new byte[4096];
  ///       StreamUtils.Copy(inStream, outStream, buffer);
  ///     }
  ///   }
  /// }
  /// </code>
  /// </example>
  public class GZipInputStream : InflaterInputStream
  {
    #region Instance Fields
    /// <summary>
    /// CRC-32 value for uncompressed data
    /// </summary>
    protected Crc32 crc;

    /// <summary>
    /// Flag to indicate if we've read the GZIP header yet for the current member (block of compressed data).
    /// This is tracked per-block as the file is parsed.
    /// </summary>
    bool readGZIPHeader;
    #endregion

    #region Constructors
    /// <summary>
    /// Creates a GZipInputStream with the default buffer size (4096 bytes)
    /// </summary>
    /// <param name="baseInputStream">
    /// The stream to read compressed data from (in GZIP format)
    /// </param>
    public GZipInputStream(Stream baseInputStream)
      : this(baseInputStream, 4096)
    {
    }

    /// <summary>
    /// Creates a GZipInputStream with the specified buffer size
    /// </summary>
    /// <param name="baseInputStream">
    /// The stream to read compressed data from (in GZIP format)
    /// </param>
    /// <param name="size">
    /// Size of the buffer to use
    /// </param>
    public GZipInputStream(Stream baseInputStream, int size)
      // NOTE(review): the 'true' argument presumably selects raw deflate data without
      // zlib framing (the GZIP framing is parsed by this class) - confirm against Inflater.
      : base(baseInputStream, new Inflater(true), size)
    {
    }
    #endregion

    #region Stream overrides
    /// <summary>
    /// Reads uncompressed data into an array of bytes
    /// </summary>
    /// <param name="buffer">
    /// The buffer to read uncompressed data into
    /// </param>
    /// <param name="offset">
    /// The offset indicating where the data should be placed
    /// </param>
    /// <param name="count">
    /// The number of uncompressed bytes to be read
    /// </param>
    /// <returns>
    /// Returns the number of bytes actually read; 0 when no further GZIP member
    /// is available or when <paramref name="count"/> is 0.
    /// </returns>
    /// <exception cref="GZipException">The header or footer of a member is invalid.</exception>
    /// <exception cref="EndOfStreamException">The input ends inside a header or footer.</exception>
    public override int Read(byte[] buffer, int offset, int count)
    {
      // A GZIP file can contain multiple blocks of compressed data, although this is quite rare.
      // A compressed block could potentially be empty, so we need to loop until we reach EOF or
      // we find data.
      while (true) {
        // If we haven't read the header for this block, read it. No header at all
        // (0 bytes available) is EOF; an incomplete header throws.
        if (!readGZIPHeader && !ReadHeader()) {
          return 0;
        }

        // Try to read compressed data and fold it into the running data checksum
        int bytesRead = base.Read(buffer, offset, count);
        if (bytesRead > 0) {
          crc.Update(buffer, offset, bytesRead);
        }

        // If this is the end of the member, validate its footer
        if (inf.IsFinished) {
          ReadFooter();
        }

        // A request for 0 bytes can never yield any data, so return instead of
        // spinning in this loop forever.
        if (bytesRead > 0 || count == 0) {
          return bytesRead;
        }
      }
    }
    #endregion

    #region Support routines
    /// <summary>
    /// Reads and validates the header of the next GZIP member and resets the data CRC.
    /// </summary>
    /// <returns>
    /// true if a header was read; false if the input holds no more data (EOF).
    /// </returns>
    /// <exception cref="GZipException">The header is malformed or its CRC16 does not match.</exception>
    /// <exception cref="EndOfStreamException">A header byte could not be read.</exception>
    bool ReadHeader()
    {
      // Initialize CRC for this block
      crc = new Crc32();

      // Make sure there is data in file. We can't rely on ReadLeByte() to fill the buffer, as this could be EOF,
      // which is fine, but ReadLeByte() throws an exception if it doesn't find data, so we do this part ourselves.
      if (inputBuffer.Available <= 0) {
        inputBuffer.Fill();
        if (inputBuffer.Available <= 0) {
          // No header, EOF.
          return false;
        }
      }

      // Checksum over the header bytes, needed only if FHCRC is set
      Crc32 headCRC = new Crc32();

      // 1. Check the two magic bytes
      if (ReadHeaderByte(headCRC) != (GZipConstants.GZIP_MAGIC >> 8)) {
        throw new GZipException("Error GZIP header, first magic byte doesn't match");
      }
      if (ReadHeaderByte(headCRC) != (GZipConstants.GZIP_MAGIC & 0xFF)) {
        throw new GZipException("Error GZIP header, second magic byte doesn't match");
      }

      // 2. Check the compression type (must be 8)
      if (ReadHeaderByte(headCRC) != 8) {
        throw new GZipException("Error GZIP header, data not in deflate format");
      }

      // 3. Check the flags
      int flags = ReadHeaderByte(headCRC);

      /* This flag byte is divided into individual bits as follows:
         bit 0 FTEXT
         bit 1 FHCRC
         bit 2 FEXTRA
         bit 3 FNAME
         bit 4 FCOMMENT
         bit 5 reserved
         bit 6 reserved
         bit 7 reserved
      */

      // 3.1 Check the reserved bits are zero
      if ((flags & 0xE0) != 0) {
        throw new GZipException("Reserved flag bits in GZIP header != 0");
      }

      // 4.-6. Skip the modification time, extra flags, and OS type
      SkipHeaderBytes(headCRC, 6);

      // 7. Read extra field
      if ((flags & GZipConstants.FEXTRA) != 0) {
        // XLEN is total length of extra subfields, we will skip them all
        int lenLow = ReadHeaderByte(headCRC);
        int lenHigh = ReadHeaderByte(headCRC);
        SkipHeaderBytes(headCRC, (lenHigh << 8) | lenLow); // gzip is LSB first
      }

      // 8. Read file name
      if ((flags & GZipConstants.FNAME) != 0) {
        SkipZeroTerminatedField(headCRC);
      }

      // 9. Read comment
      if ((flags & GZipConstants.FCOMMENT) != 0) {
        SkipZeroTerminatedField(headCRC);
      }

      // 10. Read header CRC
      if ((flags & GZipConstants.FHCRC) != 0) {
        // The CRC16 bytes themselves are not part of the header checksum.
        int crcLow = ReadHeaderByte(null);
        int crcHigh = ReadHeaderByte(null);

        // RFC 1952 stores multi-byte values least significant byte first.
        int crcval = (crcHigh << 8) | crcLow;
        if (crcval != ((int) headCRC.Value & 0xffff)) {
          throw new GZipException("Header CRC value mismatch");
        }
      }

      readGZIPHeader = true;
      return true;
    }

    /// <summary>
    /// Reads one header byte from the input buffer.
    /// </summary>
    /// <param name="headCRC">
    /// Checksum to update with the byte read, or null if the byte is not checksummed.
    /// </param>
    /// <returns>The byte read.</returns>
    /// <exception cref="EndOfStreamException">The read reported a negative value.</exception>
    int ReadHeaderByte(Crc32 headCRC)
    {
      int value = inputBuffer.ReadLeByte();
      if (value < 0) {
        throw new EndOfStreamException("EOS reading GZIP header");
      }
      if (headCRC != null) {
        headCRC.Update(value);
      }
      return value;
    }

    /// <summary>
    /// Skips a fixed number of header bytes, adding each to the header checksum.
    /// </summary>
    /// <param name="headCRC">Checksum to update with the skipped bytes.</param>
    /// <param name="byteCount">Number of bytes to skip.</param>
    void SkipHeaderBytes(Crc32 headCRC, int byteCount)
    {
      for (int i = 0; i < byteCount; i++) {
        ReadHeaderByte(headCRC);
      }
    }

    /// <summary>
    /// Skips a zero-terminated header field (file name or comment), adding every
    /// byte including the terminator to the header checksum.
    /// </summary>
    /// <param name="headCRC">Checksum to update with the skipped bytes.</param>
    void SkipZeroTerminatedField(Crc32 headCRC)
    {
      while (ReadHeaderByte(headCRC) != 0) {
        // keep consuming until the terminating zero byte
      }
    }

    /// <summary>
    /// Reads the 8-byte footer of the current member (CRC-32 and uncompressed size,
    /// both LSB first), checks both against what was decompressed, and prepares
    /// the stream for a possible following member.
    /// </summary>
    /// <exception cref="EndOfStreamException">The footer is incomplete.</exception>
    /// <exception cref="GZipException">The CRC or the byte count does not match.</exception>
    void ReadFooter()
    {
      byte[] footer = new byte[8];

      // End of stream; reclaim all bytes from inf, read the final byte count, and reset the inflator
      long bytesRead = inf.TotalOut & 0xffffffff;
      inputBuffer.Available += inf.RemainingInput;
      inf.Reset();

      // Read footer from inputBuffer
      int needed = 8;
      while (needed > 0) {
        int count = inputBuffer.ReadClearTextBuffer(footer, 8 - needed, needed);
        if (count <= 0) {
          throw new EndOfStreamException("EOS reading GZIP footer");
        }
        needed -= count; // Jewel Jan 16
      }

      // Calculate CRC
      int crcval = (footer[0] & 0xff) | ((footer[1] & 0xff) << 8) | ((footer[2] & 0xff) << 16) | (footer[3] << 24);
      if (crcval != (int) crc.Value) {
        throw new GZipException("GZIP crc sum mismatch, theirs \"" + crcval + "\" and ours \"" + (int) crc.Value + "\"");
      }

      // NOTE The total here is the original total modulo 2 ^ 32.
      uint total =
        (uint)((uint)footer[4] & 0xff) |
        (uint)(((uint)footer[5] & 0xff) << 8) |
        (uint)(((uint)footer[6] & 0xff) << 16) |
        (uint)((uint)footer[7] << 24);

      if (bytesRead != total) {
        throw new GZipException("Number of bytes mismatch in footer");
      }

      // Mark header read as false so if another header exists, we'll continue reading through the file
      readGZIPHeader = false;
    }
    #endregion
  }
  323. }