LzwInputStream.cs 21 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599
  1. // LzwInputStream.cs
  2. //
  3. // Copyright (C) 2009 Gabriel Burca
  4. //
  5. // This program is free software; you can redistribute it and/or
  6. // modify it under the terms of the GNU General Public License
  7. // as published by the Free Software Foundation; either version 2
  8. // of the License, or (at your option) any later version.
  9. //
  10. // This program is distributed in the hope that it will be useful,
  11. // but WITHOUT ANY WARRANTY; without even the implied warranty of
  12. // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  13. // GNU General Public License for more details.
  14. //
  15. // You should have received a copy of the GNU General Public License
  16. // along with this program; if not, write to the Free Software
  17. // Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
  18. //
  19. // Linking this library statically or dynamically with other modules is
  20. // making a combined work based on this library. Thus, the terms and
  21. // conditions of the GNU General Public License cover the whole
  22. // combination.
  23. //
  24. // As a special exception, the copyright holders of this library give you
  25. // permission to link this library with independent modules to produce an
  26. // executable, regardless of the license terms of these independent
  27. // modules, and to copy and distribute the resulting executable under
  28. // terms of your choice, provided that you also meet, for each linked
  29. // independent module, the terms and conditions of the license of that
  30. // module. An independent module is a module which is not derived from
  31. // or based on this library. If you modify this library, you may extend
  32. // this exception to your version of the library, but you are not
  33. // obligated to do so. If you do not wish to do so, delete this
  34. // exception statement from your version.
  35. using System;
  36. using System.IO;
  37. namespace CommonMPQ.SharpZipLib.LZW
  38. {
  39. /// <summary>
  40. /// This filter stream is used to decompress a LZW format stream.
  41. /// Specifically, a stream that uses the LZC compression method.
  42. /// This file format is usually associated with the .Z file extension.
  43. ///
  44. /// See http://en.wikipedia.org/wiki/Compress
  45. /// See http://wiki.wxwidgets.org/Development:_Z_File_Format
  46. ///
  47. /// The file header consists of 3 (or optionally 4) bytes. The first two bytes
  48. /// contain the magic marker "0x1f 0x9d", followed by a byte of flags.
  49. ///
  50. /// Based on Java code by Ronald Tschalar, which in turn was based on the unlzw.c
  51. /// code in the gzip package.
  52. /// </summary>
  53. /// <example> This sample shows how to unzip a compressed file
  54. /// <code>
  55. /// using System;
  56. /// using System.IO;
  57. ///
  58. /// using CommonMPQ.SharpZipLib.Core;
  59. /// using CommonMPQ.SharpZipLib.LZW;
  60. ///
  61. /// class MainClass
  62. /// {
  63. /// public static void Main(string[] args)
  64. /// {
  65. /// using (Stream inStream = new LzwInputStream(File.OpenRead(args[0])))
  66. /// using (FileStream outStream = File.Create(Path.GetFileNameWithoutExtension(args[0]))) {
  67. /// byte[] buffer = new byte[4096];
  68. /// StreamUtils.Copy(inStream, outStream, buffer);
  69. /// // OR
  70. /// inStream.Read(buffer, 0, buffer.Length);
  71. /// // now do something with the buffer
  72. /// }
  73. /// }
  74. /// }
  75. /// </code>
  76. /// </example>
  77. public class LzwInputStream : Stream
  78. {
  79. /// <summary>
  80. /// Get/set flag indicating ownership of underlying stream.
  81. /// When the flag is true <see cref="Close"/> will close the underlying stream also.
  82. /// </summary>
  83. /// <remarks>
  84. /// The default value is true.
  85. /// </remarks>
  86. public bool IsStreamOwner
  87. {
  88. get { return isStreamOwner; }
  89. set { isStreamOwner = value; }
  90. }
  91. /// <summary>
  92. /// Creates a LzwInputStream
  93. /// </summary>
  94. /// <param name="baseInputStream">
  95. /// The stream to read compressed data from (baseInputStream LZW format)
  96. /// </param>
  97. public LzwInputStream(Stream baseInputStream) {
  98. this.baseInputStream = baseInputStream;
  99. }
  100. /// <summary>
  101. /// See <see cref="System.IO.Stream.ReadByte"/>
  102. /// </summary>
  103. /// <returns></returns>
  104. public override int ReadByte() {
  105. int b = Read(one, 0, 1);
  106. if (b == 1)
  107. return (one[0] & 0xff);
  108. return -1;
  109. }
  110. /// <summary>
  111. /// Reads decompressed data into the provided buffer byte array
  112. /// </summary>
  113. /// <param name ="buffer">
  114. /// The array to read and decompress data into
  115. /// </param>
  116. /// <param name ="offset">
  117. /// The offset indicating where the data should be placed
  118. /// </param>
  119. /// <param name ="count">
  120. /// The number of bytes to decompress
  121. /// </param>
  122. /// <returns>The number of bytes read. Zero signals the end of stream</returns>
  123. public override int Read(byte[] buffer, int offset, int count) {
  124. if (!headerParsed) ParseHeader();
  125. if (eof) return 0;
  126. int start = offset;
  127. /* Using local copies of various variables speeds things up by as
  128. * much as 30% in Java! Performance not tested in C#.
  129. */
  130. int[] lTabPrefix = tabPrefix;
  131. byte[] lTabSuffix = tabSuffix;
  132. byte[] lStack = stack;
  133. int lNBits = nBits;
  134. int lMaxCode = maxCode;
  135. int lMaxMaxCode = maxMaxCode;
  136. int lBitMask = bitMask;
  137. int lOldCode = oldCode;
  138. byte lFinChar = finChar;
  139. int lStackP = stackP;
  140. int lFreeEnt = freeEnt;
  141. byte[] lData = data;
  142. int lBitPos = bitPos;
  143. // empty stack if stuff still left
  144. int sSize = lStack.Length - lStackP;
  145. if (sSize > 0) {
  146. int num = (sSize >= count) ? count : sSize;
  147. Array.Copy(lStack, lStackP, buffer, offset, num);
  148. offset += num;
  149. count -= num;
  150. lStackP += num;
  151. }
  152. if (count == 0) {
  153. stackP = lStackP;
  154. return offset - start;
  155. }
  156. // loop, filling local buffer until enough data has been decompressed
  157. MainLoop: do {
  158. if (end < EXTRA) {
  159. Fill();
  160. }
  161. int bitIn = (got > 0) ? (end - end % lNBits) << 3 :
  162. (end << 3) - (lNBits - 1);
  163. while (lBitPos < bitIn) {
  164. #region A
  165. // handle 1-byte reads correctly
  166. if (count == 0) {
  167. nBits = lNBits;
  168. maxCode = lMaxCode;
  169. maxMaxCode = lMaxMaxCode;
  170. bitMask = lBitMask;
  171. oldCode = lOldCode;
  172. finChar = lFinChar;
  173. stackP = lStackP;
  174. freeEnt = lFreeEnt;
  175. bitPos = lBitPos;
  176. return offset - start;
  177. }
  178. // check for code-width expansion
  179. if (lFreeEnt > lMaxCode) {
  180. int nBytes = lNBits << 3;
  181. lBitPos = (lBitPos - 1) +
  182. nBytes - (lBitPos - 1 + nBytes) % nBytes;
  183. lNBits++;
  184. lMaxCode = (lNBits == maxBits) ? lMaxMaxCode :
  185. (1 << lNBits) - 1;
  186. lBitMask = (1 << lNBits) - 1;
  187. lBitPos = ResetBuf(lBitPos);
  188. goto MainLoop;
  189. }
  190. #endregion
  191. #region B
  192. // read next code
  193. int pos = lBitPos >> 3;
  194. int code = (((lData[pos] & 0xFF) |
  195. ((lData[pos + 1] & 0xFF) << 8) |
  196. ((lData[pos + 2] & 0xFF) << 16)) >>
  197. (lBitPos & 0x7)) & lBitMask;
  198. lBitPos += lNBits;
  199. // handle first iteration
  200. if (lOldCode == -1) {
  201. if (code >= 256) throw new LzwException("corrupt input: " + code + " > 255");
  202. lFinChar = (byte) (lOldCode = code);
  203. buffer[offset++] = lFinChar;
  204. count--;
  205. continue;
  206. }
  207. // handle CLEAR code
  208. if (code == TBL_CLEAR && blockMode) {
  209. Array.Copy(zeros, 0, lTabPrefix, 0, zeros.Length);
  210. lFreeEnt = TBL_FIRST - 1;
  211. int nBytes = lNBits << 3;
  212. lBitPos = (lBitPos - 1) + nBytes - (lBitPos - 1 + nBytes) % nBytes;
  213. lNBits = LzwConstants.INIT_BITS;
  214. lMaxCode = (1 << lNBits) - 1;
  215. lBitMask = lMaxCode;
  216. // Code tables reset
  217. lBitPos = ResetBuf(lBitPos);
  218. goto MainLoop;
  219. }
  220. #endregion
  221. #region C
  222. // setup
  223. int inCode = code;
  224. lStackP = lStack.Length;
  225. // Handle KwK case
  226. if (code >= lFreeEnt) {
  227. if (code > lFreeEnt) {
  228. throw new LzwException("corrupt input: code=" + code +
  229. ", freeEnt=" + lFreeEnt);
  230. }
  231. lStack[--lStackP] = lFinChar;
  232. code = lOldCode;
  233. }
  234. // Generate output characters in reverse order
  235. while (code >= 256) {
  236. lStack[--lStackP] = lTabSuffix[code];
  237. code = lTabPrefix[code];
  238. }
  239. lFinChar = lTabSuffix[code];
  240. buffer[offset++] = lFinChar;
  241. count--;
  242. // And put them out in forward order
  243. sSize = lStack.Length - lStackP;
  244. int num = (sSize >= count) ? count : sSize;
  245. Array.Copy(lStack, lStackP, buffer, offset, num);
  246. offset += num;
  247. count -= num;
  248. lStackP += num;
  249. #endregion
  250. #region D
  251. // generate new entry in table
  252. if (lFreeEnt < lMaxMaxCode) {
  253. lTabPrefix[lFreeEnt] = lOldCode;
  254. lTabSuffix[lFreeEnt] = lFinChar;
  255. lFreeEnt++;
  256. }
  257. // Remember previous code
  258. lOldCode = inCode;
  259. // if output buffer full, then return
  260. if (count == 0) {
  261. nBits = lNBits;
  262. maxCode = lMaxCode;
  263. bitMask = lBitMask;
  264. oldCode = lOldCode;
  265. finChar = lFinChar;
  266. stackP = lStackP;
  267. freeEnt = lFreeEnt;
  268. bitPos = lBitPos;
  269. return offset - start;
  270. }
  271. #endregion
  272. } // while
  273. lBitPos = ResetBuf(lBitPos);
  274. } while (got > 0); // do..while
  275. nBits = lNBits;
  276. maxCode = lMaxCode;
  277. bitMask = lBitMask;
  278. oldCode = lOldCode;
  279. finChar = lFinChar;
  280. stackP = lStackP;
  281. freeEnt = lFreeEnt;
  282. bitPos = lBitPos;
  283. eof = true;
  284. return offset - start;
  285. }
  286. /// <summary>
  287. /// Moves the unread data in the buffer to the beginning and resets
  288. /// the pointers.
  289. /// </summary>
  290. /// <param name="bitPosition"></param>
  291. /// <returns></returns>
  292. private int ResetBuf(int bitPosition) {
  293. int pos = bitPosition >> 3;
  294. Array.Copy(data, pos, data, 0, end - pos);
  295. end -= pos;
  296. return 0;
  297. }
  298. private void Fill() {
  299. got = baseInputStream.Read(data, end, data.Length - 1 - end);
  300. if (got > 0) {
  301. end += got;
  302. }
  303. }
  304. private void ParseHeader() {
  305. headerParsed = true;
  306. byte[] hdr = new byte[LzwConstants.HDR_SIZE];
  307. int result = baseInputStream.Read(hdr, 0, hdr.Length);
  308. // Check the magic marker
  309. if (result < 0)
  310. throw new LzwException("Failed to read LZW header");
  311. if (hdr[0] != (LzwConstants.MAGIC >> 8) || hdr[1] != (LzwConstants.MAGIC & 0xff)) {
  312. throw new LzwException(String.Format(
  313. "Wrong LZW header. Magic bytes don't match. 0x{0:x2} 0x{1:x2}",
  314. hdr[0], hdr[1]));
  315. }
  316. // Check the 3rd header byte
  317. blockMode = (hdr[2] & LzwConstants.BLOCK_MODE_MASK) > 0;
  318. maxBits = hdr[2] & LzwConstants.BIT_MASK;
  319. if (maxBits > LzwConstants.MAX_BITS) {
  320. throw new LzwException("Stream compressed with " + maxBits +
  321. " bits, but decompression can only handle " +
  322. LzwConstants.MAX_BITS + " bits.");
  323. }
  324. if ((hdr[2] & LzwConstants.RESERVED_MASK) > 0) {
  325. throw new LzwException("Unsupported bits set in the header.");
  326. }
  327. // Initialize variables
  328. maxMaxCode = 1 << maxBits;
  329. nBits = LzwConstants.INIT_BITS;
  330. maxCode = (1 << nBits) - 1;
  331. bitMask = maxCode;
  332. oldCode = -1;
  333. finChar = 0;
  334. freeEnt = blockMode ? TBL_FIRST : 256;
  335. tabPrefix = new int[1 << maxBits];
  336. tabSuffix = new byte[1 << maxBits];
  337. stack = new byte[1 << maxBits];
  338. stackP = stack.Length;
  339. for (int idx = 255; idx >= 0; idx--)
  340. tabSuffix[idx] = (byte)idx;
  341. }
  342. #region Stream Overrides
  343. /// <summary>
  344. /// Gets a value indicating whether the current stream supports reading
  345. /// </summary>
  346. public override bool CanRead
  347. {
  348. get
  349. {
  350. return baseInputStream.CanRead;
  351. }
  352. }
  353. /// <summary>
  354. /// Gets a value of false indicating seeking is not supported for this stream.
  355. /// </summary>
  356. public override bool CanSeek
  357. {
  358. get
  359. {
  360. return false;
  361. }
  362. }
  363. /// <summary>
  364. /// Gets a value of false indicating that this stream is not writeable.
  365. /// </summary>
  366. public override bool CanWrite
  367. {
  368. get
  369. {
  370. return false;
  371. }
  372. }
  373. /// <summary>
  374. /// A value representing the length of the stream in bytes.
  375. /// </summary>
  376. public override long Length
  377. {
  378. get
  379. {
  380. return got;
  381. }
  382. }
  383. /// <summary>
  384. /// The current position within the stream.
  385. /// Throws a NotSupportedException when attempting to set the position
  386. /// </summary>
  387. /// <exception cref="NotSupportedException">Attempting to set the position</exception>
  388. public override long Position
  389. {
  390. get
  391. {
  392. return baseInputStream.Position;
  393. }
  394. set
  395. {
  396. throw new NotSupportedException("InflaterInputStream Position not supported");
  397. }
  398. }
  399. /// <summary>
  400. /// Flushes the baseInputStream
  401. /// </summary>
  402. public override void Flush()
  403. {
  404. baseInputStream.Flush();
  405. }
  406. /// <summary>
  407. /// Sets the position within the current stream
  408. /// Always throws a NotSupportedException
  409. /// </summary>
  410. /// <param name="offset">The relative offset to seek to.</param>
  411. /// <param name="origin">The <see cref="SeekOrigin"/> defining where to seek from.</param>
  412. /// <returns>The new position in the stream.</returns>
  413. /// <exception cref="NotSupportedException">Any access</exception>
  414. public override long Seek(long offset, SeekOrigin origin)
  415. {
  416. throw new NotSupportedException("Seek not supported");
  417. }
  418. /// <summary>
  419. /// Set the length of the current stream
  420. /// Always throws a NotSupportedException
  421. /// </summary>
  422. /// <param name="value">The new length value for the stream.</param>
  423. /// <exception cref="NotSupportedException">Any access</exception>
  424. public override void SetLength(long value)
  425. {
  426. throw new NotSupportedException("InflaterInputStream SetLength not supported");
  427. }
  428. /// <summary>
  429. /// Writes a sequence of bytes to stream and advances the current position
  430. /// This method always throws a NotSupportedException
  431. /// </summary>
  432. /// <param name="buffer">Thew buffer containing data to write.</param>
  433. /// <param name="offset">The offset of the first byte to write.</param>
  434. /// <param name="count">The number of bytes to write.</param>
  435. /// <exception cref="NotSupportedException">Any access</exception>
  436. public override void Write(byte[] buffer, int offset, int count)
  437. {
  438. throw new NotSupportedException("InflaterInputStream Write not supported");
  439. }
  440. /// <summary>
  441. /// Writes one byte to the current stream and advances the current position
  442. /// Always throws a NotSupportedException
  443. /// </summary>
  444. /// <param name="value">The byte to write.</param>
  445. /// <exception cref="NotSupportedException">Any access</exception>
  446. public override void WriteByte(byte value)
  447. {
  448. throw new NotSupportedException("InflaterInputStream WriteByte not supported");
  449. }
  450. /// <summary>
  451. /// Entry point to begin an asynchronous write. Always throws a NotSupportedException.
  452. /// </summary>
  453. /// <param name="buffer">The buffer to write data from</param>
  454. /// <param name="offset">Offset of first byte to write</param>
  455. /// <param name="count">The maximum number of bytes to write</param>
  456. /// <param name="callback">The method to be called when the asynchronous write operation is completed</param>
  457. /// <param name="state">A user-provided object that distinguishes this particular asynchronous write request from other requests</param>
  458. /// <returns>An <see cref="System.IAsyncResult">IAsyncResult</see> that references the asynchronous write</returns>
  459. /// <exception cref="NotSupportedException">Any access</exception>
  460. public override IAsyncResult BeginWrite(byte[] buffer, int offset, int count, AsyncCallback callback, object state)
  461. {
  462. throw new NotSupportedException("InflaterInputStream BeginWrite not supported");
  463. }
  464. /// <summary>
  465. /// Closes the input stream. When <see cref="IsStreamOwner"></see>
  466. /// is true the underlying stream is also closed.
  467. /// </summary>
  468. public override void Close()
  469. {
  470. if (!isClosed)
  471. {
  472. isClosed = true;
  473. if (isStreamOwner)
  474. {
  475. baseInputStream.Close();
  476. }
  477. }
  478. }
  479. #endregion
  480. #region Instance Fields
  481. Stream baseInputStream;
  482. /// <summary>
  483. /// Flag indicating wether this instance is designated the stream owner.
  484. /// When closing if this flag is true the underlying stream is closed.
  485. /// </summary>
  486. bool isStreamOwner = true;
  487. /// <summary>
  488. /// Flag indicating wether this instance has been closed or not.
  489. /// </summary>
  490. bool isClosed;
  491. readonly byte[] one = new byte[1];
  492. bool headerParsed;
  493. // string table stuff
  494. private const int TBL_CLEAR = 0x100;
  495. private const int TBL_FIRST = TBL_CLEAR + 1;
  496. private int[] tabPrefix;
  497. private byte[] tabSuffix;
  498. private readonly int[] zeros = new int[256];
  499. private byte[] stack;
  500. // various state
  501. private bool blockMode;
  502. private int nBits;
  503. private int maxBits;
  504. private int maxMaxCode;
  505. private int maxCode;
  506. private int bitMask;
  507. private int oldCode;
  508. private byte finChar;
  509. private int stackP;
  510. private int freeEnt;
  511. // input buffer
  512. private readonly byte[] data = new byte[1024 * 8];
  513. private int bitPos;
  514. private int end;
  515. int got;
  516. private bool eof;
  517. private const int EXTRA = 64;
  518. #endregion
  519. }
  520. }