View Javadoc

1   /*
2    * Licensed to the Apache Software Foundation (ASF) under one or more
3    * contributor license agreements.  See the NOTICE file distributed with
4    * this work for additional information regarding copyright ownership.
5    * The ASF licenses this file to You under the Apache License, Version 2.0
6    * (the "License"); you may not use this file except in compliance with
7    * the License.  You may obtain a copy of the License at
8    * 
9    *      http://www.apache.org/licenses/LICENSE-2.0
10   * 
11   * Unless required by applicable law or agreed to in writing, software
12   * distributed under the License is distributed on an "AS IS" BASIS,
13   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14   * See the License for the specific language governing permissions and
15   * limitations under the License.
16   */
17  package org.apache.commons.vfs.provider.tar;
18  
19  import java.io.FilterInputStream;
20  import java.io.IOException;
21  import java.io.InputStream;
22  import java.io.OutputStream;
23  
24  /***
25   * The TarInputStream reads a UNIX tar archive as an InputStream. methods are
26   * provided to position at each successive entry in the archive, and the read
27   * each entry as a normal input stream using read().
28   *
29   * @author <a href="mailto:time@ice.com">Timothy Gerard Endres</a>
30   * @author <a href="mailto:stefano@apache.org">Stefano Mazzocchi</a>
31   * @author <a href="mailto:peter@apache.org">Peter Donald</a>
32   * @version $Revision: 480428 $ $Date: 2006-11-29 07:15:24 +0100 (Mi, 29 Nov 2006) $
33   * @see TarInputStream
34   * @see TarEntry
35   */
36  class TarInputStream
37      extends FilterInputStream
38  {
39      private TarBuffer m_buffer;
40      private TarEntry m_currEntry;
41      private boolean m_debug;
42      private int m_entryOffset;
43      private int m_entrySize;
44      private boolean m_hasHitEOF;
45      private byte[] m_oneBuf;
46      private byte[] m_readBuf;
47  
48      /***
49       * Construct a TarInputStream using specified input
50       * stream and default block and record sizes.
51       *
52       * @param input stream to create TarInputStream from
53       * @see TarBuffer#DEFAULT_BLOCKSIZE
54       * @see TarBuffer#DEFAULT_RECORDSIZE
55       */
56      TarInputStream( final InputStream input )
57      {
58          this( input, TarBuffer.DEFAULT_BLOCKSIZE, TarBuffer.DEFAULT_RECORDSIZE );
59      }
60  
61      /***
62       * Construct a TarInputStream using specified input
63       * stream, block size and default record sizes.
64       *
65       * @param input stream to create TarInputStream from
66       * @param blockSize the block size to use
67       * @see TarBuffer#DEFAULT_RECORDSIZE
68       */
69      TarInputStream( final InputStream input,
70                             final int blockSize )
71      {
72          this( input, blockSize, TarBuffer.DEFAULT_RECORDSIZE );
73      }
74  
75      /***
76       * Construct a TarInputStream using specified input
77       * stream, block size and record sizes.
78       *
79       * @param input stream to create TarInputStream from
80       * @param blockSize the block size to use
81       * @param recordSize the record size to use
82       */
83      TarInputStream( final InputStream input,
84                             final int blockSize,
85                             final int recordSize )
86      {
87          super( input );
88  
89          m_buffer = new TarBuffer( input, blockSize, recordSize );
90          m_oneBuf = new byte[ 1 ];
91      }
92  
93      /***
94       * Sets the debugging flag.
95       *
96       * @param debug The new Debug value
97       */
98      public void setDebug( final boolean debug )
99      {
100         m_debug = debug;
101         m_buffer.setDebug( debug );
102     }
103 
104     /***
105      * Get the next entry in this tar archive. This will skip over any remaining
106      * data in the current entry, if there is one, and place the input stream at
107      * the header of the next entry, and read the header and instantiate a new
108      * TarEntry from the header bytes and return that entry. If there are no
109      * more entries in the archive, null will be returned to indicate that the
110      * end of the archive has been reached.
111      *
112      * @return The next TarEntry in the archive, or null.
113      * @exception IOException Description of Exception
114      */
115     public TarEntry getNextEntry()
116         throws IOException
117     {
118         if( m_hasHitEOF )
119         {
120             return null;
121         }
122 
123         if( m_currEntry != null )
124         {
125             final int numToSkip = m_entrySize - m_entryOffset;
126 
127             if( m_debug )
128             {
129                 final String message = "TarInputStream: SKIP currENTRY '" +
130                     m_currEntry.getName() + "' SZ " + m_entrySize +
131                     " OFF " + m_entryOffset + "  skipping " + numToSkip + " bytes";
132                 debug( message );
133             }
134 
135             if( numToSkip > 0 )
136             {
137                 skip( numToSkip );
138             }
139 
140             m_readBuf = null;
141         }
142 
143         final byte[] headerBuf = m_buffer.readRecord();
144         if( headerBuf == null )
145         {
146             if( m_debug )
147             {
148                 debug( "READ NULL RECORD" );
149             }
150             m_hasHitEOF = true;
151         }
152         else if( m_buffer.isEOFRecord( headerBuf ) )
153         {
154             if( m_debug )
155             {
156                 debug( "READ EOF RECORD" );
157             }
158             m_hasHitEOF = true;
159         }
160 
161         if( m_hasHitEOF )
162         {
163             m_currEntry = null;
164         }
165         else
166         {
167             m_currEntry = new TarEntry( headerBuf );
168 
169             if( !( headerBuf[ 257 ] == 'u' && headerBuf[ 258 ] == 's' &&
170                 headerBuf[ 259 ] == 't' && headerBuf[ 260 ] == 'a' &&
171                 headerBuf[ 261 ] == 'r' ) )
172             {
173                 //Must be v7Format
174             }
175 
176             if( m_debug )
177             {
178                 final String message = "TarInputStream: SET CURRENTRY '" +
179                     m_currEntry.getName() + "' size = " + m_currEntry.getSize();
180                 debug( message );
181             }
182 
183             m_entryOffset = 0;
184 
185             // REVIEW How do we resolve this discrepancy?!
186             m_entrySize = (int)m_currEntry.getSize();
187         }
188 
189         if( null != m_currEntry && m_currEntry.isGNULongNameEntry() )
190         {
191             // read in the name
192             final StringBuffer longName = new StringBuffer();
193             final byte[] buffer = new byte[ 256 ];
194             int length = 0;
195             while( ( length = read( buffer ) ) >= 0 )
196             {
197                 final String str = new String( buffer, 0, length );
198                 longName.append( str );
199             }
200             getNextEntry();
201 
202             // remove trailing null terminator
203             if (longName.length() > 0
204                 && longName.charAt(longName.length() - 1) == 0) {
205                 longName.deleteCharAt(longName.length() - 1);
206             }
207             
208             m_currEntry.setName( longName.toString() );
209         }
210 
211         return m_currEntry;
212     }
213 
214     /***
215      * Get the record size being used by this stream's TarBuffer.
216      *
217      * @return The TarBuffer record size.
218      */
219     public int getRecordSize()
220     {
221         return m_buffer.getRecordSize();
222     }
223 
224     /***
225      * Get the available data that can be read from the current entry in the
226      * archive. This does not indicate how much data is left in the entire
227      * archive, only in the current entry. This value is determined from the
228      * entry's size header field and the amount of data already read from the
229      * current entry.
230      *
231      * @return The number of available bytes for the current entry.
232      * @exception IOException when an IO error causes operation to fail
233      */
234     public int available()
235         throws IOException
236     {
237         return m_entrySize - m_entryOffset;
238     }
239 
240     /***
241      * Closes this stream. Calls the TarBuffer's close() method.
242      *
243      * @exception IOException when an IO error causes operation to fail
244      */
245     public void close()
246         throws IOException
247     {
248         m_buffer.close();
249     }
250 
251     /***
252      * Copies the contents of the current tar archive entry directly into an
253      * output stream.
254      *
255      * @param output The OutputStream into which to write the entry's data.
256      * @exception IOException when an IO error causes operation to fail
257      */
258     public void copyEntryContents( final OutputStream output )
259         throws IOException
260     {
261         final byte[] buffer = new byte[ 32 * 1024 ];
262         while( true )
263         {
264             final int numRead = read( buffer, 0, buffer.length );
265             if( numRead == -1 )
266             {
267                 break;
268             }
269 
270             output.write( buffer, 0, numRead );
271         }
272     }
273 
274     /***
275      * Since we do not support marking just yet, we do nothing.
276      *
277      * @param markLimit The limit to mark.
278      */
279     public void mark( int markLimit )
280     {
281     }
282 
283     /***
284      * Since we do not support marking just yet, we return false.
285      *
286      * @return False.
287      */
288     public boolean markSupported()
289     {
290         return false;
291     }
292 
293     /***
294      * Reads a byte from the current tar archive entry. This method simply calls
295      * read( byte[], int, int ).
296      *
297      * @return The byte read, or -1 at EOF.
298      * @exception IOException when an IO error causes operation to fail
299      */
300     public int read()
301         throws IOException
302     {
303         final int num = read( m_oneBuf, 0, 1 );
304         if( num == -1 )
305         {
306             return num;
307         }
308         else
309         {
310             return (int)m_oneBuf[ 0 ];
311         }
312     }
313 
314     /***
315      * Reads bytes from the current tar archive entry. This method simply calls
316      * read( byte[], int, int ).
317      *
318      * @param buffer The buffer into which to place bytes read.
319      * @return The number of bytes read, or -1 at EOF.
320      * @exception IOException when an IO error causes operation to fail
321      */
322     public int read( final byte[] buffer )
323         throws IOException
324     {
325         return read( buffer, 0, buffer.length );
326     }
327 
328     /***
329      * Reads bytes from the current tar archive entry. This method is aware of
330      * the boundaries of the current entry in the archive and will deal with
331      * them as if they were this stream's start and EOF.
332      *
333      * @param buffer The buffer into which to place bytes read.
334      * @param offset The offset at which to place bytes read.
335      * @param count The number of bytes to read.
336      * @return The number of bytes read, or -1 at EOF.
337      * @exception IOException when an IO error causes operation to fail
338      */
339     public int read( final byte[] buffer,
340                      final int offset,
341                      final int count )
342         throws IOException
343     {
344         int position = offset;
345         int numToRead = count;
346         int totalRead = 0;
347 
348         if( m_entryOffset >= m_entrySize )
349         {
350             return -1;
351         }
352 
353         if( ( numToRead + m_entryOffset ) > m_entrySize )
354         {
355             numToRead = ( m_entrySize - m_entryOffset );
356         }
357 
358         if( null != m_readBuf )
359         {
360             final int size =
361                 ( numToRead > m_readBuf.length ) ? m_readBuf.length : numToRead;
362 
363             System.arraycopy( m_readBuf, 0, buffer, position, size );
364 
365             if( size >= m_readBuf.length )
366             {
367                 m_readBuf = null;
368             }
369             else
370             {
371                 final int newLength = m_readBuf.length - size;
372                 final byte[] newBuffer = new byte[ newLength ];
373 
374                 System.arraycopy( m_readBuf, size, newBuffer, 0, newLength );
375 
376                 m_readBuf = newBuffer;
377             }
378 
379             totalRead += size;
380             numToRead -= size;
381             position += size;
382         }
383 
384         while( numToRead > 0 )
385         {
386             final byte[] rec = m_buffer.readRecord();
387             if( null == rec )
388             {
389                 // Unexpected EOF!
390                 final String message =
391                     "unexpected EOF with " + numToRead + " bytes unread";
392                 throw new IOException( message );
393             }
394 
395             int size = numToRead;
396             final int recordLength = rec.length;
397 
398             if( recordLength > size )
399             {
400                 System.arraycopy( rec, 0, buffer, position, size );
401 
402                 m_readBuf = new byte[ recordLength - size ];
403 
404                 System.arraycopy( rec, size, m_readBuf, 0, recordLength - size );
405             }
406             else
407             {
408                 size = recordLength;
409 
410                 System.arraycopy( rec, 0, buffer, position, recordLength );
411             }
412 
413             totalRead += size;
414             numToRead -= size;
415             position += size;
416         }
417 
418         m_entryOffset += totalRead;
419 
420         return totalRead;
421     }
422 
423     /***
424      * Since we do not support marking just yet, we do nothing.
425      */
426     public void reset()
427     {
428     }
429 
430     /***
431      * Skip bytes in the input buffer. This skips bytes in the current entry's
432      * data, not the entire archive, and will stop at the end of the current
433      * entry's data if the number to skip extends beyond that point.
434      *
435      * @param numToSkip The number of bytes to skip.
436      * @exception IOException when an IO error causes operation to fail
437      */
438     public void skip( final int numToSkip )
439         throws IOException
440     {
441         // REVIEW
442         // This is horribly inefficient, but it ensures that we
443         // properly skip over bytes via the TarBuffer...
444         //
445         final byte[] skipBuf = new byte[ 8 * 1024 ];
446         int num = numToSkip;
447         while( num > 0 )
448         {
449             final int count = ( num > skipBuf.length ) ? skipBuf.length : num;
450             final int numRead = read( skipBuf, 0, count );
451             if( numRead == -1 )
452             {
453                 break;
454             }
455 
456             num -= numRead;
457         }
458     }
459 
460     /***
461      * Utility method to do debugging.
462      * Capable of being overidden in sub-classes.
463      *
464      * @param message the message to use in debugging
465      */
466     protected void debug( final String message )
467     {
468         if( m_debug )
469         {
470             System.err.println( message );
471         }
472     }
473 }