Java Source Code: org.eclipse.emf.common.archive.ArchiveURLConnection


   1: /**
   2:  * <copyright> 
   3:  *
   4:  * Copyright (c) 2004-2005 IBM Corporation and others.
   5:  * All rights reserved.   This program and the accompanying materials
   6:  * are made available under the terms of the Eclipse Public License v1.0
   7:  * which accompanies this distribution, and is available at
   8:  * http://www.eclipse.org/legal/epl-v10.html
   9:  * 
  10:  * Contributors: 
  11:  *   IBM - Initial API and implementation
  12:  *
  13:  * </copyright>
  14:  *
  15:  * $Id: ArchiveURLConnection.java,v 1.1 2006/10/22 07:37:59 marcelop Exp $
  16:  */
  17: package org.eclipse.emf.common.archive;
  18: 
  19: import java.io.File;
  20: import java.io.FileInputStream;
  21: import java.io.FileOutputStream;
  22: import java.io.FilterInputStream;
  23: import java.io.FilterOutputStream;
  24: import java.io.IOException;
  25: import java.io.InputStream;
  26: import java.io.OutputStream;
  27: import java.net.MalformedURLException;
  28: import java.net.URL;
  29: import java.net.URLConnection;
  30: import java.util.zip.ZipEntry;
  31: import java.util.zip.ZipFile;
  32: import java.util.zip.ZipInputStream;
  33: import java.util.zip.ZipOutputStream;
  34: 
  35: /**
  36:  * A connection that can access an entry in an archive, and then recursively an entry in that archive, and so on.
  37:  * For example, it can be used just like jar: or zip:, only the archive paths can repeat, e.g.,
  38:  *<pre>
  39:  *  archive:file:///c:/temp/example.zip!/org/example/nested.zip!/org/example/deeply-nested.html
  40:  *</pre>
  41:  * The general recursive pattern is
  42:  *<pre>
  43:  *  archive:$nestedURL${/!$archivePath$}+
  44:  *</pre>
  45:  * So the nested URL for the example above is
  46:  *<pre>
  47:  *  file:///c:/temp/example.zip
  48:  *</pre>
  49:  * 
  50:  * <p>
  51:  * Since the nested URL may itself contain archive schemes,
  52:  * the subsequence of the archive paths that should be associated with the nested URL 
  53:  * is determined by finding the nth archive separator, i.e., the nth !/, 
  54:  * where n is the number of ":"s before the first "/" of the nested URL, i.e., the number of nested schemes.
  55:  * For example, for a more complex case where the nested URL is itself an archive-based scheme, e.g.,
  56:  *<pre>
  57:  *  archive:jar:file:///c:/temp/example.zip!/org/example/nested.zip!/org/example/deeply-nested.html
  58:  *</pre>
  59:  * the nested URL is correctly parsed to skip to the second archive separator as
  60:  *<pre>
  61:  *  jar:file:///c:/temp/example.zip!/org/example/nested.zip
  62:  *</pre>
  63:  * </p>
  64:  *
  65:  * <p>
  66:  * The logic for accessing archives can be tailored and reused independant from its usage as a URL connection.
  67:  * This is normally done by using the constructor {@link #ArchiveURLConnection(String)}
  68:  * and overriding {@link #createInputStream(String)} and {@link #createOutputStream(String)}.
  69:  * The behavior can be tailored by overriding {@link #emulateArchiveScheme()} and {@link #useZipFile()}.
  70:  * </p>
  71:  */
  72: public class ArchiveURLConnection extends URLConnection
  73:	  {
  74:  /**
  75:   * The cached string version of the {@link #url URL}.
  76:   */
  77:  protected String urlString;
  78:  
  79:  /**
  80:   * Constructs a new connection for the URL.
  81:   * @param url the URL of this connection.
  82:   */
  83:  public ArchiveURLConnection(URL url)
  84:	    {
  85:    super(url);
  86:    urlString = url.toString();
  87:  }
  88:  
  89:  /**
  90:   * Constructs a new archive accessor.
  91:   * This constructor forwards a null URL to be super constructor, 
  92:   * so an instance built with this constructor <b>cannot</b> be used as a URLConnection.
  93:   * The logic for accessing archives and for delegating to the nested URL can be reused in other applications,
  94:   * without creating an URLs.
  95:   * @param url the URL of the archive.
  96:   */
  97:  protected ArchiveURLConnection(String url)
  98:	    {
  99:    super(null);
 100:    urlString = url;
 101:  }
 102:  
 103:  /**
 104:   * </p>
 105:   * Returns whether the implementation will handle all the archive accessors directly.
 106:   * For example, whether
 107:   *<pre>
 108:   *  archive:jar:file:///c:/temp/example.zip!/org/example/nested.zip!/org/example/deeply-nested.html
 109:   *</pre>
 110:   * will be handled as if it were specified as
 111:   *<pre>
 112:   *  archive:file:///c:/temp/example.zip!/org/example/nested.zip!/org/example/deeply-nested.html
 113:   *</pre>
 114:   * Override this only if you are reusing the logic of retrieving an input stream into an archive 
 115:   * and hence are likely to be overriding createInputStream, 
 116:   * which is the point of delegation to the nested URL for recursive stream creation.
 117:   * </p>
 118:   * @return whether the implementation will handle all the archive accessors directly.
 119:   */
 120:  protected boolean emulateArchiveScheme()
 121:	    {
 122:    return false;
 123:  }
 124:  
 125:  /**
 126:   * Returns whether to handle the special case of a nested URL with file: schema using a {@link ZipFile}.
 127:   * This gives more efficient direct access to the root entry, e.g., 
 128:   *<pre>
 129:   *  archive:file:///c:/temp/example.zip!/org/example/nested.html
 130:   *</pre>
 131:   * @return whether to handle the special case of a nested URL with file: schema using a ZipFile.
 132:   */
 133:  protected boolean useZipFile()
 134:	    {
 135:    return false;
 136:  }
 137:        
 138:  /**
 139:   * Record that this is connected.
 140:   */
 141:  public void connect() throws IOException
 142:	    {
 143:    connected = true;
 144:  }
 145:  
 146:  /**
 147:   * Creates the input stream for the URL.
 148:   * @return the input stream for the URL.
 149:   */
 150:  public InputStream getInputStream() throws IOException 
 151:	    {
 152:    // There must be at least one archive path.
 153:    //
 154:    int archiveSeparator = urlString.indexOf("!/");
 155:    if (archiveSeparator < 0)
 156:	      {
 157:      throw new MalformedURLException("missing archive separators " + urlString);
 158:    }
 159:    
 160:    // There needs to be another URL protocol right after the archive protocol, and not a "/".
 161:    //
 162:    int start = urlString.indexOf(':') + 1;
 163:    if (start > urlString.length() || urlString.charAt(start) == '/')
 164:	      {
 165:      throw 
 166:        new IllegalArgumentException
 167:          ("archive protocol must be immediately followed by another URL protocol " + urlString);
 168:    }
 169:    
 170:    // Parse to extract the archives that will be delegated to the nested URL based on the number of schemes at the start.
 171:    //
 172:    for (int i = start, end = urlString.indexOf("/") - 1; (i = urlString.indexOf(":", i)) < end; )
 173:	      {
 174:      if (emulateArchiveScheme())
 175:	        {
 176:        // Skip a scheme for the achive accessor to be handled directly here.
 177:        //
 178:        start = ++i;
 179:      }
 180:      else
 181:	        {
 182:        // Skip an archive accessor to be handled by delegation to the scheme in nested URL.
 183:        //
 184:        archiveSeparator = urlString.indexOf("!/", archiveSeparator + 2);
 185:        if (archiveSeparator < 0)
 186:	          {
 187:          throw new MalformedURLException("too few archive separators " + urlString);
 188:        }
 189:        ++i;
 190:      }
 191:    }
 192:          
 193:    // System.out.println("archive: " + urlString.substring(start, archiveSeparator) + " -> " + urlString.substring(archiveSeparator + 2));
 194:          
 195:    // Create the delegate URL.
 196:    //
 197:    String nestedURL = urlString.substring(start, archiveSeparator);
 198:          
 199:    // The cutoff point to the next archive.
 200:    //
 201:    int nextArchiveSeparator = urlString.indexOf("!/", archiveSeparator + 2);
 202:          
 203:    // Construct the input stream in a special efficient way for case of a file scheme.
 204:    //
 205:    InputStream inputStream;
 206:    if (!useZipFile() || !nestedURL.startsWith("file:"))
 207:	      {
 208:      // Just get the stream from the URL.
 209:      //
 210:      inputStream =  createInputStream(nestedURL);
 211:    }
 212:    else
 213:	      {
 214:      // The name to be used for the entry.
 215:      //
 216:      String entry = 
 217:         nextArchiveSeparator < 0 ?
 218:           urlString.substring(archiveSeparator + 2) :
 219:           urlString.substring(archiveSeparator + 2, nextArchiveSeparator);
 220:                 
 221:      // Skip over this archive path to the next one, since we are handling this one special.
 222:      //
 223:      archiveSeparator = nextArchiveSeparator;
 224:      nextArchiveSeparator = urlString.indexOf("!/", archiveSeparator + 2);
 225:            
 226:      // Go directly to the right entry in the zip file, 
 227:      // get the stream, 
 228:      // and wrap it so that closing it closes the zip file.
 229:      //
 230:      final ZipFile zipFile = new ZipFile(nestedURL.substring(5));
 231:      ZipEntry zipEntry = zipFile.getEntry(entry);
 232:      if (zipEntry == null)
 233:	        {
 234:        throw new IOException("archive entry not found " + entry);
 235:      }
 236:      inputStream = 
 237:        new FilterInputStream(zipFile.getInputStream(zipEntry))
 238:	          {
 239:          public void close() throws IOException
 240:	            {
 241:            super.close();
 242:            zipFile.close();
 243:          }
 244:        };
 245:    }
 246:          
 247:    // Loop over the archive paths.
 248:    //
 249:    LOOP:
 250:    while (archiveSeparator > 0)
 251:	      {
 252:      // The entry name to be matched.
 253:      //
 254:      String entry = 
 255:         nextArchiveSeparator < 0 ?
 256:           urlString.substring(archiveSeparator + 2) :
 257:           urlString.substring(archiveSeparator + 2, nextArchiveSeparator);
 258:            
 259:      // Wrap the input stream as a zip stream to scan it's contents for a match.
 260:      //
 261:      ZipInputStream zipInputStream = new ZipInputStream(inputStream);
 262:      while (zipInputStream.available() >= 0)
 263:	        {
 264:        ZipEntry zipEntry = zipInputStream.getNextEntry();
 265:        if (zipEntry == null)
 266:	          {
 267:          break;
 268:        }
 269:        else if (entry.equals(zipEntry.getName()))
 270:	          {
 271:          inputStream = zipInputStream;
 272:                  
 273:          // Skip to the next archive path and continue the loop.
 274:          //
 275:          archiveSeparator = nextArchiveSeparator;
 276:          nextArchiveSeparator = urlString.indexOf("!/", archiveSeparator + 2);
 277:          continue LOOP;
 278:        }
 279:      }
 280:            
 281:      // Unless we matched an entry, we're done.
 282:      //
 283:      break;
 284:    }
 285:          
 286:    return inputStream;
 287:  }
 288:  
 289:  /**
 290:   * Creates an input stream for the nested URL by calling {@link URL#openStream()opening} a stream on it.
 291:   * @param nestedURL the nested URL for which a stream is required.
 292:   * @return the open stream of the nested URL.
 293:   */
 294:  protected InputStream createInputStream(String nestedURL) throws IOException
 295:	    {
 296:    return new URL(nestedURL).openStream();
 297:  }
 298:  
 299:  /**
 300:   * Creates the output stream for the URL.
 301:   * @return the output stream for the URL.
 302:   */
 303:  public OutputStream getOutputStream() throws IOException
 304:	    {
 305:    // There must be at least one archive separator.
 306:    //
 307:    int archiveSeparator = urlString.indexOf("!/");
 308:    if (archiveSeparator < 0)
 309:	      {
 310:      throw new MalformedURLException("missing archive separator in " + urlString);
 311:    }
 312:    
 313:    // There needs to be another URL protocol right after the archive protocol, and not a "/".
 314:    //
 315:    int start = urlString.indexOf(':') + 1;
 316:    if (start > urlString.length() || urlString.charAt(start) == '/')
 317:	      {
 318:      throw 
 319:        new IllegalArgumentException
 320:          ("archive protocol must be immediately followed by another URL protocol " + urlString);
 321:    }
 322:    
 323:    // Parse the URI to extract the nested/delegate URI based on preference and the number of schemes at the start.
 324:    //
 325:    for (int i = start, end = urlString.indexOf("/") - 1; (i = urlString.indexOf(":", i)) < end; )
 326:	      {
 327:      if (emulateArchiveScheme())
 328:	        {
 329:        // Skip a scheme for the achive accessor to be handled directly here.
 330:        //
 331:        start = ++i;
 332:      }
 333:      else
 334:	        {
 335:        // Skip an archive accessor to be handled by delegation to the scheme in nested URI.
 336:        //
 337:        archiveSeparator = urlString.indexOf("!/", archiveSeparator + 2);
 338:        if (archiveSeparator < 0)
 339:	          {
 340:          throw new MalformedURLException("too few archive separators in " + urlString);
 341:        }
 342:        ++i;
 343:      }
 344:    }
 345:    
 346:    // System.out.println("archive: -> " + urlString.substring(start, archiveSeparator) + " -> " + urlString.substring(archiveSeparator + 2));
 347:    
 348:    // Create the delegate URL
 349:    //
 350:    final String nestedURL = urlString.substring(start, archiveSeparator);
 351:    
 352:    // Create a temporary file where the existing contents of the archive can be written 
 353:    // before the new contents are added.
 354:    //
 355:    final File tempFile = File.createTempFile("Archive", "zip");
 356:    
 357:    // Record the input and output streams for closing in case of failure so that handles are not left open.
 358:    //
 359:    InputStream sourceInputStream =  null;
 360:    OutputStream tempOutputStream = null;
 361:    try
 362:	      {
 363:      // Create the output stream to the temporary file and the input stream for the delegate URL.
 364:      //
 365:      tempOutputStream = new FileOutputStream(tempFile);
 366:      try
 367:	        {
 368:        sourceInputStream =  createInputStream(nestedURL);
 369:      }
 370:      catch (IOException exception)
 371:	        {
 372:        // Continue processing if the file doesn't exist so that we try create a new empty one.
 373:      }
 374:      
 375:      // Record them as generic streams to record state during the loop that emulates recursion.
 376:      //
 377:      OutputStream outputStream = tempOutputStream;
 378:      InputStream inputStream =  sourceInputStream;
 379:      
 380:      // The cutoff point to the next archive.
 381:      //
 382:      int nextArchiveSeparator = urlString.indexOf("!/", archiveSeparator + 2);
 383:      
 384:      // The most deeply nested output stream that will be returned wrapped as the result.
 385:      //
 386:      ZipOutputStream zipOutputStream;
 387:      
 388:      // A buffer for transferring archive contents.
 389:      //
 390:      final byte [] bytes = new byte [4096];
 391:              
 392:      // We expect there to be at least one archive path.
 393:      //
 394:      do
 395:	        {
 396:        // The name that will be used as the archive entry.
 397:        //
 398:        String entry = 
 399:           nextArchiveSeparator < 0 ?
 400:             urlString.substring(archiveSeparator + 2) :
 401:             urlString.substring(archiveSeparator + 2, nextArchiveSeparator);
 402:             
 403:        // Wrap the current result as a zip stream, and record it for loop-based recursion.
 404:        //
 405:        zipOutputStream =  new ZipOutputStream(outputStream);
 406:        outputStream = zipOutputStream;
 407:        
 408:        // Wrap the current input as a zip stream, and record it for loop-based recursion.
 409:        //
 410:        ZipInputStream zipInputStream = inputStream == null ? null : new ZipInputStream(inputStream);
 411:        inputStream = zipInputStream;
 412:        
 413:        // Loop over the entries in the zip stream.
 414:        //
 415:        while (zipInputStream != null && zipInputStream.available() >= 0)
 416:	          {
 417:          // If this entry isn't the end marker 
 418:          // and isn't the matching one that we are replacing...
 419:          //
 420:          ZipEntry zipEntry = zipInputStream.getNextEntry();
 421:          if (zipEntry == null)
 422:	            {
 423:            break;
 424:          }
 425:          else if (!entry.equals(zipEntry.getName()))
 426:	            {
 427:            // Transfer the entry and its contents.
 428:            //
 429:            zipOutputStream.putNextEntry(zipEntry);
 430:            for (int size; (size = zipInputStream.read(bytes, 0, bytes.length)) > -1; )
 431:	              {
 432:              zipOutputStream.write(bytes, 0, size);
 433:            }
 434:          }
 435:        }
 436:        
 437:        // Create a new or replaced entry.
 438:        //
 439:        zipOutputStream.putNextEntry(new ZipEntry(entry));
 440:        
 441:        // Find the next archive path and continue "recursively" if there is one.
 442:        //
 443:        archiveSeparator = nextArchiveSeparator;
 444:        nextArchiveSeparator = urlString.indexOf("!/", archiveSeparator + 2);
 445:      }
 446:      while (archiveSeparator > 0);
 447:      
 448:      // Ensure that it won't be closed in the finally block.
 449:      //
 450:      tempOutputStream = null;
 451:      
 452:      // Wrap the deepest result so that on close, the results are finally transferred.
 453:      //
 454:      final boolean deleteRequired = sourceInputStream != null;
 455:      return
 456:        new FilterOutputStream(zipOutputStream)
 457:	          {
 458:          protected boolean isClosed;
 459:          
 460:          public void close() throws IOException
 461:	            {
 462:            // Make sure we close only once.
 463:            //
 464:            if (!isClosed)
 465:	              {
 466:              isClosed = true;
 467:              
 468:              // Close for real so that the temporary file is ready to be read.
 469:              //
 470:              super.close();
 471:              
 472:              boolean useRenameTo = nestedURL.startsWith("file:");
 473:              
 474:              // If the delegate URI can be handled as a file, 
 475:              // we'll hope that renaming it will be really efficient.
 476:              //
 477:              if (useRenameTo)
 478:	                {
 479:                File targetFile = new File(nestedURL.substring(5));
 480:                if (deleteRequired && !targetFile.delete())
 481:	                  {
 482:                  throw new IOException("cannot delete " + targetFile.getPath());
 483:                }
 484:                else if (!tempFile.renameTo(targetFile))
 485:	                  {
 486:                  useRenameTo = false;
 487:                }
 488:              }
 489:              if (!useRenameTo)
 490:	                {
 491:                // Try to transfer it by reading the contents of the temporary file 
 492:                // and writing them to the output stream of the delegate.
 493:                //
 494:                InputStream inputStream = null;
 495:                OutputStream outputStream = null;
 496:                try
 497:	                  {
 498:                  inputStream = new FileInputStream(tempFile);
 499:                  outputStream = createOutputStream(nestedURL);
 500:                  for (int size; (size = inputStream.read(bytes, 0, bytes.length)) > -1; )
 501:	                    {
 502:                    outputStream.write(bytes, 0, size);
 503:                  }
 504:                }
 505:                finally
 506:	                  {
 507:                  // Make sure they are closed no matter what bad thing happens.
 508:                  //
 509:                  if (inputStream != null) 
 510:	                    {
 511:                    inputStream.close();
 512:                  }
 513:                  if (outputStream != null) 
 514:	                    {
 515:                    outputStream.close();
 516:                  }
 517:                }
 518:              }
 519:            }
 520:          }
 521:       };
 522:    }
 523:    finally
 524:	      {
 525:      // Close in case of failure to complete.
 526:      //
 527:      if (tempOutputStream != null)
 528:	        {
 529:        tempOutputStream.close();
 530:      }
 531:      
 532:      // Close if we created this.
 533:      //
 534:      if (sourceInputStream != null)
 535:	        {
 536:        sourceInputStream.close();
 537:      }
 538:    }
 539:  }
 540:  
 541:  /**
 542:   * Creates an input stream for the nested URL by calling {@link URL#openStream()opening} a stream on it.
 543:   * @param nestedURL the nested URL for which a stream is required.
 544:   * @return the open stream of the nested URL.
 545:   */
 546:  protected OutputStream createOutputStream(String nestedURL) throws IOException
 547:	    {
 548:    URL url = new URL(nestedURL.toString());
 549:    URLConnection urlConnection = url.openConnection();
 550:    urlConnection.setDoOutput(true);
 551:    return urlConnection.getOutputStream(); 
 552:  }
 553:}