Java Source Code: com.gargoylesoftware.htmlunit.HttpWebConnection


   1: /*
   2:  * Copyright (c) 2002-2006 Gargoyle Software Inc. All rights reserved.
   3:  *
   4:  * Redistribution and use in source and binary forms, with or without
   5:  * modification, are permitted provided that the following conditions are met:
   6:  *
   7:  * 1. Redistributions of source code must retain the above copyright notice,
   8:  *    this list of conditions and the following disclaimer.
   9:  * 2. Redistributions in binary form must reproduce the above copyright notice,
  10:  *    this list of conditions and the following disclaimer in the documentation
  11:  *    and/or other materials provided with the distribution.
  12:  * 3. The end-user documentation included with the redistribution, if any, must
  13:  *    include the following acknowledgment:
  14:  *
  15:  *       "This product includes software developed by Gargoyle Software Inc.
  16:  *        (http://www.GargoyleSoftware.com/)."
  17:  *
  18:  *    Alternately, this acknowledgment may appear in the software itself, if
  19:  *    and wherever such third-party acknowledgments normally appear.
  20:  * 4. The name "Gargoyle Software" must not be used to endorse or promote
  21:  *    products derived from this software without prior written permission.
  22:  *    For written permission, please contact info@GargoyleSoftware.com.
  23:  * 5. Products derived from this software may not be called "HtmlUnit", nor may
  24:  *    "HtmlUnit" appear in their name, without prior written permission of
  25:  *    Gargoyle Software Inc.
  26:  *
  27:  * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED WARRANTIES,
  28:  * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
  29:  * FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL GARGOYLE
  30:  * SOFTWARE INC. OR ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
  31:  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
  32:  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA,
  33:  * OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
  34:  * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
  35:  * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
  36:  * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  37:  */
  38: package com.gargoylesoftware.htmlunit;
  39: 
  40: import java.io.IOException;
  41: import java.net.URL;
  42: import java.util.ArrayList;
  43: import java.util.Iterator;
  44: import java.util.List;
  45: import java.util.Map;
  46: 
  47: import org.apache.commons.httpclient.Header;
  48: import org.apache.commons.httpclient.HostConfiguration;
  49: import org.apache.commons.httpclient.HttpClient;
  50: import org.apache.commons.httpclient.HttpException;
  51: import org.apache.commons.httpclient.HttpMethod;
  52: import org.apache.commons.httpclient.HttpMethodBase;
  53: import org.apache.commons.httpclient.HttpMethodRetryHandler;
  54: import org.apache.commons.httpclient.HttpState;
  55: import org.apache.commons.httpclient.HttpStatus;
  56: import org.apache.commons.httpclient.NameValuePair;
  57: import org.apache.commons.httpclient.URI;
  58: import org.apache.commons.httpclient.URIException;
  59: import org.apache.commons.httpclient.auth.CredentialsProvider;
  60: import org.apache.commons.httpclient.methods.GetMethod;
  61: import org.apache.commons.httpclient.methods.PostMethod;
  62: import org.apache.commons.httpclient.methods.StringRequestEntity;
  63: import org.apache.commons.httpclient.methods.multipart.FilePart;
  64: import org.apache.commons.httpclient.methods.multipart.MultipartRequestEntity;
  65: import org.apache.commons.httpclient.methods.multipart.Part;
  66: import org.apache.commons.httpclient.methods.multipart.PartBase;
  67: import org.apache.commons.httpclient.methods.multipart.StringPart;
  68: import org.apache.commons.httpclient.params.HttpMethodParams;
  69: import org.apache.commons.logging.Log;
  70: import org.apache.commons.logging.LogFactory;
  71: import org.apache.commons.logging.impl.SimpleLog;
  72: 
  73: /**
  74:  * <span style="color:red">INTERNAL API - SUBJECT TO CHANGE AT ANY TIME - USE AT YOUR OWN RISK.</span><br/>
  75:  *  
  76:  * An object that handles the actual communication portion of page
  77:  * retrieval/submission
  78:  *
  79:  * @version  $Revision: 1.57 $
  80:  * @author  <a href="mailto:mbowler@GargoyleSoftware.com">Mike Bowler</a>
  81:  * @author Noboru Sinohara
  82:  * @author David D. Kilzer
  83:  * @author Marc Guillemot
  84:  * @author Brad Clarke
  85:  */
  86:	  public class HttpWebConnection extends WebConnectionImpl {
  87:    private HttpClient httpClient_;
  88:
  89:    private String virtualHost_ = null;
  90:
  91:    // http://jakarta.apache.org/commons/httpclient/3.0/exception-handling.html#Automatic%20exception%20recovery
  92:	      private static final HttpMethodRetryHandler NoAutoRetry = new HttpMethodRetryHandler() {
  93:	          public boolean retryMethod(final HttpMethod arg0, final IOException arg1, final int arg2) {
  94:            return false;
  95:        }
  96:    };
  97:
  98:
  99:    /**
 100:     * Create a new HTTP web connection instance.
 101:     * @param  webClient The WebClient that is using this connection
 102:     */
 103:	      public HttpWebConnection( final WebClient webClient ) {
 104:        super(webClient);
 105:    }
 106:
 107:
 108:    /**
 109:     *  Submit a request and retrieve a response
 110:     *
 111:     * @param  webRequestSettings Settings to make the request with
 112:     * @return  See above
 113:     * @exception  IOException If an IO error occurs
 114:     */
 115:	      public WebResponse getResponse(final WebRequestSettings webRequestSettings) throws IOException {
 116:
 117:        final URL url = webRequestSettings.getURL();
 118:
 119:        final HttpClient httpClient = getHttpClient();
 120:
 121:	          try {
 122:            final HttpMethodBase httpMethod = makeHttpMethod(webRequestSettings);
 123:            final HostConfiguration hostConfiguration = getHostConfiguration(webRequestSettings);
 124:            final long startTime = System.currentTimeMillis();
 125:            final int responseCode = httpClient.executeMethod(hostConfiguration, httpMethod);
 126:            final long endTime = System.currentTimeMillis();
 127:            return makeWebResponse( responseCode, httpMethod, url, endTime-startTime );
 128:        }
 129:	          catch( final HttpException e ) {
 130:            // KLUDGE: hitting www.yahoo.com will cause an exception to be thrown while
 131:            // www.yahoo.com/ (note the trailing slash) will not.  If an exception is
 132:            // caught here then check to see if this is the situation.  If so, then retry
 133:            // it with a trailing slash.  The bug manifests itself with httpClient
 134:            // complaining about not being able to find a line with HTTP/ on it.
 135:	              if( url.getPath().length() == 0 ) {
 136:                final StringBuffer buffer = new StringBuffer();
 137:                buffer.append(url.getProtocol());
 138:                buffer.append("://");
 139:                buffer.append(url.getHost());
 140:                buffer.append("/");
 141:	                  if( url.getQuery() != null ) {
 142:                    buffer.append(url.getQuery());
 143:                }
 144:                //TODO: There might be a bug here since the original encoding type is lost.
 145:                final WebRequestSettings newRequest = new WebRequestSettings(new URL(buffer.toString()));
 146:                newRequest.setSubmitMethod(webRequestSettings.getSubmitMethod());
 147:                newRequest.setRequestParameters(webRequestSettings.getRequestParameters());
 148:                newRequest.setAdditionalHeaders(webRequestSettings.getAdditionalHeaders());
 149:                return getResponse(newRequest);
 150:            }
 151:	              else {
 152:                e.printStackTrace();
 153:                throw new RuntimeException( "HTTP Error: " + e.getMessage() );
 154:            }
 155:        }
 156:    }
 157:
 158:    /**
 159:     * Gets the host configuration for the request.
 160:     * Should we cache it?
 161:     * @param webRequestSettings the current request settings
 162:     * @return the host configuration to use for this request
 163:     */
 164:	      private HostConfiguration getHostConfiguration(final WebRequestSettings webRequestSettings) {
 165:        final HostConfiguration hostConfiguration = new HostConfiguration();
 166:        final URL url = webRequestSettings.getURL();
 167:        final URI uri;
 168:	          try {
 169:            uri = new URI(url.toExternalForm(), false);
 170:        }
 171:	          catch( final URIException e ) {
 172:            // Theoretically impossible but ....
 173:            throw new IllegalStateException("Unable to create URI from URL: "+url.toExternalForm());
 174:        }
 175:        hostConfiguration.setHost(uri);
 176:	          if( webRequestSettings.getProxyHost() != null ) {
 177:            final String proxyHost = webRequestSettings.getProxyHost();
 178:            final int proxyPort = webRequestSettings.getProxyPort();
 179:            hostConfiguration.setProxy( proxyHost, proxyPort );
 180:        }
 181:        return hostConfiguration;
 182:    }
 183:
 184:
 185:    /**
 186:     * Creates an <tt>HttpMethod</tt> instance according to the specified parameters.
 187:     * @param webRequestSettings the parameters.
 188:     * @return The <tt>HttpMethod</tt> instance constructed according to the specified parameters.
 189:     * @throws IOException
 190:     */
 191:    private HttpMethodBase makeHttpMethod(final WebRequestSettings webRequestSettings)
 192:        throws
 193:	              IOException {
 194:
 195:        final HttpMethodBase httpMethod;
 196:        String path = webRequestSettings.getURL().getPath();
 197:	          if( path.length() == 0 ) {
 198:            path = "/";
 199:        }
 200:	          if (SubmitMethod.GET == webRequestSettings.getSubmitMethod()) {
 201:            httpMethod = new GetMethod( path );
 202:
 203:	              if (webRequestSettings.getRequestParameters().isEmpty() ) {
 204:                final String queryString = webRequestSettings.getURL().getQuery();
 205:                httpMethod.setQueryString( queryString );
 206:            }
 207:	              else {
 208:                final NameValuePair[] pairs = new NameValuePair[webRequestSettings.getRequestParameters().size()];
 209:                webRequestSettings.getRequestParameters().toArray( pairs );
 210:                httpMethod.setQueryString( pairs );
 211:            }
 212:        }
 213:	          else if (SubmitMethod.POST  == webRequestSettings.getSubmitMethod()) {
 214:            final PostMethod postMethod = new PostMethod( path );
 215:            postMethod.getParams().setContentCharset(webRequestSettings.getCharset());
 216:
 217:            final String queryString = webRequestSettings.getURL().getQuery();
 218:	              if( queryString != null ) {
 219:                postMethod.setQueryString(queryString);
 220:            }
 221:	              if (webRequestSettings.getRequestBody() != null ) {
 222:                postMethod.setRequestEntity( new StringRequestEntity(webRequestSettings.getRequestBody()) );
 223:            }
 224:
 225:            // Note that this has to be done in two loops otherwise it won't
 226:            // be able to support two elements with the same name.
 227:	              if (webRequestSettings.getEncodingType() == FormEncodingType.URL_ENCODED) {
 228:                Iterator iterator = webRequestSettings.getRequestParameters().iterator();
 229:	                  while( iterator.hasNext() ) {
 230:                    final NameValuePair pair = ( NameValuePair )iterator.next();
 231:                    postMethod.removeParameter( pair.getName(), pair.getValue() );
 232:                }
 233:
 234:                iterator = webRequestSettings.getRequestParameters().iterator();
 235:	                  while( iterator.hasNext() ) {
 236:                    final NameValuePair pair = ( NameValuePair )iterator.next();
 237:                    postMethod.addParameter( pair.getName(), pair.getValue() );
 238:                }
 239:            }
 240:	              else {
 241:                final List partList = new ArrayList();
 242:                final Iterator iterator = webRequestSettings.getRequestParameters().iterator();
 243:	                  while (iterator.hasNext()) {
 244:                    final PartBase newPart;
 245:                    final KeyValuePair pair = (KeyValuePair) iterator.next();
 246:	                      if (pair instanceof KeyDataPair) {
 247:                        final KeyDataPair pairWithFile = (KeyDataPair) pair;
 248:                        newPart = new FilePart(
 249:                                pairWithFile.getName(),
 250:                                pairWithFile.getValue(),
 251:                                pairWithFile.getFile(),
 252:                                pairWithFile.getContentType(),
 253:                                null);
 254:                        // Firefox and IE seem not to specify a charset for a file part
 255:                        newPart.setCharSet(null);
 256:                    }
 257:	                      else {
 258:                        newPart = new StringPart(pair.getName(), pair.getValue(), webRequestSettings.getCharset());
 259:                        newPart.setContentType(null); // Firefox and IE seem not to send a content type
 260:                    }
 261:                    newPart.setTransferEncoding(null); // Firefox and IE don't send transfer encoding headers
 262:                    partList.add(newPart);
 263:                }
 264:                Part[] parts = new Part[partList.size()];
 265:                parts = (Part[]) partList.toArray(parts);
 266:                postMethod.setRequestEntity(new MultipartRequestEntity(
 267:                        parts,
 268:                        postMethod.getParams()));
 269:            }
 270:            httpMethod = postMethod;
 271:        }
 272:	          else {
 273:            throw new IllegalStateException("Submit method not yet supported: " + webRequestSettings.getSubmitMethod());
 274:        }
 275:
 276:        httpMethod.setRequestHeader(
 277:                "User-Agent", getWebClient().getBrowserVersion().getUserAgent() );
 278:
 279:        writeRequestHeadersToHttpMethod(httpMethod, webRequestSettings.getAdditionalHeaders());
 280:        httpMethod.setFollowRedirects(false);
 281:
 282:        httpMethod.getParams().setParameter(HttpMethodParams.RETRY_HANDLER, NoAutoRetry);
 283:	          if (webRequestSettings.getCredentialsProvider() != null) {
 284:            httpMethod.getParams().setParameter(CredentialsProvider.PROVIDER,
 285:                    webRequestSettings.getCredentialsProvider());
 286:        }
 287:        return httpMethod;
 288:    }
 289:
 290:	      private synchronized HttpClient getHttpClient() {
 291:
 292:	          if (httpClient_ == null ) {
 293:            httpClient_ = createHttpClient();
 294:
 295:            // Disable informational messages from httpclient
 296:            final Log log = LogFactory.getLog("httpclient.wire");
 297:	              if( log instanceof SimpleLog ) {
 298:                ((SimpleLog)log).setLevel( SimpleLog.LOG_LEVEL_WARN );
 299:            }
 300:
 301:            final int timeout = getWebClient().getTimeout();
 302:            httpClient_.getHttpConnectionManager().getParams().setSoTimeout(timeout);
 303:            httpClient_.getHttpConnectionManager().getParams().setConnectionTimeout(timeout);
 304:
 305:
 306:	              if (virtualHost_ != null) {
 307:                httpClient_.getParams().setVirtualHost(virtualHost_);
 308:            }
 309:        }
 310:
 311:        // Tell the client where to get its credentials from
 312:        // (it may have changed on the webClient since last call to getHttpClientFor(...))
 313:        httpClient_.getParams().setParameter( CredentialsProvider.PROVIDER, getWebClient().getCredentialsProvider() );
 314:
 315:        return httpClient_;
 316:    }
 317:
 318:    /**
 319:     * Creates the httpClient that will be used by this WebConnection.
 320:     * Extensions may override this method to create the HttpClient with for instance a custom 
 321:     * {@link org.apache.commons.httpclient.HttpConnectionManager} to perform some tracking 
 322:     * (see feature request 1438216).
 323:     * @return the client
 324:     */
 325:	      protected HttpClient createHttpClient() {
 326:        return new HttpClient();
 327:    }
 328:
 329:
 330:    /**
 331:     * Return the log object for this class
 332:     * @return The log object
 333:     */
 334:	      protected final Log getLog() {
 335:        return LogFactory.getLog(getClass());
 336:    }
 337:
 338:    /**
 339:     * set the virtual host
 340:     * @param virtualHost The virtualHost to set.
 341:     */
 342:	      public void setVirtualHost(final String virtualHost) {
 343:        virtualHost_ = virtualHost;
 344:    }
 345:
 346:    /**
 347:     * Get the virtual host
 348:     * @return virtualHost The current virtualHost 
 349:     */
 350:	      public String getVirtualHost() {
 351:        return virtualHost_;
 352:    }
 353:
 354:    /**
 355:     * Return the {@link HttpState} that is being used.
 356:     * @return The state.
 357:     */
 358:	      public HttpState getState() {
 359:        return getHttpClient().getState();
 360:    }
 361:
 362:    /**
 363:     * Converts the HttpMethod into a WebResponse
 364:     */
 365:    private WebResponse makeWebResponse(final int statusCode, final HttpMethodBase method,
 366:	              final URL originatingURL, final long loadTime) throws IOException {
 367:
 368:        String statusMessage = method.getStatusText();
 369:	          if (statusMessage == null || statusMessage.length() == 0) {
 370:            statusMessage = HttpStatus.getStatusText(statusCode);
 371:        }
 372:	          if (statusMessage == null) {
 373:            statusMessage = "Unknown status code";
 374:        }
 375:        final List headers = new ArrayList();
 376:        final Header[] array = method.getResponseHeaders();
 377:	          for (int i = 0; i < array.length; i++) {
 378:            headers.add(new NameValuePair(array[i].getName(), array[i].getValue()));
 379:        }
 380:
 381:        final WebResponseData responseData = new WebResponseData(
 382:                method.getResponseBodyAsStream(),
 383:                statusCode,
 384:                statusMessage,
 385:                headers);
 386:
 387:        final SubmitMethod requestMethod = SubmitMethod.getInstance(method.getName());
 388:        return new WebResponseImpl(responseData, originatingURL, requestMethod, loadTime);
 389:    }
 390:
 391:
 392:	      private void writeRequestHeadersToHttpMethod( final HttpMethod httpMethod, final Map requestHeaders ) {
 393:	          synchronized( requestHeaders ) {
 394:            final Iterator iterator = requestHeaders.entrySet().iterator();
 395:	              while( iterator.hasNext() ) {
 396:                final Map.Entry entry = ( Map.Entry )iterator.next();
 397:                httpMethod.setRequestHeader( ( String )entry.getKey(), ( String )entry.getValue() );
 398:            }
 399:        }
 400:    }
 401:
 402:}