Package java.net

Examples of java.net.URL


   * will only be called on startup !
   */
  private void updateDialogFromRobot() {

    // start URL
    URL u = jobobase.getRobot().getStartURL();
    if (u != null) {
      urlField.setText(u.toString());
    } else {
      urlField.setText("");
    }

  }
View Full Code Here


   ***********************************************************************
   ***********************************************************************/

  public void setHttpToolDocUrl(String url) {
    try {
      currentUrlField.setText(urlToString(new URL(url)));
    } catch (MalformedURLException e) {}
  }
View Full Code Here

  this.referer=referer;
 
 
  public URL getUrl() {
  try {
    return new URL(urlString);
  } catch (MalformedURLException e) {
    e.printStackTrace();
    return null;
  }
  }
View Full Code Here

    if (opt.getOptionBoolean("?")) {
      printUsage();
      return;
    }
   
    URL u = new URL(argv[argv.length-1]);

    HttpDocToFile docStore=new HttpDocToFile(basedir);
    docStore.setMinFileSize(minSize);

    SystemOutHttpToolCallback statusInfo = new SystemOutHttpToolCallback();
View Full Code Here

            throws IOException {
       
        List<T> providers = new LinkedList<T>();
        String providerClassName = providerClass.getName();
        providerClassName = providerClassName.substring(providerClassName.indexOf('.')+1);
        URL servicesURL = loader.findResource("META-INF/services/" + providerClass.getName());
        if (servicesURL != null) {
            BufferedReader in
                = new BufferedReader(new InputStreamReader(servicesURL.openStream()));
            try {
                String className;
                while ((className = in.readLine()) != null) {
                    log.info("Loading the " + providerClassName + " implementation: " + className);
                    try {
View Full Code Here

        {
          try
          {
            WSDLReader wsdlReader = WSDLFactory.newInstance().newWSDLReader();
            Definition def = wsdlReader.readWSDL(f.toURL().toExternalForm());
            URL url = new WSDLParser(def).getServiceLocationURL(serviceName, portName);
            if(url!=null && (def.getTargetNamespace().equals(serviceName.getNamespaceURI())))
            {
              targetWsdlFile = f;
              log.debug("Matching "+processId + " to WSDL file "+targetWsdlFile);
              break;
View Full Code Here

        } catch(IllegalStateException ise) {
          // Ignore for now - endpoint may not exist if first version
        }
      }
     
      URL serviceUrl = new WSDLParser(wsdlRef.getDefinition()).getServiceLocationURL(metaData.getServiceName(), metaData.getPortName());

      ServiceEndpointReference ref = JBossDSPFactory.getServiceDeployer().deploy(metaData, providerImpl.getClass(),
                    serviceUrl, classLoader, warArchive, serverConfig);
     
      /*
 
View Full Code Here

  private String getDefaultWebServiceBaseUrl() {
    try {   
      ServerConfig serverConfig=JBossDSPFactory.getServerConfig();
      String webServiceHost = serverConfig.getWebServiceHost();
      int webServicePort = serverConfig.getWebServicePort();
      URL theURL = new URL("http://" + webServiceHost + ":" + webServicePort);
      if (Boolean.valueOf(properties.getProperty(BPELEngineImpl.BPEL_WEBSERVICE_SECURE, "false"))) {
        int secureWebServicePort = serverConfig.getWebServicePort();
        theURL = new URL("https://" + webServiceHost + ":" + secureWebServicePort);
      }
      return theURL.toExternalForm();
    } catch (MalformedURLException e) {
      log.error("Error in constructing the webservice base url.", e);
      return null;
    }
  }
View Full Code Here

                                                                  String address) {
        // URL rewrite
        if (transport.equals("http") || transport.equals("https")) {
            if (address.indexOf(":") != -1) {
                try {
                    address = new URL(address).getPath();
                } catch (MalformedURLException e) {
                    String msg = "URL " + address + " is malformed";
                    log.error(msg, e);
                    throw new SynapseException(msg, e);
                }
View Full Code Here

   
    long now = System.currentTimeMillis();

    updateProgressInfo();

    URL u = task.getUrl();
    String urlString = u.toString();
    String referer = task.getReferer();
    int depth = task.getMaxDepth();

    if (depth < 0) {
      log.info("Max search depth reached");
      return;
    }

    // we may need this additional check even if we
    // tested it during adding to the tasks list
    if (!isAllowed(u)) {
      log.info("Url '" + u + "' filtered out.");
      return;
    }

    if (u.getFile().equals("")) {
      try {
        urlString = urlString + "/";
        u = new URL(urlString);
        // fix for double retrieved files
        task.setUrl(u);
      } catch (MalformedURLException e) {
        log.error("URL not well formed: " + e.toString());
        // use exception handler to handle exception
        exceptionHandler.handleException(this, u, e);
        return;
      }
    }

    log.info("retrieving " + urlString);
    httpTool.setReferer(referer);

    HttpDoc doc = null;
    Vector links = null;
    boolean cached = false;

    // look in the cache first, but only for static pages
    boolean reScan = true;
    if ((docManager != null && allowCaching)
      && (task.getMethod() == HttpConstants.GET)
      && (task.getParamString() == null)) {
      doc = docManager.retrieveFromCache(u);
/*      if (doc != null) {
        try {
          links = ((UrlCollector) docManager).retrieveLinks(doc);
        } catch (IOException e) {
          log.info("Could not get links for " + u + ": " + e.getMessage());
          links = null;
        }
      }*/
     
      if (doc != null) {
        countCache++;
        long lastRetrieved = doc.getDateAsMilliSeconds();
        double ageInSeconds = (now - lastRetrieved) / 1000;
        if (ageInSeconds < 0) {
          log.warn("DocumentAge < 0!");
        }
        reScan = maxDocumentAge >= 0 && ageInSeconds > maxDocumentAge;
        if (reScan) {
          long lastModified = doc.getLastModifiedAsMilliSeconds();
          Date lastModifiedDate = new Date(lastModified);
          httpTool.setIfModifiedSince(lastModifiedDate);
        }
      } else {
        httpTool.setIfModifiedSince(null);
      }
    }

    // if not found in cache, retrieve from the web page
    if (reScan) {
      HttpDoc newDoc;
      boolean error = false;
      try {
        if (u.getProtocol().equalsIgnoreCase("file")) {
          // retrieve from file
          newDoc = retrieveFileURL(u, httpTool.getIfModifiedSince());
        } else {
          // retrieve from Web
          newDoc = httpTool.retrieveDocument(u, task.getMethod(), task.getParamString());
          if (newDoc != null) {
            newDoc.setDate(now);
          }
          sleepNow();
        }
       
        if (newDoc!= null && !newDoc.isNotModified()) {
          if (!(newDoc.isOk() || newDoc.isRedirect())) {
            error = true;
          }
        } else {
          // (newDoc == null || newDoc.isNotModified()) && doc != null
          // -> Not modified
          // -> refresh time stamp
          if (doc != null) {
            doc.setDate(now);
            doc.setCached(false);
            newDoc = null;
          }
        }
      } catch (HttpException hex) {
        error = true; newDoc = null;
      }
      if (error) {
        int retry = task.retry();
        if (retry <= maxRetries) {
          synchronized(visited) {
            todo.add(task);
            visited.remove(task);
          }
          log.info("Adding " + u + " for retry no. " + retry);
          return;
        } else {
          doc = docManager.retrieveFromCache(u);
          if (doc == null) {
            log.warn("Unsuccessfull retries for " + u);
            return;
          } else {
            long docDate = doc.getDateAsMilliSeconds();
            long age = (now - docDate);
            age /= 1000;
            if (expirationAge < 0 || age < expirationAge) {
              newDoc = doc;
              cached = true;
              log.info("Cached document not expired: " + u);
            } else {
              log.warn("Cached document expired: " + u);
              docManager.removeDocument(u);
              return;
            }
          }
        }
      }
     
      if (newDoc != null) {
        countWeb++;
        doc = newDoc;
        links = null; // force recalculation of links
        countRefresh++;
      } else {
        cached = true;
        countNoRefresh++;
      }
    } else {
      cached = true;
      log.debug("Page " + u + " retrieved from cache");
    }

    // Add it to the visited vector
    // needs to be synchronized with todo-list
//    visited.add(task);
   
    // got a NULL document, that doc was not retrieved
    // usually, it was not downloaded because a rule didn't allow
    // to download it
    if (doc == null) {
      log.info("not downloaded " + u);
      return;
    }

    // Duplicate check
    String duplicate=null;
    if (duplicateCheck) {
      duplicate = getContentVisitedURL(doc);
      if (duplicate != null) {
        log.info("URLs with same content found: " + urlString + " = " + duplicate);
      } else
        try {
          duplicate = docManager.findDuplicate(doc);
          if (duplicate != null) {
            log.info("URLs with same content found in cache: " + urlString + " = " + duplicate);
          }
        } catch (IOException e) {
          e.printStackTrace();
        }
      }
     
      if (duplicate != null) {
        String pureDuplicate = removeParameters(duplicate);
        String pureUrl = removeParameters(urlString);
        if (!pureUrl.equals(pureDuplicate) && !cached) {
          // different url not yet stored -> store it
          try {
            // retrieve links from original
            HttpDoc linksDoc = docManager.retrieveFromCache(new URL(duplicate));
            if (linksDoc != null) {   
              doc.setLinks(linksDoc.getLinks());
            }
            docManager.storeDocument(doc);
          } catch (Exception e) {
            e.printStackTrace();
          }
        }
        RobotTask newTask;
        try {
          newTask = createRobotTask(new URL(duplicate), depth, referer);
          // check already here for visited tasks to save memory
          if (!visited.contains(newTask)) {
            addTask(newTask);
          }
        } catch (MalformedURLException e) {
          e.printStackTrace(); // Can't happen
        }
        return;
      }
    }

    // was it an UnAuthorized document ?
    if (doc.isUnauthorized()) {
      log.info("got HTTP Unauthorized for URL " + u);
    }

    if (doc.isOk() || cached) {
      // callback
      if (webRobotCallback != null) {
        int contentLength=0;
        if (doc.getContent() != null) { contentLength=doc.getContent().length; }
        webRobotCallback.webRobotRetrievedDoc(urlString, contentLength);
      }

      // extract links
      try {
        if (doc.isHTML() && (depth > 0)) {
          // solving encoding problem
          // HtmlDocument htmlDoc = new HtmlDocument(u, doc.getContent());
          HtmlDocument htmlDoc = null;
          HttpHeader contentTypeHeader = doc.getHeader("Content-type");
          if (contentTypeHeader != null) {
            String contentType = contentTypeHeader.getValue();
            int index = contentType.toLowerCase().indexOf("charset=");
            if (index > 0) {
              htmlDoc = new HtmlDocument(u, doc.getContent(), contentType.substring(index+8));
            } else {
              htmlDoc = new HtmlDocument(u, doc.getContent());
            }
          } else {
            htmlDoc = new HtmlDocument(u, doc.getContent());
          }
 
          // add links
         
          // this depth-check is critical!
          // otherwise far too many RobotTasks will be created
          // this will cause a premature OutOfMemoryException!
          if (depth > 0) {
            if (duplicate != null) {
              HttpDoc linksDoc = docManager.retrieveFromCache(new URL(duplicate));
              doc.setLinks(linksDoc.getLinks());
            } else if (cached) {
            }
            if (links == null) {
              links = htmlDoc.getLinks();
              doc.setLinks(links);
            }
            if (duplicate == null) {
              HashSet checkedLinks = new HashSet();
              for (int i = 0; i < links.size(); i++) {
                URL link = (URL) links.elementAt(i);
                log.info("Link: "+link);
                // check already here for duplicate links to avoid expensive
                // creation of RobotTasks
                if (!checkedLinks.contains(link)) {
                  checkedLinks.add(link);
                  String myReferer = u.toString();
                  if (u.getUserInfo() != null) {
                    // remove userinfo from referer
                    int endindex = myReferer.indexOf("@")+1;
                    myReferer = "http://"+ myReferer.substring(endindex);
                  }
                 
                  RobotTask newTask = createRobotTask((URL) links.elementAt(i), depth - 1, myReferer);
                  // check already here for visited tasks to save memory
                  if (!visited.contains(newTask)) {
                    // bad workaround to retrieve images first
                    if (newTask.urlString.endsWith(".jpg")) {
                      addTaskAtStart(newTask);
                    } else {
                      addTask(newTask);
                    }
                  }
                }
              }
            }
          }
         
          if (hasFormHandlers) {
            // add forms
            Vector forms = htmlDoc.getElements("form");
            for (int i = 0; i < forms.size(); i++) {
              ExtendedURL eurl = formFiller.fillForm(u, (Element) forms.elementAt(i));
              if (eurl != null) {
                RobotTask newTask = createRobotTask(eurl.getURL(), depth - 1, u.toString());
                newTask.setParamString(eurl.getParams());
                newTask.setMethod(eurl.getRequestMethod());
                addTask(newTask);
              }
            }
          }
 
        }
      // catch any occuring error to keep on processing
      } catch (OutOfMemoryError e) {
        throw e;
      } catch (Throwable e){
        log.error("Unexpected error while extraction links from url '" + u + "':"+e);
        e.printStackTrace();
        // continue processing
      }

      // filter and store the document
      if ((docManager != null)) {
        try {
          if (filters != null) {
            doc = filters.process(doc);
          } else {
            log.debug("No filters defined");
          }
         
          if (isProcessingAllowed(doc)) {
            docManager.processDocument(doc);
          } else  {
            String md5 = doc.getHeaderValue(HttpHeader.CONTENT_MD5);
            doc.setContent("Not for indexing".getBytes());
            doc.setHeaderValue(HttpHeader.CONTENT_MD5, md5);
          }
         
          try {
            docManager.storeDocument(doc);
          } catch (Exception e) {
            log.warn("could not store (not for indexing) " + urlString + ": " + e.getMessage());
          }
          if (activatedContentHistory && duplicate==null) {
            setContentVisitedURL(doc, urlString);
          }
        } catch (DocManagerException e1) {
          log.error("could not process document: " + e1.getMessage());
          exceptionHandler.handleException(this, u, e1);
        } catch (FilterException e2) {
          log.error(e2.getMessage());
        }
      }

    } else {
      // it was NOT a 200 return code !

      if (doc.isRedirect()) {
        String ref = doc.getLocation();
        log.info("Got redirect to " + ref);

        try {
          URL u2 = new URL(u, ref);
          // is it on another host ?

          // On a redirect, browsers use the old Referer instead of the
          // URL that got this redirect
          // Therefore we do not use u.toString as Referer but the old Referer
View Full Code Here

TOP

Related Classes of java.net.URL

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.