Package com.hp.hpl.jena.eyeball.repairtools.analysis

Source Code of com.hp.hpl.jena.eyeball.repairtools.analysis.URIAnalysis

package com.hp.hpl.jena.eyeball.repairtools.analysis;

import java.util.Iterator;
import java.util.NoSuchElementException;

import com.hp.hpl.jena.eyeball.*;
import com.hp.hpl.jena.eyeball.repairtools.*;
import com.hp.hpl.jena.eyeball.vocabulary.EYE;
import com.hp.hpl.jena.rdf.model.Model;
import com.hp.hpl.jena.rdf.model.NodeIterator;
import com.hp.hpl.jena.rdf.model.RDFNode;
import com.hp.hpl.jena.rdf.model.ResIterator;
import com.hp.hpl.jena.rdf.model.Resource;
import com.hp.hpl.jena.vocabulary.RDF;
import com.swabunga.spell.engine.Word;

public class URIAnalysis extends BaseAnalysis implements Analysis {
 
  public URIAnalysis() { }
 
  public void analyse( Report r, Model m, Model config )
      {
      this.input = m;
      this.repair = r.model();
     
      ResIterator it1 = repair.listSubjectsWithProperty( EYE.badURI );
      while( it1.hasNext() )
        {
        Resource curr = it1.nextResource();
        NodeIterator it2 = repair.listObjectsOfProperty( curr, EYE.forReason );
        RDFNode uri = repair.listObjectsOfProperty( curr, EYE.badURI ).nextNode();
        String work = uri.asNode().getLiteralLexicalForm();
        while ( it2.hasNext() )
          {
          String fault = it2.nextNode().asNode().getURI();
          repair.add( curr, EYE.checkFix, RDF.subject );
          repair.remove( repair.listStatements( curr, EYE.newValue, work ) );
          if ( fault.contains( "unrecognisedScheme" ) )
              { String fixed = analyseUnrecognisedScheme( config, curr, work );
              }
          else if ( fault.contains( "uriContainsSpaces" ) )
            {
            analyseHasSpaces( curr, work );
            }
          else if ( fault.contains( "uriNoHttpAuthority" ) )
            {
            analyseHasNoAuthority( curr, work );
            }
          else if ( fault.contains( "uriHasNoLocalname" ) )
              {
            analyseHasNoLocalName( curr, work );
              }
          else if ( fault.contains( "schemeShouldBeLowercase" ) )
            {
            analyseShouldBeLowerCase( curr, work );
            }
          else
            System.err.println( ">> Unknown badURI fault found - " + fault + ". No repair possible.");
          }
        }
      r.setMitems( repair );
      }

    private String analyseUnrecognisedScheme( Model config, Resource curr,  String work )
        {
        // Default scheme added
        String fixed;
        try
          { fixed = config.listObjectsOfProperty( EYE.defaultScheme ).nextNode().asNode().getLiteralLexicalForm(); }
        catch ( NoSuchElementException e )
          { fixed = "default:///"; }
        if ( work.contains( ":" ) )
          { // Attempt to spellcheck the current scheme
          String scheme = "";
          for ( int i = 0 ; i < work.length(); i++)
            if ( work.charAt( i ) == ':' )
              break;
            else
              scheme += work.charAt( i );
          NodeIterator it2b = config.listObjectsOfProperty( EYE.validScheme );
          String wordList = "";
          while ( it2b.hasNext() )
            wordList += it2b.nextNode().asNode().getLiteralLexicalForm() + '\n';
          GenericWordChecker check = new WordChecker( wordList );
          Iterator<Word> sugg = check.spellCheck( scheme );
          if ( sugg != null )
            fixed = sugg.next().toString();
          }
       
        if ( work.contains( ":" ) )
          {
          boolean trip = false;
          for ( int i = 0; i < work.length(); i++ )
            if (trip)
              fixed += work.charAt( i );
            else
              if ( work.charAt( i ) == ':' )
                trip = true;
          }
        else
          {
          fixed += work;
          }
        work = fixed;
        repair.add( curr, EYE.repairType, EYE.addDefaultScheme )
              .add( curr, EYE.repairConfidence, EYE.low )
              .add( curr, EYE.newValue, fixed );
        return fixed;
        }

    private void analyseHasSpaces( Resource curr, String work )
        {
        String domainpart = "";
        String localpart = "";
        boolean flag = false;
        for ( int i = 0; i < work.length(); i++ )
          { // Separate the domain and local parts of the URI if possible
          if (!flag)
            if ( work.charAt( i ) == '/' )
              {
              if ( i + 1 < work.length() )
                if ( ( work.charAt( i+1 ) != '/' ) && ( work.charAt( i-1 ) != '/' ) )
                  { // We are at a "/" and not a "//"
                  flag = true;
                  localpart += work.charAt(i);
                  }
                else
                  domainpart += work.charAt(i);
              }
            else
              domainpart += work.charAt(i);
          else
            localpart += work.charAt( i );
          }
        String fixed = domainpart.replace( " ", "" ) + localpart.replace( " ", "%20" );
        work = fixed;
        repair.add( curr, EYE.repairType, EYE.removeSpaces )
            .add( curr, EYE.repairConfidence, EYE.moderate )
              .add( curr, EYE.newValue, fixed );
        }

    private void analyseHasNoAuthority( Resource curr, String work )
        {
        String fixed = work;
        if ( !fixed.startsWith( "http://" ) )
          fixed = "http://" + fixed.replaceFirst( "http:", "" );
        work = fixed;
        repair.add( curr, EYE.repairType, EYE.formHttpAuthority )
            .add( curr, EYE.repairConfidence, EYE.low )
              .add( curr, EYE.newValue, fixed );
        }

    private void analyseShouldBeLowerCase( Resource curr, String work )
        {
        work = work.toLowerCase();
        repair.add( curr, EYE.repairType, EYE.schemeToLowercase )
            .add( curr, EYE.repairConfidence, EYE.good )
              .add( curr, EYE.newValue, work );
        }

    private void analyseHasNoLocalName( Resource curr, String work )
        {
        // Strip off trailing '/', '\', ' ', '.', ',', '!' -- common typo chars that /should/ be invalid
        String fixed = "";
        char charAt = work.charAt( work.length() - 1 );
        if (charAt == '/' || charAt == '\\' || charAt == ' ' || charAt == '.' || charAt == ',' || charAt == '!')
          for (int i = 0; i <= work.length() - 2; i += 1fixed += work.charAt( i );
        work = fixed;
        repair.add( curr, EYE.repairType, EYE.removeIllegalChars )
            .add( curr, EYE.repairConfidence, EYE.moderate )
              .add( curr, EYE.newValue, fixed );
        }

}


/*
* (c) Copyright 2006, 2007, 2008 Hewlett-Packard Development Company, LP
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
*    notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
*    notice, this list of conditions and the following disclaimer in the
*    documentation and/or other materials provided with the distribution.
* 3. The name of the author may not be used to endorse or promote products
*    derived from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
* IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
* IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
* THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
TOP

Related Classes of com.hp.hpl.jena.eyeball.repairtools.analysis.URIAnalysis

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.