Package xbird.xquery.func.string

Source Code of xbird.xquery.func.string.Tokenize$TokenizerEmuration

/*
* @(#)$Id: Tokenize.java 3619 2008-03-26 07:23:03Z yui $
*
* Copyright 2006-2008 Makoto YUI
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
*     http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
* Contributors:
*     Makoto YUI - initial implementation
*/
package xbird.xquery.func.string;

import java.io.ObjectStreamException;
import java.util.regex.*;

import xbird.xquery.DynamicError;
import xbird.xquery.XQueryException;
import xbird.xquery.dm.value.*;
import xbird.xquery.dm.value.literal.XString;
import xbird.xquery.dm.value.sequence.ValueSequence;
import xbird.xquery.func.BuiltInFunction;
import xbird.xquery.func.FunctionSignature;
import xbird.xquery.meta.DynamicContext;
import xbird.xquery.meta.IFocus;
import xbird.xquery.type.Type;
import xbird.xquery.type.TypeRegistry;
import xbird.xquery.type.xs.StringType;

/**
* fn:tokenize.
* <DIV lang="en">
* This function breaks the $input string into a sequence of strings,
* treating any substring that matches $pattern as a separator.
* The separators themselves are not returned.
* <ul>
* <li>fn:tokenize($input as xs:string?, $pattern as xs:string) as xs:string*</li>
* <li>fn:tokenize($input as xs:string?, $pattern as xs:string, $flags as xs:string) as xs:string*</li>
* </ul>
* </DIV>
* <DIV lang="ja"></DIV>
*
* @author Makoto YUI (yuin405+xbird@gmail.com)
* @link http://www.w3.org/TR/xquery-operators/#func-tokenize
*/
public final class Tokenize extends BuiltInFunction {
    private static final long serialVersionUID = 5015360216397174624L;
    public static final String SYMBOL = "fn:tokenize";

    public Tokenize() {
        super(SYMBOL, TypeRegistry.safeGet("xs:string*"));
    }

    protected FunctionSignature[] signatures() {
        final FunctionSignature[] s = new FunctionSignature[2];
        Type strq = TypeRegistry.safeGet("xs:string?");
        s[0] = new FunctionSignature(getName(), new Type[] { strq, StringType.STRING });
        s[1] = new FunctionSignature(getName(), new Type[] { strq, StringType.STRING,
                StringType.STRING });
        return s;
    }

    public Sequence eval(Sequence<? extends Item> contextSeq, ValueSequence argv, DynamicContext dynEnv) throws XQueryException {
        assert (argv != null);
        final int arglen = argv.size();
        assert (arglen == 2 || arglen == 3);
        Item firstItem = argv.getItem(0);
        // If $input is the empty sequence, or if $input is the zero-length string, the result is the empty sequence.
        if(firstItem.isEmpty()) {
            return ValueSequence.EMPTY_SEQUENCE;
        }
        assert (firstItem instanceof XString);
        String input = firstItem.stringValue();
        Item secondItem = argv.getItem(1);
        assert (secondItem instanceof XString);
        String pattern = secondItem.stringValue();
        int flags = Pattern.UNIX_LINES;
        if(arglen == 3) {
            Item thirdItem = argv.getItem(2);
            assert (thirdItem instanceof XString);
            String flagsStr = thirdItem.stringValue();
            flags = Matches.convertFlags(flagsStr);
        }
        final Pattern compiled;
        try {
            compiled = Pattern.compile(pattern, flags);
        } catch (PatternSyntaxException pse) {
            throw new DynamicError("err:FORX0002", "Invalid pattern: " + pattern);
        }
        if(compiled.matcher("").matches()) {
            // If the supplied $pattern matches a zero-length string, that is,
            // if fn:matches("", $pattern, $flags) returns true, then an error is raised.           
            throw new DynamicError("err:FORX0003", "Specified pattern is illegally matched to zero-length string: "
                    + pattern);
        }
        return new TokenizerEmuration(input, compiled, dynEnv);
    }

    private static final class TokenizerEmuration extends AbstractSequence<XString> {

        private final String input;
        private Pattern pattern;
        private transient Matcher matcher;

        public TokenizerEmuration() {
            super(DynamicContext.DUMMY);
            this.input = null;
            this.matcher = null;
        }

        TokenizerEmuration(String input, Pattern pattern, DynamicContext dynEnv) {
            super(dynEnv);
            assert (input != null && pattern != null);
            this.input = input;
            this.pattern = pattern;
            this.matcher = pattern.matcher(input);
        }

        public boolean next(IFocus focus) throws XQueryException {
            if(matcher.find()) {
                String matched = input.substring(matcher.start(), matcher.end());
                focus.setContextItem(XString.valueOf(matched));
            }
            return false;
        }

        public Type getType() {
            return TypeRegistry.safeGet("xs:string*");
        }

        @Override
        public IFocus<XString> iterator() {
            matcher.reset();
            return super.iterator();
        }

        private Object readResolve() throws ObjectStreamException {
            assert (pattern != null);
            assert (input != null);
            this.matcher = pattern.matcher(input);
            return this;
        }

    }

}
TOP

Related Classes of xbird.xquery.func.string.Tokenize$TokenizerEmuration

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.