/**
* Copyright (C) 2009 eXo Platform SAS.
*
* This is free software; you can redistribute it and/or modify it
* under the terms of the GNU Lesser General Public License as
* published by the Free Software Foundation; either version 2.1 of
* the License, or (at your option) any later version.
*
* This software is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this software; if not, write to the Free
* Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
* 02110-1301 USA, or see the FSF site: http://www.fsf.org.
*/
package org.exoplatform.services.html.tidy;
import org.exoplatform.services.chars.CharsUtil;
import org.exoplatform.services.chars.SpecChar;
import org.exoplatform.services.html.Name;
import org.exoplatform.services.html.parser.HTML;
import org.exoplatform.services.html.parser.NodeImpl;
import org.exoplatform.services.token.TokenParser;
import org.exoplatform.services.token.TokenParser.Factory;
import org.exoplatform.services.token.TypeToken;

import java.util.Locale;
/**
 * Builds {@link NodeImpl} tokens from slices of a character buffer and hands
 * each one to {@code push(...)}.
 * <p>
 * Tag slices are resolved to a {@link Name} through {@link HTML#getName}; names
 * that cannot be resolved are pushed as {@link Name#UNKNOWN}. After an opening
 * {@code SCRIPT} or {@code STYLE} tag the buffer is scanned for the matching
 * end tag so that everything in between is pushed as a single content token.
 *
 * @author nhuthuan
 * Email: nhudinhthuan@yahoo.com
 */
@SuppressWarnings("serial")
public class TidyToken extends Factory<NodeImpl>
{
   /** Lower-case element name matched by {@link #indexEndNode} for script blocks. */
   private char[] script = {'s', 'c', 'r', 'i', 'p', 't'};

   /** Lower-case element name matched by {@link #indexEndNode} for style blocks. */
   private char[] style = {'s', 't', 'y', 'l', 'e'};

   /** Parser consulted for comment detection while scanning script/style bodies. */
   private TokenParser parser;

   /**
    * Creates a token from {@code data[start, end)} and pushes it.
    *
    * @param data  the character buffer being tokenized
    * @param start index of the first character of the slice
    * @param end   index just past the slice; clamped to {@code data.length}
    *              when the slice is cut out
    * @param type  one of the {@link TypeToken} constants; anything other than
    *              {@code TypeToken.TAG} yields a comment or content token
    * @return the index the caller should continue from: {@code end} in the
    *         common case, {@code data.length} when {@code start} lies beyond
    *         the buffer, or the value returned by {@link #findEndScript} after
    *         an opening {@code SCRIPT}/{@code STYLE} tag
    */
   public int create(char[] data, int start, int end, int type)
   {
      if (start >= end)
      {
         return end;
      }
      if (start > data.length)
      {
         return data.length;
      }

      char[] value = CharsUtil.cutAndTrim(data, start, Math.min(end, data.length));
      if (value.length < 1)
      {
         return end;
      }

      if (type != TypeToken.TAG)
      {
         Name textName = (type == TypeToken.COMMENT) ? Name.COMMENT : Name.CONTENT;
         push(new NodeImpl(value, textName));
         return end;
      }

      if (value[0] == SpecChar.END_TAG)
      {
         pushCloseTag(value);
         return end;
      }

      // Locale.ENGLISH keeps the lookup key stable regardless of the JVM default
      // locale (a Turkish default would upper-case 'i' to a dotted capital I).
      String nameValue = new String(CharsUtil.cutBySpace(value, 0)).toUpperCase(Locale.ENGLISH);
      // Strip the trailing slash of a self-closing form such as "BR/".
      if (nameValue.length() > 0 && nameValue.charAt(nameValue.length() - 1) == SpecChar.END_TAG)
      {
         nameValue = nameValue.substring(0, nameValue.length() - 1).trim();
      }
      if ("!DOCTYPE".equals(nameValue))
      {
         // Document type declarations produce no token.
         return end;
      }

      Name name = HTML.getName(nameValue);
      if (name == null)
      {
         push(new NodeImpl(value, Name.UNKNOWN));
         return end;
      }
      push(new NodeImpl(value, name, TypeToken.TAG));

      if (name == Name.SCRIPT)
      {
         return findEndScript(data, script, end);
      }
      if (name == Name.STYLE)
      {
         return findEndScript(data, style, end);
      }
      return end;
   }

   /**
    * Pushes the token for a tag slice whose first character is
    * {@code SpecChar.END_TAG}. A resolvable name is pushed as a
    * {@code TypeToken.CLOSE} node; otherwise the slash-prefixed text is pushed
    * as {@link Name#UNKNOWN}. A slice consisting of the slash alone is ignored.
    *
    * @param value the trimmed tag text, starting with the slash
    */
   private void pushCloseTag(char[] value)
   {
      if (value.length <= 1)
      {
         return;
      }
      char[] tagName = CharsUtil.cutAndTrim(value, 1, value.length);
      Name name = HTML.getName(new String(tagName).toUpperCase(Locale.ENGLISH));
      if (name != null)
      {
         push(new NodeImpl(tagName, name, TypeToken.CLOSE));
         return;
      }
      // Unknown element: keep the leading slash in the stored text.
      char[] raw = new char[tagName.length + 1];
      raw[0] = SpecChar.END_TAG;
      System.arraycopy(tagName, 0, raw, 1, tagName.length);
      push(new NodeImpl(raw, Name.UNKNOWN));
   }

   /**
    * Locates the end tag named {@code c} after position {@code start} and, when
    * found, pushes the text in between as content followed by the end tag.
    *
    * @param value the character buffer being tokenized
    * @param c     lower-case element name to look for
    * @param start the {@code end} index of the opening tag as received by
    *              {@link #create}; the content slice begins at {@code start + 1}
    * @return {@code start} when no end tag is found, otherwise the value
    *         returned by {@link #create} for the end tag
    */
   private int findEndScript(char[] value, char[] c, int start)
   {
      int[] idx = indexEndNode(value, c, start);
      if (idx.length < 1)
      {
         return start;
      }
      create(value, start + 1, idx[0], TypeToken.CONTENT);
      return create(value, idx[1], idx[2], TypeToken.TAG);
   }

   /**
    * Scans {@code value} from {@code start} for an end tag whose name equals
    * {@code c}, ignoring case.
    * <p>
    * When an opening bracket is followed by {@code SpecChar.PUNCTUATION_MASK}
    * and the parser reports a comment, the comment is pushed through
    * {@link #create} before the scan either resumes or stops. The parser set
    * with {@link #setParser} must therefore be non-null whenever that character
    * sequence can occur.
    *
    * @param value the character buffer being scanned
    * @param c     lower-case element name to match
    * @param start index at which the scan begins
    * @return an empty array when no end tag is found; otherwise a three-element
    *         array holding the index of the opening bracket, the index of the
    *         slash, and the index of the closing bracket
    */
   private int[] indexEndNode(char[] value, char[] c, int start)
   {
      int[] idx = new int[3];
      for (int i = start; i < value.length; i++)
      {
         if (value[i] != SpecChar.OPEN_TAG)
         {
            continue;
         }
         idx[0] = i;
         int k = i + 1;
         if (k >= value.length)
         {
            // The bracket is the last character of the buffer: nothing can follow it.
            break;
         }

         if (value[k] == SpecChar.PUNCTUATION_MASK && parser.isComment(value, k))
         {
            int startComment = k;
            int endComment = parser.findEndComment(value, k);
            startComment = create(value, startComment - 1, endComment, TypeToken.COMMENT);
            if (startComment < value.length && value[startComment] == SpecChar.OPEN_TAG)
            {
               // NOTE(review): together with the loop increment the scan resumes at
               // startComment + 2, so the bracket just tested is not re-examined;
               // confirm against TokenParser.findEndComment that this is intended.
               i = startComment + 1;
               continue;
            }
            break;
         }

         // Skip slashes and whitespace after the bracket, remembering the last slash.
         int slash = -1;
         while (k < value.length)
         {
            if (value[k] == SpecChar.END_TAG)
            {
               slash = k;
            }
            else if (!Character.isWhitespace(value[k]))
            {
               break;
            }
            k++;
         }

         // Only a tag that carries a slash can be the end tag; without it the
         // slash index handed to the caller would be meaningless.
         if (slash < 0 || !matchesIgnoreCase(value, k, c))
         {
            continue;
         }
         idx[1] = slash;

         k += c.length;
         while (k < value.length)
         {
            if (value[k] != SpecChar.END_TAG && !Character.isWhitespace(value[k]))
            {
               break;
            }
            k++;
         }
         if (k >= value.length)
         {
            return new int[0];
         }
         idx[2] = k;
         if (value[k] == SpecChar.CLOSE_TAG)
         {
            return idx;
         }
      }
      return new int[0];
   }

   /**
    * Tells whether {@code value} contains {@code name} at {@code offset},
    * comparing each buffer character in lower case.
    *
    * @param value  the buffer to inspect
    * @param offset index of the first character to compare
    * @param name   lower-case characters expected at {@code offset}
    * @return {@code true} when every character matches and the buffer is long
    *         enough to hold the whole name; {@code false} otherwise
    */
   private static boolean matchesIgnoreCase(char[] value, int offset, char[] name)
   {
      if (offset < 0 || value.length - offset < name.length)
      {
         return false;
      }
      for (int j = 0; j < name.length; j++)
      {
         if (name[j] != Character.toLowerCase(value[offset + j]))
         {
            return false;
         }
      }
      return true;
   }

   /**
    * Returns the parser used for comment detection.
    *
    * @return the parser previously supplied to {@link #setParser}, or
    *         {@code null} when none has been set
    */
   public TokenParser getParser()
   {
      return parser;
   }

   /**
    * Sets the parser used for comment detection inside script and style bodies.
    *
    * @param parser the parser to use
    */
   public void setParser(TokenParser parser)
   {
      this.parser = parser;
   }
}