/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package riotcmd;
import java.io.IOException ;
import java.io.InputStream ;
import org.openjena.atlas.AtlasException ;
import org.openjena.atlas.io.IO ;
import org.openjena.atlas.io.InputStreamBuffered ;
import org.openjena.atlas.io.InStreamUTF8 ;
public class utf8
{
/** Simple program to help hunt down bad UTF-8 encoded characters */
public static void main(String[] args)
{
long INIT_LINE = 1 ;
long INIT_COL = 1 ;
if ( args.length == 0 )
args = new String[] {"-"} ;
String label = "" ;
for ( String fn : args )
{
if ( args.length > 1 )
label = fn+": " ;
InputStream in = IO.openFile(fn) ;
in = new InputStreamBuffered(in) ;
long charCount = 0 ;
long lineNum = INIT_LINE ;
long colNum = INIT_COL ;
try {
InStreamUTF8 utf8 = new InStreamUTF8(in) ;
for (;;)
{
int ch = utf8.read() ;
if ( ch == -1 )
break ;
charCount++ ;
if (ch == '\n')
{
lineNum++;
colNum = INIT_COL ;
}
else
colNum++;
if ( ! Character.isDefined(ch) )
throw new AtlasException(String.format("No such codepoint: 0x%04X", ch)) ;
}
System.out.printf("%s: chars = %d , lines = %d\n", fn, charCount, lineNum) ;
} catch (AtlasException ex)
{
System.out.printf(label+"[line=%d, col=%d] %s\n", lineNum, colNum, ex.getMessage()) ;
}
catch (IOException ex)
{
ex.printStackTrace(System.err) ;
}
}
}
}