* if so, is it dominant. */
int ltrCnt = 0;
int rtlCnt = 0;
while (textIter.hasNext()) {
TextPosition position = textIter.next();
String stringValue = position.getCharacter();
for (int a = 0; a < stringValue.length(); a++) {
byte dir = Character.getDirectionality(stringValue.charAt(a));
if ((dir == Character.DIRECTIONALITY_LEFT_TO_RIGHT)
|| (dir == Character.DIRECTIONALITY_LEFT_TO_RIGHT_EMBEDDING)
|| (dir == Character.DIRECTIONALITY_LEFT_TO_RIGHT_OVERRIDE)) {
ltrCnt++;
} else if ((dir == Character.DIRECTIONALITY_RIGHT_TO_LEFT)
|| (dir == Character.DIRECTIONALITY_RIGHT_TO_LEFT_ARABIC)
|| (dir == Character.DIRECTIONALITY_RIGHT_TO_LEFT_EMBEDDING)
|| (dir == Character.DIRECTIONALITY_RIGHT_TO_LEFT_OVERRIDE)) {
rtlCnt++;
}
}
}
// choose the dominant direction
boolean isRtlDominant = rtlCnt > ltrCnt;
startArticle(!isRtlDominant);
startOfArticle = true;
// we will later use this to skip reordering
boolean hasRtl = rtlCnt > 0;
/* Now cycle through to print the text.
* We queue up a line at a time before we print so that we can convert
* the line from presentation form to logical form (if needed).
*/
List<TextPosition> line = new ArrayList<TextPosition>();
textIter = textList.iterator(); // start from the beginning again
/* PDF files don't always store spaces. We will need to guess where we should add
* spaces based on the distances between TextPositions. Historically, this was done
* based on the size of the space character provided by the font. In general, this worked
* but there were cases where it did not work. Calculating the average character width
* and using that as a metric works better in some cases but fails in others where the
* space-width approach worked. So we use both. NOTE: Adobe Reader also fails on some of
* these examples.
*/
//Keeps track of the previous average character width
float previousAveCharWidth = -1;
while (textIter.hasNext()) {
TextPosition position = textIter.next();
PositionWrapper current = new PositionWrapper(position);
String characterValue = position.getCharacter();
//Resets the average character width when we see a change in font
// or a change in the font size
if (lastPosition != null
&& ((position.getFont() != lastPosition.getTextPosition().getFont()) || (position.getFontSize() != lastPosition
.getTextPosition().getFontSize()))) {
previousAveCharWidth = -1;
}
float positionX;
float positionY;
float positionWidth;
float positionHeight;
/* If we are sorting, then we need to use the text direction
* adjusted coordinates, because they were used in the sorting. */
if (getSortByPosition()) {
positionX = position.getXDirAdj();
positionY = position.getYDirAdj();
positionWidth = position.getWidthDirAdj();
positionHeight = position.getHeightDir();
} else {
positionX = position.getX();
positionY = position.getY();
positionWidth = position.getWidth();
positionHeight = position.getHeight();
}
//The current number of characters in a word
int wordCharCount = position.getIndividualWidths().length;
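/* Estimate the expected width of the space based on the
* space character with some margin.
* NOTE(review): the (wordSpacing == Float.NaN) test below is always false,
* because NaN never compares equal to any value (including itself);
* Float.isNaN(wordSpacing) is presumably what was intended. */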
float wordSpacing = position.getWidthOfSpace();
float deltaSpace = 0;
if ((wordSpacing == 0) || (wordSpacing == Float.NaN)) {
deltaSpace = Float.MAX_VALUE;
} else {
if (lastWordSpacing < 0) {