package org.apache.roller.util;
import java.io.BufferedInputStream;
import java.io.BufferedOutputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.io.UnsupportedEncodingException;
import java.net.URLDecoder;
import java.net.URLEncoder;
import java.security.MessageDigest;
import java.util.NoSuchElementException;
import java.util.StringTokenizer;
import org.apache.commons.lang.StringEscapeUtils;
import org.apache.commons.lang.StringUtils;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import sun.misc.BASE64Decoder;
import sun.misc.BASE64Encoder;
/**
* General purpose utilities, not for use in templates.
*/
public class Utilities {
/** The <code>Log</code> instance for this class; never reassigned. */
private static final Log mLogger = LogFactory.getLog(Utilities.class);
//------------------------------------------------------------------------
/**
 * Removes a <code>;jsessionid=...</code> segment from a URL.
 * Everything from the marker up to (but not including) the first
 * following '?' is dropped; when no '?' follows, the rest of the URL
 * is dropped.
 *
 * @param url the URL to clean; must not be null
 * @return the URL without its jsessionid segment, or the URL unchanged
 *         when it contains no such segment
 */
public static String stripJsessionId(String url) {
    int sessionStart = url.indexOf(";jsessionid=");
    if (sessionStart == -1) {
        // nothing to strip
        return url;
    }
    String head = url.substring(0, sessionStart);
    int queryStart = url.indexOf("?", sessionStart);
    if (queryStart == -1) {
        return head;
    }
    // keep the query string that followed the session id
    return head + url.substring(queryStart);
}
//------------------------------------------------------------------------
/**
* Escape, but do not replace HTML.
* The default behaviour is to escape ampersands.
*/
public static String escapeHTML(String s) {
return escapeHTML(s, true);
}
//------------------------------------------------------------------------
/**
* Escape, but do not replace HTML.
* @param escapeAmpersand Optionally escape
* ampersands (&).
*/
public static String escapeHTML(String s, boolean escapeAmpersand) {
// got to do amp's first so we don't double escape
if (escapeAmpersand) {
s = StringUtils.replace(s, "&", "&");
}
s = StringUtils.replace(s, " ", " ");
s = StringUtils.replace(s, "\"", """);
s = StringUtils.replace(s, "<", "<");
s = StringUtils.replace(s, ">", ">");
return s;
}
/**
 * Converts HTML character entities in the text back to the characters
 * they stand for, by delegating to
 * <code>StringEscapeUtils.unescapeHtml</code>.
 *
 * @param str the text containing entities
 * @return whatever <code>StringEscapeUtils.unescapeHtml</code> returns
 */
public static String unescapeHTML(String str) {
    String unescaped = StringEscapeUtils.unescapeHtml(str);
    return unescaped;
}
//------------------------------------------------------------------------
/**
 * Removes HTML, defined as any text between the characters
 * "&lt;" and "&gt;", replacing each removed tag that is not at the very
 * start of the string with a single space.
 *
 * @param str the text to strip; may be null
 * @return the stripped text, or "" when str is null
 */
public static String removeHTML(String str) {
    return removeHTML(str, true);
}
/**
 * Removes HTML, defined as any text between the characters
 * "&lt;" and "&gt;". A "&lt;" that is never closed is kept together with
 * everything after it.
 *
 * @param str the text to strip; may be null
 * @param addSpace when true, a space is inserted in place of every tag
 *        that does not start at index 0
 * @return "" when str is null; str itself (untrimmed) when it contains
 *         no "&lt;"; otherwise the stripped, trimmed text
 */
public static String removeHTML(String str, boolean addSpace) {
    if (str == null) {
        return "";
    }
    int tagOpen = str.indexOf("<");
    if (tagOpen == -1) {
        // no markup at all: hand the input back untouched
        return str;
    }
    StringBuffer text = new StringBuffer(str.length());
    int cursor = 0;
    int tagClose = 0;
    while (tagOpen >= cursor) {
        if (tagOpen > 0) {
            // copy the plain text that precedes this tag
            text.append(str.substring(cursor, tagOpen));
            if (addSpace) {
                text.append(" ");
            }
        }
        tagClose = str.indexOf(">", tagOpen);
        if (tagClose == -1) {
            // unterminated tag: keep the remainder verbatim and stop
            text.append(str.substring(tagOpen));
            break;
        }
        cursor = tagClose + 1;
        tagOpen = str.indexOf("<", cursor);
    }
    // plain text that follows the last complete tag
    if (tagClose > -1 && tagClose + 1 < str.length()) {
        text.append(str.substring(tagClose + 1));
    }
    return text.toString().trim();
}
//------------------------------------------------------------------------
/**
 * Strips HTML tags with removeHTML and then escapes the remaining text
 * with escapeHTML.
 *
 * @param s the text to clean; may be null
 * @return the stripped and escaped text, or "" when s is null
 */
public static String removeAndEscapeHTML(String s) {
    if (s != null) {
        String withoutTags = Utilities.removeHTML(s);
        return Utilities.escapeHTML(withoutTags);
    }
    return "";
}
//------------------------------------------------------------------------
/**
 * Autoformat: turns every newline character into an HTML line break
 * (<code>&lt;br /&gt;</code>).
 *
 * @param s the text to format
 * @return the text with each "\n" replaced
 */
public static String autoformat(String s) {
    return StringUtils.replace(s, "\n", "<br />");
}
//------------------------------------------------------------------------
/**
 * Replaces every character that is not a letter or digit with an
 * underscore.
 *
 * @param str the text to process; must not be null
 * @return a string of the same length as str
 */
public static String replaceNonAlphanumeric(String str) {
    return replaceNonAlphanumeric(str, '_');
}
//------------------------------------------------------------------------
/**
 * Replaces every character that is not a letter or digit (as decided by
 * <code>Character.isLetterOrDigit</code>) with the supplied character.
 *
 * @param str the text to process; must not be null
 * @param subst the replacement character
 * @return a string of the same length as str
 */
public static String replaceNonAlphanumeric(String str, char subst) {
    int length = str.length();
    StringBuffer cleaned = new StringBuffer(length);
    for (int pos = 0; pos < length; pos++) {
        char current = str.charAt(pos);
        cleaned.append(Character.isLetterOrDigit(current) ? current : subst);
    }
    return cleaned.toString();
}
//------------------------------------------------------------------------
/**
 * Removes every character that is neither a letter, a digit, nor a
 * period. Periods are kept so that page links survive.
 *
 * @param str the text to process; must not be null
 * @return the text with all other characters dropped
 */
public static String removeNonAlphanumeric(String str) {
    int length = str.length();
    StringBuffer kept = new StringBuffer(length);
    for (int pos = 0; pos < length; pos++) {
        char current = str.charAt(pos);
        boolean keep = current == '.' || Character.isLetterOrDigit(current);
        if (keep) {
            kept.append(current);
        }
    }
    return kept.toString();
}
//------------------------------------------------------------------------
/**
 * Joins the elements of an array into one string, placing the delimiter
 * between every pair of adjacent elements.
 *
 * The delimiter is decided by element position, not by whether anything
 * has been accumulated so far, so empty elements (including a leading
 * one) keep their place in the output.
 *
 * @param stringArray the elements to join; must not be null
 * @param delim the text placed between elements
 * @return the joined string, or "" for an empty array
 */
public static String stringArrayToString(String[] stringArray, String delim) {
    StringBuffer joined = new StringBuffer();
    for (int i = 0; i < stringArray.length; i++) {
        if (i > 0) {
            joined.append(delim);
        }
        joined.append(stringArray[i]);
    }
    return joined.toString();
}
//--------------------------------------------------------------------------
/**
 * Splits a string into tokens using a <code>StringTokenizer</code>.
 * Each character of delim acts as a separator and empty tokens are
 * skipped.
 *
 * @param instr the text to split; must not be null
 * @param delim the set of delimiter characters
 * @return the tokens in order of appearance
 */
public static String[] stringToStringArray(String instr, String delim)
        throws NoSuchElementException, NumberFormatException {
    StringTokenizer tokens = new StringTokenizer(instr, delim);
    String[] parts = new String[tokens.countTokens()];
    for (int pos = 0; tokens.hasMoreTokens(); pos++) {
        parts[pos] = tokens.nextToken();
    }
    return parts;
}
//--------------------------------------------------------------------------
/**
 * Splits a string into tokens using a <code>StringTokenizer</code> and
 * parses each token as a decimal int.
 *
 * @param instr the text to split; must not be null
 * @param delim the set of delimiter characters
 * @return the parsed values in order of appearance
 * @throws NumberFormatException if a token is not a valid int
 */
public static int[] stringToIntArray(String instr, String delim)
        throws NoSuchElementException, NumberFormatException {
    StringTokenizer tokens = new StringTokenizer(instr, delim);
    int[] values = new int[tokens.countTokens()];
    for (int pos = 0; tokens.hasMoreTokens(); pos++) {
        values[pos] = Integer.parseInt(tokens.nextToken());
    }
    return values;
}
//-------------------------------------------------------------------
/**
 * Renders an int array as a comma-separated list without spaces.
 *
 * @param intArray the values to render; must not be null
 * @return the list, or "" for an empty array
 */
public static String intArrayToString(int[] intArray) {
    StringBuffer joined = new StringBuffer();
    for (int pos = 0; pos < intArray.length; pos++) {
        if (pos > 0) {
            joined.append(",");
        }
        joined.append(Integer.toString(intArray[pos]));
    }
    return joined.toString();
}
//------------------------------------------------------------------------
/**
 * Copies the contents of one file to another by opening both files and
 * handing the streams to copyInputToOutput with the source file's
 * length as the byte count.
 *
 * Failures to open either file are reported as an IOException whose
 * message names the file and whose cause is the original exception.
 *
 * @param from the file to read
 * @param to the file to write
 * @throws IOException if either file cannot be opened or the copy fails
 */
public static void copyFile(File from, File to) throws IOException {
    InputStream in;
    try {
        in = new FileInputStream(from);
    } catch (IOException ex) {
        IOException wrapped = new IOException(
            "Utilities.copyFile: opening input stream '"
            + from.getPath()
            + "', "
            + ex.getMessage());
        wrapped.initCause(ex);
        throw wrapped;
    }
    OutputStream out;
    try {
        out = new FileOutputStream(to);
    } catch (Exception ex) {
        // The input stream is already open; release it before reporting.
        try {
            in.close();
        } catch (IOException ignored) {
            // best effort: the failure to open the output is what matters
        }
        IOException wrapped = new IOException(
            "Utilities.copyFile: opening output stream '"
            + to.getPath()
            + "', "
            + ex.getMessage());
        wrapped.initCause(ex);
        throw wrapped;
    }
    copyInputToOutput(in, out, from.length());
}
//------------------------------------------------------------------------
/**
 * Copies at most byteCount bytes from an input stream to an output
 * stream, wrapping both in buffers. Copying stops early if the input
 * ends first. Both streams are always closed before this method
 * returns or throws.
 *
 * The two streams are closed independently, so a failure to close the
 * input can no longer prevent the output from being flushed and closed.
 *
 * @param input the stream to read from
 * @param output the stream to write to
 * @param byteCount the maximum number of bytes to copy
 * @throws IOException if reading, writing or closing fails; the
 *         original exception is attached as the cause
 */
public static void copyInputToOutput(
        InputStream input,
        OutputStream output,
        long byteCount)
        throws IOException {
    BufferedInputStream in = new BufferedInputStream(input);
    BufferedOutputStream out = new BufferedOutputStream(output);
    byte[] buffer = new byte[8192];
    IOException failure = null;
    try {
        long remaining = byteCount;
        while (remaining > 0) {
            int wanted = (int) Math.min(remaining, (long) buffer.length);
            int got;
            try {
                got = in.read(buffer, 0, wanted);
            } catch (IOException ex) {
                failure = new IOException(
                    "Reading input stream, " + ex.getMessage());
                failure.initCause(ex);
                throw failure;
            }
            if (got < 0) {
                // input ended before byteCount bytes were seen
                break;
            }
            remaining -= got;
            try {
                out.write(buffer, 0, got);
            } catch (IOException ex) {
                failure = new IOException(
                    "Writing output stream, " + ex.getMessage());
                failure.initCause(ex);
                throw failure;
            }
        }
    } finally {
        // Close each stream on its own. A close error is only reported
        // when no earlier failure is already being propagated.
        try {
            in.close();
        } catch (IOException ex) {
            if (failure == null) {
                failure = new IOException(
                    "Closing file streams, " + ex.getMessage());
                failure.initCause(ex);
            }
        }
        try {
            out.close();
        } catch (IOException ex) {
            if (failure == null) {
                failure = new IOException(
                    "Closing file streams, " + ex.getMessage());
                failure.initCause(ex);
            }
        }
    }
    if (failure != null) {
        // only reachable when the copy succeeded but a close failed
        throw failure;
    }
}
//------------------------------------------------------------------------
/**
 * Copies everything from an input stream to an output stream, wrapping
 * both in buffers. Both streams are always closed before this method
 * returns or throws, including when reading or writing fails.
 *
 * The two streams are closed independently, so a failure to close the
 * input cannot prevent the output from being flushed and closed.
 *
 * @param input the stream to read from
 * @param output the stream to write to
 * @throws IOException if reading or writing fails (propagated as is),
 *         or if closing fails after a successful copy
 */
public static void copyInputToOutput(
        InputStream input,
        OutputStream output)
        throws IOException {
    BufferedInputStream in = new BufferedInputStream(input);
    BufferedOutputStream out = new BufferedOutputStream(output);
    byte[] buffer = new byte[8192];
    IOException closeFailure = null;
    try {
        int count;
        while ((count = in.read(buffer, 0, buffer.length)) != -1) {
            out.write(buffer, 0, count);
        }
    } finally {
        // Close each stream on its own so one failure cannot skip the other.
        try {
            in.close();
        } catch (IOException ex) {
            closeFailure = ex;
        }
        try {
            out.close();
        } catch (IOException ex) {
            if (closeFailure == null) {
                closeFailure = ex;
            }
        }
    }
    if (closeFailure != null) {
        // only reachable when the copy itself succeeded
        IOException wrapped = new IOException(
            "Closing file streams, " + closeFailure.getMessage());
        wrapped.initCause(closeFailure);
        throw wrapped;
    }
}
/**
 * Hashes a password with the named message-digest algorithm and returns
 * the digest as a lower-case hexadecimal string.
 *
 * If no digest can be obtained for the algorithm, the error is logged
 * and the plain password is returned unchanged.
 *
 * NOTE(review): the password bytes come from <code>getBytes()</code>
 * with no explicit charset, so they depend on the platform default.
 *
 * @param password password or other credentials; must not be null
 * @param algorithm name passed to <code>MessageDigest.getInstance</code>
 * @return the hex-encoded digest, or the plain password on failure
 */
public static String encodePassword(String password, String algorithm) {
    byte[] plainBytes = password.getBytes();
    MessageDigest digester;
    try {
        digester = MessageDigest.getInstance(algorithm);
    } catch (Exception e) {
        mLogger.error("Exception: " + e);
        return password;
    }
    digester.reset();
    digester.update(plainBytes);
    byte[] hash = digester.digest();
    StringBuffer hex = new StringBuffer();
    for (int pos = 0; pos < hash.length; pos++) {
        int unsigned = hash[pos] & 0xff;
        if (unsigned < 0x10) {
            // keep two hex digits per byte
            hex.append("0");
        }
        hex.append(Integer.toHexString(unsigned));
    }
    return hex.toString();
}
/**
 * Encodes a string as Base64 using <code>sun.misc.BASE64Encoder</code>
 * and trims surrounding whitespace from the result. The existing
 * documentation describes this as used when storing passwords as
 * cookies.
 *
 * This is an encoding, not encryption: decodeString reverses it.
 *
 * @param str the text to encode; must not be null
 * @return the trimmed Base64 text
 * @throws IOException declared for callers; nothing in this body
 *         throws it explicitly
 */
public static String encodeString(String str) throws IOException {
    byte[] raw = str.getBytes();
    return new BASE64Encoder().encodeBuffer(raw).trim();
}
/**
 * Decodes Base64 text using <code>sun.misc.BASE64Decoder</code> and
 * builds a string from the resulting bytes with the platform default
 * charset.
 *
 * @param str the Base64 text to decode
 * @return the decoded text
 * @throws IOException if the decoder fails
 */
public static String decodeString(String str) throws IOException {
    byte[] raw = new BASE64Decoder().decodeBuffer(str);
    return new String(raw);
}
/**
 * Strips HTML from the text and truncates what is left.
 *
 * If the stripped text is longer than upper, it is cut at the last
 * space found at or before index upper, provided that space lies at
 * index lower or later; otherwise it is cut at exactly upper
 * characters. appendToEnd is added only when a cut was made.
 *
 * @param str the text to truncate; null is treated as ""
 * @param lower the smallest acceptable cut position
 * @param upper the largest acceptable cut position; raised to lower
 *        when it is smaller
 * @param appendToEnd text appended after a cut
 * @return the stripped, possibly truncated text
 */
public static String truncate(
        String str, int lower, int upper, String appendToEnd) {
    String plain = removeHTML(str, false);
    int limit = (upper < lower) ? lower : upper;
    if (plain.length() <= limit) {
        // short enough already
        return plain;
    }
    int cut = plain.lastIndexOf(' ', limit);
    if (cut < lower) {
        // no usable word boundary in range: cut hard at the limit
        cut = limit;
    }
    return plain.substring(0, cut) + appendToEnd;
}
/**
 * Truncates text at a word boundary while trying to keep the HTML tags
 * of the original string.
 *
 * The text is first stripped with removeHTML. If the stripped text is
 * not longer than upper, the original str is returned unchanged.
 * Otherwise the stripped text is cut at the last space at or before
 * index upper (when that space is at index lower or later) or at
 * exactly upper characters. When stripping removed something, the cut
 * is then mapped back onto the original string: the original is kept up
 * to the end of the last word of the truncated text, followed by
 * appendToEnd and by whatever extractHTML returns for the rest of the
 * original.
 *
 * This method is based on code from the String taglib at Apache Jakarta:
 * http://cvs.apache.org/viewcvs/jakarta-taglibs/string/src/org/apache/taglibs/string/util/StringW.java?rev=1.16&content-type=text/vnd.viewcvs-markup
 * Copyright (c) 1999 The Apache Software Foundation.
 * Author: timster@mac.com
 *
 * NOTE(review): str must not be null; str.length() is called on it
 * directly even though removeHTML itself accepts null.
 *
 * @param str the text (possibly containing HTML) to truncate
 * @param lower the smallest acceptable cut position in the stripped text
 * @param upper the largest acceptable cut position in the stripped text;
 *        raised to lower when it is smaller
 * @param appendToEnd text appended after a cut
 * @return str unchanged when the stripped text fits, otherwise the
 *         truncated text as described above
 */
public static String truncateNicely(String str, int lower, int upper, String appendToEnd) {
// strip markup from the string
String str2 = removeHTML(str, false);
// true when stripping shortened the string (markup removed or text trimmed)
boolean diff = (str2.length() < str.length());
// quickly adjust the upper if it is set lower than 'lower'
if(upper < lower) {
upper = lower;
}
// only truncate when the stripped text exceeds the upper limit;
// otherwise fall through and return the original str untouched
if(str2.length() > upper) {
// the cut position within the stripped text
int loc;
// find the last space at or before index 'upper'
loc = str2.lastIndexOf(' ', upper);
// is that space at or beyond the lower limit?
if(loc >= lower) {
// yes it was, so we'll cut it off here
str2 = str2.substring(0, loc);
} else {
// no it wasn't (or there was no space), so cut at the upper limit
str2 = str2.substring(0, upper);
loc = upper;
}
// something was stripped from the original str, so map the cut back onto it
if (diff) {
// location of last space in truncated string (-1 when it has none)
loc = str2.lastIndexOf(' ', loc);
// get last "word" in truncated string (add 1 to loc to skip the space)
String str3 = str2.substring(loc+1);
// find this fragment in original str, searching from 'loc', and move
// to its end.
// NOTE(review): if the fragment is not found, indexOf returns -1 and
// 'loc' becomes str3.length() - 1 -- confirm this case cannot occur.
loc = str.indexOf(str3, loc) + str3.length();
// get truncated string from original str, given new 'loc'
str2 = str.substring(0, loc);
// collect what extractHTML returns for the rest of the original str
str3 = extractHTML(str.substring(loc));
// remove any tags which generate visible HTML
// This call is unnecessary, all HTML has already been stripped
//str3 = removeVisibleHTMLTags(str3);
// append the appendToEnd String and
// add extracted HTML back onto truncated string
str = str2 + appendToEnd + str3;
} else {
// the string was truncated, so we append the appendToEnd String
str = str2 + appendToEnd;
}
}
return str;
}
/**
 * Truncates text to plain text at a word boundary.
 *
 * The text is stripped with removeHTML. If the stripped text is not
 * longer than upper, the original str (markup included) is returned
 * unchanged. Otherwise the stripped text is cut at the last space at or
 * before index upper, provided that space is at index lower or later,
 * or else at exactly upper characters, and appendToEnd is added.
 *
 * A null str yields "" rather than a NullPointerException, matching
 * truncate.
 *
 * @param str the text (possibly containing HTML) to truncate; may be null
 * @param lower the smallest acceptable cut position
 * @param upper the largest acceptable cut position; raised to lower
 *        when it is smaller
 * @param appendToEnd text appended after a cut
 * @return "" for null input, str unchanged when the stripped text fits,
 *         otherwise the truncated plain text plus appendToEnd
 */
public static String truncateText(String str, int lower, int upper, String appendToEnd) {
    if (str == null) {
        return "";
    }
    // strip markup from the string
    String plain = removeHTML(str, false);
    // quickly adjust the upper if it is set lower than 'lower'
    if (upper < lower) {
        upper = lower;
    }
    if (plain.length() > upper) {
        // last space at or before the upper limit
        int loc = plain.lastIndexOf(' ', upper);
        if (loc >= lower) {
            // a word boundary inside [lower, upper]: cut there
            plain = plain.substring(0, loc);
        } else {
            // no usable boundary: cut hard at the upper limit
            plain = plain.substring(0, upper);
        }
        // the string was truncated, so we append the appendToEnd String
        str = plain + appendToEnd;
    }
    return str;
}
/**
 * Removes line-break tags and empty paragraph tags written exactly as
 * <code>&lt;br&gt;</code>, <code>&lt;br/&gt;</code>,
 * <code>&lt;br /&gt;</code>, <code>&lt;p&gt;&lt;/p&gt;</code>,
 * <code>&lt;p/&gt;</code> and <code>&lt;p /&gt;</code>. Matching is
 * case-sensitive.
 *
 * @param str the text to clean; must not be null
 * @return the text without those tags
 */
private static String stripLineBreaks(String str) {
    // TODO: use a string buffer, ignore case !
    str = str.replaceAll("<br>", "");
    str = str.replaceAll("<br/>", "");
    str = str.replaceAll("<br />", "");
    str = str.replaceAll("<p></p>", "");
    str = str.replaceAll("<p/>","");
    str = str.replaceAll("<p />","");
    return str;
}
/**
 * Gets rid of user-visible HTML tags (such as &lt;BR&gt;) that remain
 * once all text has been removed. This sounds like a better approach
 * than removing all HTML tags and taking the chance to leave some tags
 * un-closed.
 *
 * Line breaks are stripped first, then every <code>&lt;img ...&gt;</code>
 * tag, then the elements li, a, div, h1, h2, h3 and h4 together with
 * their content (or the tag alone when it is self-closed).
 *
 * A tag that cannot be removed (an unclosed <code>&lt;img</code>, or an
 * open tag with neither a closing tag nor a self-closing "/&gt;") is
 * left in place and the scan moves past it instead of looping forever.
 *
 * WARNING: this method has serious performance problems.
 * Open tags are matched by prefix only, so for example "&lt;a" also
 * matches "&lt;abbr", and nesting is not handled.
 *
 * @author Alexis Moussine-Pouchkine &lt;alexis.moussine-pouchkine@france.sun.com&gt;
 * @author Lance Lavandowska
 * @param str the String object to modify
 * @return the new String object without the HTML "visible" tags
 */
private static String removeVisibleHTMLTags(String str) {
    str = stripLineBreaks(str);
    StringBuffer result = new StringBuffer(str);
    // lower-cased twin used for case-insensitive searching; every delete
    // is applied to both buffers so their indices stay aligned
    StringBuffer lcresult = new StringBuffer(str.toLowerCase());
    // <img should take care of smileys
    String[] visibleTags = {"<img"}; // are there others to add?
    int stringIndex;
    for (int j = 0; j < visibleTags.length; j++) {
        String tag = visibleTags[j];
        while ((stringIndex = lcresult.indexOf(tag)) != -1) {
            if (tag.endsWith(">")) {
                result.delete(stringIndex, stringIndex + tag.length());
                lcresult.delete(stringIndex, stringIndex + tag.length());
            } else {
                // need to delete everything up until next closing '>', for <img for instance
                int endIndex = result.indexOf(">", stringIndex);
                if (endIndex > -1) {
                    result.delete(stringIndex, endIndex + 1);
                    lcresult.delete(stringIndex, endIndex + 1);
                } else {
                    // No '>' after this occurrence, hence none after any
                    // later one either. We can't safely delete anything,
                    // so stop instead of finding the same match again.
                    break;
                }
            }
        }
    }
    // TODO: This code is buggy by nature. It doesn't deal with nesting of tags properly.
    // remove certain elements with open & close tags
    String[] openCloseTags = {"li", "a", "div", "h1", "h2", "h3", "h4"}; // more ?
    for (int j = 0; j < openCloseTags.length; j++) {
        // could this be better done with a regular expression?
        String openTag = "<" + openCloseTags[j];
        String closeTag = "</" + openCloseTags[j] + ">";
        int lastStringIndex = 0;
        while ((stringIndex = lcresult.indexOf(openTag, lastStringIndex)) > -1) {
            lastStringIndex = stringIndex;
            // Try to find the matching closing tag (ignores possible nesting!)
            int endIndex = lcresult.indexOf(closeTag, stringIndex);
            if (endIndex > -1) {
                // If we found it delete it.
                result.delete(stringIndex, endIndex + closeTag.length());
                lcresult.delete(stringIndex, endIndex + closeTag.length());
            } else {
                // Try to see if it is a self-closed empty content tag, i.e. closed with />.
                endIndex = lcresult.indexOf(">", stringIndex);
                int nextStart = lcresult.indexOf("<", stringIndex + 1);
                if (endIndex > stringIndex && lcresult.charAt(endIndex - 1) == '/' && (endIndex < nextStart || nextStart == -1)) {
                    // Looks like it, so remove it.
                    result.delete(stringIndex, endIndex + 1);
                    lcresult.delete(stringIndex, endIndex + 1);
                } else {
                    // Nothing was removed: step past this occurrence so the
                    // next search does not return the same index again.
                    lastStringIndex = stringIndex + 1;
                }
            }
        }
    }
    return result.toString();
}
/**
 * Extracts (keeps) JUST the HTML from the String: every span that
 * starts at a "&lt;" and ends at the next "&gt;" is kept, in order, and
 * all other text is discarded. A trailing "&lt;" with no closing
 * "&gt;" is discarded as well.
 *
 * Input that contains no tags yields "", not the input itself.
 *
 * @param str the text to scan; may be null
 * @return the concatenated tags, or "" when str is null or has no tags
 */
public static String extractHTML(String str) {
    if (str == null) {
        return "";
    }
    StringBuffer tags = new StringBuffer(str.length());
    int beginTag = str.indexOf("<");
    while (beginTag != -1) {
        int endTag = str.indexOf(">", beginTag);
        if (endTag == -1) {
            // unterminated tag: nothing more to keep
            break;
        }
        tags.append(str.substring(beginTag, endTag + 1));
        // continue searching after the tag just kept
        beginTag = str.indexOf("<", endTag + 1);
    }
    return tags.toString();
}
/**
 * Passes the text through <code>RegexUtil.encode</code>; null and
 * empty strings are returned unchanged.
 *
 * @param str the text to encode; may be null or empty
 * @return the encoded text, or str itself when it is null or empty
 */
public static String hexEncode(String str) {
    return StringUtils.isEmpty(str) ? str : RegexUtil.encode(str);
}
/**
 * Passes the text through <code>RegexUtil.encodeEmail</code>.
 *
 * @param str the text to encode; may be null
 * @return the encoded text, or null when str is null
 */
public static String encodeEmail(String str) {
    if (str == null) {
        return null;
    }
    return RegexUtil.encodeEmail(str);
}
/**
 * URL encoding.
 *
 * @param s a string to be URL-encoded
 * @return URL encoding of s using character encoding UTF-8; null if s
 *         is null
 */
public static final String encode(String s) {
    if (s == null) {
        return s;
    }
    try {
        return URLEncoder.encode(s, "UTF-8");
    } catch (UnsupportedEncodingException e) {
        // Java Spec requires UTF-8 be in all Java environments, so this
        // should not happen; fall back to the input as is
        return s;
    }
}
/**
 * URL decoding.
 *
 * @param s a URL-encoded string to be URL-decoded
 * @return URL decoded value of s using character encoding UTF-8; null
 *         if s is null
 */
public static final String decode(String s) {
    if (s == null) {
        return s;
    }
    try {
        return URLDecoder.decode(s, "UTF-8");
    } catch (UnsupportedEncodingException e) {
        // Java Spec requires UTF-8 be in all Java environments, so this
        // should not happen; fall back to the input as is
        return s;
    }
}
/**
 * Parses a decimal integer, falling back to 0 when the text is not a
 * valid int. The failure is logged at debug level.
 *
 * @param string the text to parse
 * @return the parsed value, or 0 when parsing fails
 */
public static int stringToInt(String string) {
    int parsed = 0;
    try {
        parsed = Integer.parseInt(string);
    } catch (NumberFormatException e) {
        mLogger.debug("Invalid Integer:" + string);
    }
    return parsed;
}
/**
 * Converts a byte array into a Base64 string (as used in mime formats).
 * Input is consumed three bytes at a time and emitted as four
 * characters; a final group of one or two bytes is padded with '='.
 * No line breaks are inserted.
 *
 * @param aValue the bytes to encode; must not be null
 * @return the Base64 text, or "" for an empty array
 */
public static String toBase64(byte[] aValue) {
    final String alphabet =
        "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
    int total = aValue.length;
    StringBuffer encoded = new StringBuffer();
    for (int offset = 0; offset < total; offset += 3) {
        boolean hasSecond = offset + 1 < total;
        boolean hasThird = offset + 2 < total;
        int first = aValue[offset] & 0xFF;
        int second = hasSecond ? (aValue[offset + 1] & 0xFF) : 0;
        int third = hasThird ? (aValue[offset + 2] & 0xFF) : 0;
        // top six bits of the first byte
        encoded.append(alphabet.charAt(first >> 2));
        // low two bits of the first byte + top four of the second
        encoded.append(alphabet.charAt(((first & 0x3) << 4) | (second >> 4)));
        // low four bits of the second byte + top two of the third
        if (hasSecond) {
            encoded.append(alphabet.charAt(((second & 0xF) << 2) | (third >> 6)));
        } else {
            encoded.append('=');
        }
        // low six bits of the third byte
        if (hasThird) {
            encoded.append(alphabet.charAt(third & 0x3F));
        } else {
            encoded.append('=');
        }
    }
    return encoded.toString();
}
}