/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.portals.applications.webcontent.proxy.impl;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Map;
import java.util.Set;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.apache.commons.lang.StringUtils;
import org.apache.portals.applications.webcontent.proxy.HttpReverseProxyPathMapper;
import org.apache.portals.applications.webcontent.util.CharArraySegment;
/**
 * A simple reverse proxy link rewriting parser adaptor implementation.
 * <P>
 * Each text line handed to {@link #rewriteLine(String)} is rewritten in up to three passes:
 * </P>
 * <OL>
 * <LI>{@code href}, {@code src} and {@code action} attribute values starting with a single slash
 * are prefixed by the local path of the current proxy path mapper.</LI>
 * <LI>Attribute values which are absolute {@code http(s)} URLs are replaced by local paths, either
 * by looking up a proxy path mapper for each URL (when {@code lookUpAllMappings} is true) or by
 * using only the current proxy path mapper.</LI>
 * <LI>The optional custom patterns are applied with their custom replacements.</LI>
 * </OL>
 * <P>
 * Instances cache compiled patterns and look-up results in plain, unsynchronized fields.
 * </P>
 * <P>
 * You can refer to this example to implement a more sophisticated rewriting parser adaptor.
 * </P>
 *
 * @version $Id: DefaultReverseProxyLinkRewritingParserAaptor.java 832821 2009-11-04 18:00:10Z woonsan $
 */
public class DefaultReverseProxyLinkRewritingParserAaptor extends AbstractReverseProxyTextLinesParserAdaptor
{

    /**
     * Matches a link attribute whose value starts with a single slash.
     * Groups: 1 = leading whitespace (or line start), 2 = attribute name, 3 = opening quote,
     * 4 = the path value, 5 = the leading slash.
     * Values starting with two slashes (protocol relative URLs such as {@code //host/path})
     * are deliberately not matched because they do not denote a path on the current remote site.
     */
    protected static final Pattern LINK_ABS_PATH_PATTERN =
        Pattern.compile("(\\s|^)(href|src|action)\\s*=\\s*(['\"])((\\/)(?!\\/)[^'\"]*)['\"]", Pattern.CASE_INSENSITIVE);

    /**
     * Matches a link attribute whose value is an absolute http or https URL.
     * Groups: 1 = leading whitespace (or line start), 2 = attribute name, 3 = the URL.
     */
    protected static final Pattern LINK_HTTP_ABS_URL_PATTERN =
        Pattern.compile("(\\s|^)(href|src|action)\\s*=\\s*['\"](https?:\\/\\/[^'\"]+)['\"]", Pattern.CASE_INSENSITIVE);

    /**
     * Matches a URL consisting of the scheme and the domain address only, without any slash
     * after the domain address (e.g. "http://projects.apache.org").
     */
    protected static final Pattern HTTP_DOMAIN_ADDRESS_ONLY_PATTERN =
        Pattern.compile("^https?:\\/\\/[^\\/]+$", Pattern.CASE_INSENSITIVE);

    /** Whether a proxy path mapper should be looked up for every absolute URL found in a line. */
    protected boolean lookUpAllMappings;

    /**
     * Not read or written by this class itself; kept so that existing subclasses
     * referring to it keep compiling.
     */
    protected String localPathMatchingReplaces;

    /** Character sequences of a remote base URL which are escaped before it is embedded into a regular expression. */
    protected String [] linkRemoteBaseURLSearches = { "/", "." };

    /** The escaped counterparts of {@link #linkRemoteBaseURLSearches}, index by index. */
    protected String [] linkRemoteBaseURLReplaces = { "\\/", "\\." };

    /** Lazily created pattern matching links to the remote base URL of the current proxy path mapper. */
    private Pattern defaultRemoteURLPattern;

    /** Lazily created local path of the current proxy path mapper, already quoted for use in a replacement string. */
    private String defaultRemoteURLReplaces;

    /** Remote base URL keys for which no proxy path mapper is looked up. May be null. */
    private Set<String> blacklist;

    /** Cache of link matching patterns per proxy path mapper. Created on first use. */
    private Map<HttpReverseProxyPathMapper, Pattern> remoteURLMatchingPatternMap;

    /** Cache of local paths (quoted for use in a replacement string) per proxy path mapper. Created on first use. */
    private Map<HttpReverseProxyPathMapper, String> localPathMatchingReplacesMap;

    /** Optional custom patterns applied after the built-in rewriting. May be null. */
    private Pattern [] customPatterns;

    /** Replacement strings for {@link #customPatterns}, index by index. May be null. */
    private String [] customReplaces;

    /**
     * Creates an adaptor which looks up the proxy path mappers for all absolute URLs
     * and which starts without a blacklist.
     */
    public DefaultReverseProxyLinkRewritingParserAaptor()
    {
        this(true, null);
    }

    /**
     * Creates an adaptor.
     *
     * @param lookUpAllMappings see {@link #setLookUpAllMappings(boolean)}
     * @param blacklist see {@link #setBlacklist(Set)}
     */
    public DefaultReverseProxyLinkRewritingParserAaptor(boolean lookUpAllMappings, Set<String> blacklist)
    {
        setLookUpAllMappings(lookUpAllMappings);
        setBlacklist(blacklist);
    }

    /**
     * Sets whether a proxy path mapper is looked up for every absolute URL found in a line (true)
     * or whether only links to the remote base URL of the current proxy path mapper are rewritten (false).
     *
     * @param lookUpAllMappings the flag
     */
    public void setLookUpAllMappings(boolean lookUpAllMappings)
    {
        this.lookUpAllMappings = lookUpAllMappings;
    }

    /**
     * @return the flag set by {@link #setLookUpAllMappings(boolean)}
     */
    public boolean getLookUpAllMappings()
    {
        return lookUpAllMappings;
    }

    /**
     * Sets the remote base URL keys (scheme, domain address and leading path parts, without
     * a trailing slash) for which no proxy path mapper is looked up.
     * The given set is stored as is, not copied, and this adaptor adds every key to it for which
     * the look-up found no proxy path mapper; therefore it has to be modifiable.
     *
     * @param blacklist the blacklist, or null to start without one
     */
    public void setBlacklist(Set<String> blacklist)
    {
        this.blacklist = blacklist;
    }

    /**
     * Sets the custom regular expressions applied (case insensitively) to each line after the
     * built-in rewriting. Each expression is replaced by the custom replacement of the same index.
     *
     * @param patterns the regular expressions, or null to disable the custom patterns
     * @throws java.util.regex.PatternSyntaxException if an expression is invalid;
     *         the previously set patterns are kept in this case
     */
    public void setCustomPatterns(String [] patterns)
    {
        if (patterns == null)
        {
            customPatterns = null;
            return;
        }

        // Compile into a local array first so that a syntax error cannot leave a half-filled array behind.
        Pattern [] compiledPatterns = new Pattern[patterns.length];

        for (int i = 0; i < patterns.length; i++)
        {
            compiledPatterns[i] = Pattern.compile(patterns[i], Pattern.CASE_INSENSITIVE);
        }

        customPatterns = compiledPatterns;
    }

    /**
     * Sets the replacement strings for the custom patterns. They are passed to
     * {@link Matcher#replaceAll(String)} unchanged, so they may contain group references.
     *
     * @param customReplaces the replacements; must provide at least one entry per custom pattern
     */
    public void setCustomReplaces(String [] customReplaces)
    {
        this.customReplaces = customReplaces;
    }

    /**
     * Rewrites the links of one text line.
     *
     * @param line the line to rewrite
     * @return the rewritten line
     * @throws IllegalStateException if custom patterns are set without enough custom replacements
     * @throws Exception if the rewriting fails otherwise
     */
    @Override
    protected String rewriteLine(String line) throws Exception
    {
        if (defaultRemoteURLPattern == null)
        {
            HttpReverseProxyPathMapper defaultMapper = getHttpReverseProxyPathMapper();
            // Compute both values before publishing any of them, so that a failure
            // cannot leave this adaptor half initialized.
            Pattern remoteURLPattern = createRemoteURLMatchingPattern(defaultMapper);
            // The local path is literal text; quoting keeps '$' and '\' in it from being
            // interpreted as group references or escapes in the replacement strings below.
            String localPathReplaces = Matcher.quoteReplacement(createLocalPathMatchingReplaces(defaultMapper));
            defaultRemoteURLReplaces = localPathReplaces;
            defaultRemoteURLPattern = remoteURLPattern;
        }

        // first, replace slash leading relative paths by slash leading reverse proxying relative paths.
        Matcher linkBasePathMatcher = LINK_ABS_PATH_PATTERN.matcher(line);
        line = linkBasePathMatcher.replaceAll("$1$2=$3" + defaultRemoteURLReplaces + "$4$3");

        if (lookUpAllMappings)
        {
            // if there's any https? absolute url link, try to find the proxy path mapper again...
            line = rewriteAbsoluteURLLinksOfAllMappings(line);
        }
        else
        {
            // Goes through replaceRemoteLinkValues() like the branch above, so that domain only
            // URLs get the trailing slash here as well.
            Matcher matcher = defaultRemoteURLPattern.matcher(line);
            line = replaceRemoteLinkValues(matcher, "$1$2=$3" + defaultRemoteURLReplaces + "$6$3", line);
        }

        return applyCustomReplacements(line);
    }

    /**
     * Finds each absolute http(s) link of the line, looks up the proxy path mapper serving it,
     * and replaces all links to that mapper's remote base URL by the mapper's local path.
     */
    private String rewriteAbsoluteURLLinksOfAllMappings(String line) throws Exception
    {
        // The URLs are searched in the line as it is now; the replacements below build new strings
        // and do not affect the segment being scanned.
        CharSequence segment = new CharArraySegment(line);

        for (Matcher absURLMatcher = LINK_HTTP_ABS_URL_PATTERN.matcher(segment); absURLMatcher.find(); )
        {
            String absURL = absURLMatcher.group(3);
            int maxMatchingPathPartCount = getMaxMatchingPathPartCount();
            // pathParts[0] is the scheme including the colon, pathParts[1] the domain address,
            // the rest are path parts; the last element holds the unsplit remainder.
            String [] pathParts = StringUtils.split(absURL, "/", maxMatchingPathPartCount + 2);
            int pathPartCount = (pathParts != null ? pathParts.length : 0);

            if (pathPartCount < 2)
            {
                continue;
            }

            String scheme = pathParts[0];

            // Try the longest candidate base URL first, then shorter ones.
            for (int i = Math.min(pathPartCount, maxMatchingPathPartCount + 1); i > 1; i--)
            {
                String remoteBaseURLKey = scheme + "//" + StringUtils.join(pathParts, "/", 1, i);

                if (blacklist != null && blacklist.contains(remoteBaseURLKey))
                {
                    continue;
                }

                HttpReverseProxyPathMapper proxyMapper =
                    getHttpReverseProxyPathMapperProvider().findMapperByRemoteURL(remoteBaseURLKey + "/");

                if (proxyMapper == null)
                {
                    // Remember the miss so that the look-up is not repeated for this key.
                    if (blacklist == null)
                    {
                        blacklist = new HashSet<String>();
                    }

                    blacklist.add(remoteBaseURLKey);
                    continue;
                }

                if (remoteURLMatchingPatternMap == null)
                {
                    remoteURLMatchingPatternMap = new HashMap<HttpReverseProxyPathMapper, Pattern>();
                    localPathMatchingReplacesMap = new HashMap<HttpReverseProxyPathMapper, String>();
                }

                Pattern pattern = remoteURLMatchingPatternMap.get(proxyMapper);

                if (pattern == null)
                {
                    pattern = createRemoteURLMatchingPattern(proxyMapper);
                    remoteURLMatchingPatternMap.put(proxyMapper, pattern);
                }

                String mapperLocalPathReplaces = localPathMatchingReplacesMap.get(proxyMapper);

                if (mapperLocalPathReplaces == null)
                {
                    // Cached in quoted form; see the note on quoting in rewriteLine().
                    mapperLocalPathReplaces = Matcher.quoteReplacement(createLocalPathMatchingReplaces(proxyMapper));
                    localPathMatchingReplacesMap.put(proxyMapper, mapperLocalPathReplaces);
                }

                Matcher matcher = pattern.matcher(line);
                line = replaceRemoteLinkValues(matcher, "$1$2=$3" + mapperLocalPathReplaces + "$6$3", line);
                break;
            }

            // Continue the search behind the URL just handled.
            segment = segment.subSequence(absURLMatcher.end(), segment.length());
            absURLMatcher.reset(segment);
        }

        return line;
    }

    /**
     * Applies each custom pattern with the custom replacement of the same index, in order.
     */
    private String applyCustomReplacements(String line)
    {
        if (customPatterns == null)
        {
            return line;
        }

        int customReplacesCount = (customReplaces != null ? customReplaces.length : 0);

        if (customReplacesCount < customPatterns.length)
        {
            throw new IllegalStateException("There are " + customPatterns.length + " custom patterns, but only "
                                            + customReplacesCount + " custom replaces.");
        }

        for (int i = 0; i < customPatterns.length; i++)
        {
            Matcher matcher = customPatterns[i].matcher(line);
            line = matcher.replaceAll(customReplaces[i]);
        }

        return line;
    }

    /**
     * Creates the pattern matching link attributes whose values start with the remote base URL
     * of the given proxy path mapper (a trailing slash of the base URL is removed first).
     * Groups: 1 = leading whitespace (or line start), 2 = attribute name, 3 = opening quote,
     * 4 = the whole URL, 5 = the remote base URL, 6 = the rest of the URL.
     * Only the character sequences listed in {@link #linkRemoteBaseURLSearches} are escaped.
     *
     * @param proxyMapper the proxy path mapper providing the remote base URL
     * @return the case insensitive pattern
     */
    protected Pattern createRemoteURLMatchingPattern(HttpReverseProxyPathMapper proxyMapper)
    {
        String remoteBaseURL = StringUtils.removeEnd(proxyMapper.getRemoteBaseURL(), "/");
        String remoteBaseURLPattern = StringUtils.replaceEach(remoteBaseURL, linkRemoteBaseURLSearches, linkRemoteBaseURLReplaces);
        return Pattern.compile("(\\s|^)(href|src|action)\\s*=\\s*(['\"])((" + remoteBaseURLPattern + ")([^'\"]*))['\"]", Pattern.CASE_INSENSITIVE);
    }

    /**
     * Creates the literal local path which replaces the remote base URL of the given proxy path
     * mapper: the rewriting context path followed by the local base path of the mapper without
     * a trailing slash.
     *
     * @param proxyMapper the proxy path mapper providing the local base path
     * @return the local path; the caller quotes it before using it in a replacement string
     */
    protected String createLocalPathMatchingReplaces(HttpReverseProxyPathMapper proxyMapper)
    {
        return getRewritingContextPath() + StringUtils.removeEnd(proxyMapper.getLocalBasePath(), "/");
    }

    /**
     * Replaces all matches of the given matcher by the given replacement.
     * <P>
     * If the matched url (group 4) is composed of scheme and domain name only, such as
     * "http://projects.apache.org", then a '/' is inserted in front of the last "$3" of the
     * replacement, so that the local path ends with a slash like "/webcontent/rproxy/project_apache/".
     * </P>
     *
     * @param matcher the matcher; it is reset before use
     * @param replacement the replacement string in {@link Matcher#appendReplacement} syntax
     * @param text the text returned unchanged if nothing matches
     * @return the text with all matches replaced
     */
    protected String replaceRemoteLinkValues(Matcher matcher, String replacement, String text)
    {
        matcher.reset();

        if (!matcher.find())
        {
            return text;
        }

        // The closing quote reference is the last "$3"; the slash goes right in front of it.
        // A replacement without "$3" is used unchanged.
        int closingQuoteOffset = replacement.lastIndexOf("$3");
        String domainOnlyReplacement = replacement;

        if (closingQuoteOffset >= 0)
        {
            domainOnlyReplacement = replacement.substring(0, closingQuoteOffset) + "/" + replacement.substring(closingQuoteOffset);
        }

        // Matcher.appendReplacement() requires a StringBuffer on older Java versions.
        StringBuffer sb = new StringBuffer();

        do
        {
            boolean domainAddressOnly = HTTP_DOMAIN_ADDRESS_ONLY_PATTERN.matcher(matcher.group(4)).matches();
            matcher.appendReplacement(sb, domainAddressOnly ? domainOnlyReplacement : replacement);
        }
        while (matcher.find());

        matcher.appendTail(sb);

        return sb.toString();
    }

}