/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. The ASF licenses this file to You
* under the Apache License, Version 2.0 (the "License"); you may not
* use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License. For additional information regarding
* copyright in this work, please see the NOTICE file in the top level
* directory of this distribution.
*/
package org.apache.roller.weblogger.business.jpa;
import java.sql.Timestamp;
import java.util.Iterator;
import java.util.List;
import java.util.ArrayList;
import java.util.Calendar;
import java.util.Date;
import java.util.Collections;
import java.util.Comparator;
import javax.persistence.Query;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.commons.lang.StringUtils;
import org.apache.roller.weblogger.WebloggerException;
import org.apache.roller.weblogger.business.Weblogger;
import org.apache.roller.weblogger.business.referrers.RefererManager;
import org.apache.roller.weblogger.config.WebloggerRuntimeConfig;
import org.apache.roller.weblogger.pojos.WeblogReferrer;
import org.apache.roller.weblogger.pojos.StatCount;
import org.apache.roller.weblogger.pojos.WeblogEntry;
import org.apache.roller.weblogger.pojos.Weblog;
import org.apache.roller.weblogger.pojos.StatCountCountComparator;
import org.apache.roller.weblogger.util.LinkbackExtractor;
import org.apache.roller.weblogger.util.Utilities;
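/**
* JPA-based implementation of RefererManager: stores, queries and
* processes weblog referrers through a JPAPersistenceStrategy.
*/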
@com.google.inject.Singleton
public class JPARefererManagerImpl implements RefererManager {
/** Logger used throughout this manager. */
private static Log log = LogFactory.getLog(
JPARefererManagerImpl.class);
/** Hit-type selector passed to getHits() by getDayHits(). */
protected static final String DAYHITS = "dayHits";
/** Hit-type selector passed to getHits() by getTotalHits(). */
protected static final String TOTALHITS = "totalHits";
/** Reverse of StatCountCountComparator's ordering; used to sort hot-weblog results. */
private static final Comparator statCountCountReverseComparator =
Collections.reverseOrder(StatCountCountComparator.getInstance());
/** Weblogger instance used to reach the weblog, weblog entry and thread managers. */
private final Weblogger roller;
/** The persistence strategy for this manager. */
private final JPAPersistenceStrategy strategy;
/**
 * Builds the referer manager around its two injected collaborators.
 *
 * @param roller   Weblogger instance used to reach other managers
 * @param strategy JPA persistence strategy used for all data access
 */
@com.google.inject.Inject
protected JPARefererManagerImpl(Weblogger roller, JPAPersistenceStrategy strategy) {
    log.debug("Instantiating JPA Referer Manager");
    this.strategy = strategy;
    this.roller = roller;
}
/**
 * Hands the given referer to the persistence strategy for storing.
 *
 * @param referer referer to store
 * @throws WebloggerException if raised by the persistence strategy
 */
public void saveReferer(WeblogReferrer referer) throws WebloggerException {
    this.strategy.store(referer);
}
/**
 * Hands the given referer to the persistence strategy for removal.
 *
 * @param referer referer to remove
 * @throws WebloggerException if raised by the persistence strategy
 */
public void removeReferer(WeblogReferrer referer) throws WebloggerException {
    this.strategy.remove(referer);
}
/**
 * Resets day hits for all referrers, then runs the named update that
 * removes referrers whose excerpt is null or empty.
 *
 * @throws WebloggerException if raised by the persistence strategy
 */
public void clearReferrers() throws WebloggerException {
    clearDayHits();
    strategy.getNamedUpdate("WeblogReferrer.removeByNullOrEmptyExcerpt")
            .executeUpdate();
}
/**
 * Resets day hits for one weblog's referrers, then runs the named update
 * that removes that weblog's referrers whose excerpt is null or empty.
 *
 * @param website weblog whose referrers are cleared
 * @throws WebloggerException if raised by the persistence strategy
 */
public void clearReferrers(Weblog website) throws WebloggerException {
    clearDayHitsByWebsite(website);
    final Query removeUpdate = strategy.getNamedUpdate(
            "WeblogReferrer.removeByNullOrEmptyExcerpt&Website");
    removeUpdate.setParameter(1, website);
    removeUpdate.executeUpdate();
}
/**
 * Apply the site-wide ignoreWord/spam filter to all referers in the system.
 *
 * Reads the comma-separated "spam.blacklist" runtime property, strips all
 * whitespace from it, and removes every referer whose URL matches one of
 * the resulting words. Does nothing when the property is unset or yields
 * no words.
 *
 * @throws WebloggerException if raised while querying or removing referers
 */
public void applyRefererFilters() throws WebloggerException {
    String spamwords = WebloggerRuntimeConfig.getProperty("spam.blacklist");
    // StringUtils.deleteWhitespace(null) and StringUtils.split(null, ...) both
    // return null, so a missing property must be caught before reading .length.
    String[] blacklist = StringUtils.split(
            StringUtils.deleteWhitespace(spamwords), ",");
    if (blacklist == null || blacklist.length == 0) {
        return;
    }
    List referers = getBlackListedReferer(blacklist);
    for (Iterator iterator = referers.iterator(); iterator.hasNext();) {
        WeblogReferrer referer = (WeblogReferrer) iterator.next();
        this.strategy.remove(referer);
    }
}
/**
 * Apply the weblog's own ignoreWord/spam filter to that weblog's referers.
 *
 * The weblog's blacklist is treated as a comma-separated list with all
 * whitespace removed; every referer of the weblog matching one of the
 * words is removed. Nothing happens when the blacklist is null or empty.
 *
 * @param website weblog whose referers are filtered; must not be null
 * @throws WebloggerException if website is null, or if raised while
 *         querying or removing referers
 */
public void applyRefererFilters(Weblog website)
        throws WebloggerException {
    if (website == null) {
        throw new WebloggerException("website is null");
    }
    if (website.getBlacklist() == null) {
        return;
    }
    final String compacted = StringUtils.deleteWhitespace(website.getBlacklist());
    final String[] ignoreWords = StringUtils.split(compacted, ",");
    if (ignoreWords.length == 0) {
        return;
    }
    final Iterator matches = getBlackListedReferer(website, ignoreWords).iterator();
    while (matches.hasNext()) {
        WeblogReferrer spamReferer = (WeblogReferrer) matches.next();
        this.strategy.remove(spamReferer);
    }
}
/**
 * Runs the named query selecting referers by weblog, date string and
 * referer permalink.
 *
 * @param website    weblog bound as query parameter 1
 * @param dateString date string bound as query parameter 2
 * @param permalink  referer permalink bound as query parameter 3
 * @return the query's result list
 * @throws WebloggerException if raised by the persistence strategy
 */
protected List getExistingReferers(Weblog website, String dateString,
        String permalink) throws WebloggerException {
    final Query lookup = strategy.getNamedQuery(
            "WeblogReferrer.getByWebsite&DateString&RefererPermalink");
    lookup.setParameter(1, website);
    lookup.setParameter(2, dateString);
    lookup.setParameter(3, permalink);
    final List existing = lookup.getResultList();
    return existing;
}
/**
 * Runs the named query selecting referers by weblog, request URL and
 * referer URL.
 *
 * @param website    weblog bound as query parameter 1
 * @param requestUrl request URL bound as query parameter 2
 * @param refererUrl referer URL bound as query parameter 3
 * @return the query's result list
 * @throws WebloggerException if raised by the persistence strategy
 */
protected List getMatchingReferers(Weblog website, String requestUrl,
        String refererUrl) throws WebloggerException {
    final Query lookup = strategy.getNamedQuery(
            "WeblogReferrer.getByWebsite&RequestUrl&RefererUrl");
    lookup.setParameter(1, website);
    lookup.setParameter(2, requestUrl);
    lookup.setParameter(3, refererUrl);
    final List matching = lookup.getResultList();
    return matching;
}
/**
 * Returns hot weblogs as StatCount objects, in descending order by hits.
 *
 * Paging (offset/length) is applied by the query itself; the rows that
 * come back are then sorted in memory, because the query is not ordered
 * by the aggregated hit count.
 *
 * @param sinceDays Restrict to weblogs modified in the last X days; a
 *                  negative value (conventionally -1) means no restriction
 * @param offset    Offset into results (for paging)
 * @param length    Maximum number of results to return (for paging), or
 *                  -1 for no limit
 * @return List of StatCount objects.
 * @throws WebloggerException if raised by the persistence strategy
 */
public List getHotWeblogs(int sinceDays, int offset, int length)
        throws WebloggerException {
    // Lower bound for the last-modified comparison. Subtracting a negative
    // sinceDays would move the bound into the future and match nothing, so
    // "all" is expressed as the epoch instead.
    Timestamp start;
    if (sinceDays < 0) {
        start = new Timestamp(0L);
    } else {
        Calendar cal = Calendar.getInstance();
        cal.add(Calendar.DATE, -sinceDays);
        start = new Timestamp(cal.getTimeInMillis());
    }

    // -1 means "no limit": request as many rows as can follow the offset.
    int maxResults = (length == -1) ? Integer.MAX_VALUE - offset : length;

    Query q = strategy.getNamedQuery(
            "WeblogReferrer.getHotWeblogsByWebsite.enabled&Website.active&Website.lastModifiedGreater");
    q.setFirstResult(offset);
    q.setMaxResults(maxResults);
    q.setParameter(1, Boolean.TRUE);
    q.setParameter(2, Boolean.TRUE);
    q.setParameter(3, start);

    List queryResults = q.getResultList();
    List results = new ArrayList(queryResults.size());
    for (Iterator it = queryResults.iterator(); it.hasNext();) {
        // Each row is read as: [0] hit sum, [1] weblog id, [2] weblog name, [3] weblog handle
        Object[] row = (Object[]) it.next();
        long hits = ((Number) row[0]).longValue();
        String websiteId = (String) row[1];
        String websiteName = (String) row[2];
        String websiteHandle = (String) row[3];
        results.add(new StatCount(
                websiteId,
                websiteHandle,
                websiteName,
                "statCount.weblogDayHits",
                hits));
    }

    // Original query ordered by desc hits.
    // JPA QL doesn't allow queries to be ordered by aggregates; do it in memory
    Collections.sort(results, statCountCountReverseComparator);
    return results;
}
/**
 * Returns one of the two hit sums computed by the hits query for a weblog.
 *
 * @param website weblog whose id is bound to the query
 * @param type    DAYHITS to read the first result column; any other value
 *                reads the second column (used with TOTALHITS)
 * @return the selected sum as an int; -1 when that sum is null; 0 when
 *         the query returns no row or the row lacks the selected column
 * @throws WebloggerException if raised by the persistence strategy
 */
protected int getHits(Weblog website, String type)
        throws WebloggerException {
    if (log.isDebugEnabled()) {
        log.debug("getHits: " + website.getName());
    }
    //TODO: JPAPort. This query retrieves both SUM(r.dayHits) and SUM(r.totalHits).
    //The method only consumes one of them. We can optimize the logic to retrieve
    //only the required SUM.
    Query query = strategy.getNamedQuery(
            "WeblogReferrer.getHitsByWebsite.enabled&Website.id");
    query.setParameter(1, Boolean.TRUE);
    query.setParameter(2, website.getId());
    List results = query.getResultList();
    if (results.isEmpty()) {
        return 0;
    }
    Object[] sums = (Object[]) results.get(0);

    // Column 0 holds the day-hit sum, column 1 the total-hit sum.
    int column = DAYHITS.equals(type) ? 0 : 1;
    if (sums.length <= column) {
        return 0;
    }

    // Null-check the column actually being read, and go through Number so
    // the provider's concrete numeric type for SUM does not matter.
    Object sum = sums[column];
    if (sum == null) {
        return -1;
    }
    return ((Number) sum).intValue();
}
/**
 * Get all referers for the specified weblog, using the named query that
 * orders them by total hits, descending.
 *
 * @param weblog weblog bound as the query's only parameter
 * @return List of type WeblogReferrer
 * @throws WebloggerException if raised by the persistence strategy
 */
public List getReferers(Weblog weblog) throws WebloggerException {
    return strategy
            .getNamedQuery("WeblogReferrer.getByWebsiteOrderByTotalHitsDesc")
            .setParameter(1, weblog)
            .getResultList();
}
/**
 * Get the referers of a weblog that have day hits greater than zero,
 * using the named query that orders them by day hits, descending.
 *
 * @param website Web site.
 * @return List of type WeblogReferrer
 * @throws WebloggerException if raised by the persistence strategy
 */
public List getTodaysReferers(Weblog website) throws WebloggerException {
    return strategy
            .getNamedQuery("WeblogReferrer.getByWebsite&DayHitsGreaterZeroOrderByDayHitsDesc")
            .setParameter(1, website)
            .getResultList();
}
/**
 * Get the non-duplicate referers of a weblog for a specified date.
 *
 * @param website Web site; must not be null.
 * @param date YYYYMMDD format of day's date; must not be null.
 * @return List of type WeblogReferrer.
 * @throws org.apache.roller.weblogger.WebloggerException if either
 *         argument is null, or if raised by the persistence strategy
 */
public List getReferersToDate(Weblog website, String date)
        throws WebloggerException {
    if (website == null) {
        throw new WebloggerException("website is null");
    }
    if (date == null) {
        throw new WebloggerException("Date is null");
    }
    final Query byDate = strategy.getNamedQuery(
            "WeblogReferrer.getByWebsite&DateString&DuplicateOrderByTotalHitsDesc");
    byDate.setParameter(1, website);
    byDate.setParameter(2, date);
    // Parameter 3 is the duplicate flag: only non-duplicates are requested.
    byDate.setParameter(3, Boolean.FALSE);
    return byDate.getResultList();
}
/**
 * Get referers that refer to a specific weblog entry.
 *
 * @param entryid Weblog entry ID; must not be null
 * @return List of WeblogReferrer objects.
 * @throws org.apache.roller.weblogger.WebloggerException if entryid is
 *         null, or if raised by the persistence strategy
 */
public List getReferersToEntry(String entryid) throws WebloggerException {
    if (entryid == null) {
        throw new WebloggerException("entryid is null");
    }
    //TODO: DataMapperPort: Change calling code to pass WeblogEntry instead of id.
    // Once callers pass the WeblogEntry instance itself, the load below can be
    // skipped (the original note says the load always finds the entry in cache).
    final Query byEntry = strategy.getNamedQuery(
            "WeblogReferrer.getByWeblogEntry&TitleNotNull&ExcerptNotNullOrderByTotalHitsDesc");
    final Object entry = strategy.load(WeblogEntry.class, entryid);
    byEntry.setParameter(1, entry);
    return byEntry.getResultList();
}
/**
 * Runs the named query selecting a weblog's referers by referer URL.
 *
 * @param website    weblog bound as query parameter 1
 * @param refererUrl referer URL bound as query parameter 2
 * @return the query's result list
 * @throws WebloggerException if raised by the persistence strategy
 */
protected List getReferersToWebsite(Weblog website, String refererUrl)
        throws WebloggerException {
    return strategy
            .getNamedQuery("WeblogReferrer.getByWebsite&RefererUrl")
            .setParameter(1, website)
            .setParameter(2, refererUrl)
            .getResultList();
}
/**
 * Runs the named query selecting a weblog's referers by request URL and
 * title or excerpt.
 *
 * @param website    weblog bound as query parameter 1
 * @param requestUrl request URL bound as query parameter 2
 * @param title      title bound as query parameter 3
 * @param excerpt    excerpt bound as query parameter 4
 * @return the query's result list
 * @throws WebloggerException if raised by the persistence strategy
 */
protected List getReferersWithSameTitle(Weblog website,
        String requestUrl,
        String title,
        String excerpt)
        throws WebloggerException {
    final Query sameTitle = strategy.getNamedQuery(
            "WeblogReferrer.getByWebsite&RequestURL&TitleOrExcerpt");
    final Object[] args = { website, requestUrl, title, excerpt };
    for (int position = 1; position <= args.length; position++) {
        sameTitle.setParameter(position, args[position - 1]);
    }
    return sameTitle.getResultList();
}
/**
 * Get a weblog's day hits.
 *
 * @param website weblog to report on
 * @return whatever getHits() yields for the DAYHITS selector
 * @throws WebloggerException if raised by getHits()
 */
public int getDayHits(Weblog website) throws WebloggerException {
    final int dayHits = getHits(website, DAYHITS);
    return dayHits;
}
/**
 * Get a weblog's all-time total hits.
 *
 * @param website weblog to report on
 * @return whatever getHits() yields for the TOTALHITS selector
 * @throws WebloggerException if raised by getHits()
 */
public int getTotalHits(Weblog website) throws WebloggerException {
    final int totalHits = getHits(website, TOTALHITS);
    return totalHits;
}
/**
 * Retrieve a referer by id through the persistence strategy.
 *
 * @param id identifier passed to the strategy's load call
 * @return the loaded object, cast to WeblogReferrer
 * @throws WebloggerException if raised by the persistence strategy
 */
public WeblogReferrer getReferer(String id) throws WebloggerException {
    final Object loaded = strategy.load(WeblogReferrer.class, id);
    return (WeblogReferrer) loaded;
}
/**
 * Referrer URL prefixes for which linkback extraction is skipped (the
 * original comment describes these as search engines).
 */
private static final String[] LINKBACK_EXCLUDED_PREFIXES = {
    "http://google",
    "http://www.google",
    "http://search.netscape",
    "http://www.blinkpro",
    "http://search.msn",
    "http://search.yahoo",
    "http://uk.search.yahoo",
    "http://www.javablogs.com",
    "http://www.teoma"
};

/**
 * Process an incoming referer.
 *
 * Looks up the weblog (and, when an anchor is given, the entry), then
 * either bumps the hit counters of the single matching stored referer or
 * creates a new one. When several stored referers match, nothing is
 * changed. New referers that qualify for linkback extraction are handed
 * to a background task, which stores them; all others are stored here.
 * Lookup and persistence errors are logged, never thrown.
 *
 * @param requestUrl   URL that was requested on this site
 * @param referrerUrl  referring URL; null or shorter than 8 characters
 *                     (after trimming) is recorded as "direct"
 * @param weblogHandle handle of the weblog that was hit; null means the
 *                     call is ignored
 * @param entryAnchor  anchor of the weblog entry that was hit, or null
 * @param dateString   date string stored on a newly created referer
 */
public void processReferrer(String requestUrl, String referrerUrl,
        String weblogHandle, String entryAnchor, String dateString) {
    log.debug("processing referrer [" + referrerUrl
            + "] accessing [" + requestUrl + "]");
    if (weblogHandle == null) {
        return;
    }

    Weblog weblog = null;
    WeblogEntry entry = null;
    // lookup the weblog now
    try {
        weblog = roller.getWeblogManager().getWeblogByHandle(weblogHandle);
        if (weblog == null) {
            return;
        }
        // now lookup weblog entry if possible
        if (entryAnchor != null) {
            // sanitize the anchor to avoid "Illegal mix of collations"
            entryAnchor = Utilities.replaceNonAlphanumeric(entryAnchor, ' ').trim();
            entry = roller.getWeblogEntryManager().getWeblogEntryByAnchor(weblog, entryAnchor);
        }
    } catch (WebloggerException re) {
        // problem looking up website, gotta bail
        log.error("Error looking up website object", re);
        return;
    }

    try {
        List matchRef;
        // try to find existing WeblogReferrer for referrerUrl
        if (referrerUrl == null || referrerUrl.trim().length() < 8) {
            referrerUrl = "direct";
            // Get referer specified by referer URL of direct
            matchRef = getReferersToWebsite(weblog, referrerUrl);
        } else {
            referrerUrl = Utilities.stripJsessionId(referrerUrl);
            // Query for referer with same referer and request URLs
            matchRef = getMatchingReferers(weblog, requestUrl, referrerUrl);
            // If referer was not found, try adding or leaving off 'www.'
            if (matchRef.isEmpty()) {
                String secondTryUrl = alternateWwwUrl(referrerUrl);
                if (secondTryUrl != null) {
                    matchRef = getMatchingReferers(weblog, requestUrl,
                            secondTryUrl);
                    if (matchRef.size() == 1) {
                        referrerUrl = secondTryUrl;
                    }
                }
            }
        }

        if (matchRef.size() == 1) {
            // Referer was found in database, so bump up hit count
            WeblogReferrer ref = (WeblogReferrer) matchRef.get(0);
            ref.setDayHits(Integer.valueOf(ref.getDayHits().intValue() + 1));
            ref.setTotalHits(Integer.valueOf(ref.getTotalHits().intValue() + 1));
            log.debug("Incrementing hit count on existing referer: "
                    + referrerUrl);
            saveReferer(ref);
        } else if (matchRef.isEmpty()) {
            // Referer was not found in database, so new Referer object
            Integer one = Integer.valueOf(1);
            WeblogReferrer ref =
                    new WeblogReferrer(
                            null,
                            weblog,
                            entry,
                            dateString,
                            referrerUrl,
                            null,
                            requestUrl,
                            null,
                            "", // empty string rather than null; the original note attributes this to a Derby bug
                            Boolean.FALSE,
                            Boolean.FALSE,
                            one,
                            one);
            if (log.isDebugEnabled()) {
                log.debug("newReferer=" + ref.getRefererUrl());
            }
            String refurl = ref.getRefererUrl();
            // If not a direct or search engine then search for linkback
            boolean doLinkbackExtraction =
                    WebloggerRuntimeConfig.getBooleanProperty(
                            "site.linkbacks.enabled");
            if (doLinkbackExtraction
                    && entry != null
                    && !isExcludedFromLinkback(refurl)) {
                // Launch background task to extract referer linkback;
                // the task itself stores the referer.
                try {
                    roller.getThreadManager().executeInBackground(
                            new LinkbackExtractorRunnable(ref));
                } catch (InterruptedException e) {
                    log.warn("Interrupted during linkback extraction", e);
                    // Restore the interrupt flag so callers up the stack can see it.
                    Thread.currentThread().interrupt();
                }
            } else {
                saveReferer(ref);
            }
        }
    } catch (WebloggerException pe) {
        // Pass the exception as the cause so the stack trace is logged.
        log.error("Error processing referrer [" + referrerUrl + "]", pe);
    } catch (NullPointerException npe) {
        // Best effort: a referer with missing data must not break the request.
        log.error("Error processing referrer [" + referrerUrl + "]", npe);
    }
}

/**
 * Returns the same URL with a leading "www." host prefix added when
 * absent or removed when present, or null when the URL does not start
 * with "http://" or "https://".
 */
private static String alternateWwwUrl(String url) {
    final String[] schemes = { "http://", "https://" };
    for (int i = 0; i < schemes.length; i++) {
        String scheme = schemes[i];
        if (url.startsWith(scheme)) {
            String rest = url.substring(scheme.length());
            if (rest.startsWith("www.")) {
                return scheme + rest.substring("www.".length());
            }
            return scheme + "www." + rest;
        }
    }
    return null;
}

/**
 * Tells whether a referer URL is "direct" or starts with one of the
 * prefixes for which linkback extraction is skipped.
 */
private static boolean isExcludedFromLinkback(String refurl) {
    if (refurl.equals("direct")) {
        return true;
    }
    for (int i = 0; i < LINKBACK_EXCLUDED_PREFIXES.length; i++) {
        if (refurl.startsWith(LINKBACK_EXCLUDED_PREFIXES[i])) {
            return true;
        }
    }
    return false;
}
/**
* Background task that uses LinkbackExtractor to parse title and excerpt
* from a referer, then stores the referer and flags it (and, where
* applicable, sibling referers) as visible or duplicate.
*
* This is a non-static inner class: it calls the enclosing manager's
* query and save methods and releases the enclosing manager's strategy
* when it finishes.
*
* NOTE(review): nothing in this class flushes after saveReferer();
* presumably the strategy or its release() takes care of that - confirm.
*/
class LinkbackExtractorRunnable implements Runnable {
/** Referer being processed; it is updated and stored by run(). */
private WeblogReferrer mReferer = null;
/**
* @param referer referer to extract a linkback for
*/
public LinkbackExtractorRunnable( WeblogReferrer referer) {
mReferer = referer;
}
/**
* Extracts the linkback and stores the result. Any exception is logged
* rather than propagated, and the strategy is released in all cases.
*/
public void run() {
try {
LinkbackExtractor lb = new LinkbackExtractor(
mReferer.getRefererUrl(),mReferer.getRequestUrl());
// A linkback needs both a title and an excerpt
if ( lb.getTitle()!=null && lb.getExcerpt()!=null ) {
mReferer.setTitle(lb.getTitle());
mReferer.setExcerpt(lb.getExcerpt());
if ( lb.getPermalink() != null ) {
// The presence of a permalink indicates that this
// linkback was parsed out of an RSS feed and is
// presumed to be a good linkback.
mReferer.setRefererPermalink(lb.getPermalink());
// See if this request/permalink is in the DB
List matchRef = getExistingReferers(
mReferer.getWebsite(),
mReferer.getDateString(),
mReferer.getRefererPermalink());
// If it is the first, then set it to be visible
if ( matchRef.size() == 0 ) {
mReferer.setVisible(Boolean.TRUE);
} else {
// We can't throw away duplicates or we will
// end up reparsing them every time a hit comes
// in from one of them, but we can mark them
// as duplicates.
mReferer.setDuplicate(Boolean.TRUE);
}
saveReferer(mReferer);
}
else {
// Store the new referer first, before querying for
// referers with the same title or excerpt below
saveReferer(mReferer);
// Hacky Referer URL weighting kludge:
//
// If there are multiple referers to a request URL,
// then we want to pick the best one. The others
// are marked as duplicates. To do this we use a
// weight. The weight formula is:
//
// w = URL length + (100 if URL contains anchor)
// LOOP: find the referer with the highest weight
Boolean visible = Boolean.FALSE;
List refs= getReferersWithSameTitle(
mReferer.getWebsite(),
mReferer.getRequestUrl(),
lb.getTitle(),
lb.getExcerpt());
WeblogReferrer chosen = null;
int maxweight = 0;
for (Iterator rdItr = refs.iterator();rdItr.hasNext();) {
WeblogReferrer referer = (WeblogReferrer) rdItr.next();
int weight = referer.getRefererUrl().length();
if (referer.getRefererUrl().indexOf('#') != -1) {
weight += 100;
}
// Strict comparison: on equal weights the earlier referer stays chosen
if ( weight > maxweight ) {
chosen = referer;
maxweight = weight;
}
if (referer.getVisible().booleanValue()) {
// If any are visible then chosen
// replacement must be visible as well.
visible = Boolean.TRUE;
}
}
// LOOP: to mark all of the lower weight ones
// as duplicates
for (Iterator rdItr = refs.iterator();rdItr.hasNext();) {
WeblogReferrer referer = (WeblogReferrer) rdItr.next();
if (referer != chosen) {
referer.setDuplicate(Boolean.TRUE);
} else {
// The chosen referer is not a duplicate and inherits
// the visibility computed in the first loop
referer.setDuplicate(Boolean.FALSE);
referer.setVisible(visible);
}
saveReferer(referer);
}
}
} else {
// It is not a linkback, but store it anyway
saveReferer(mReferer);
log.info("No excerpt found at refering URL "
+ mReferer.getRefererUrl());
}
} catch (Exception e) {
// Log and swallow: the exception is not rethrown from run()
log.error("Processing linkback",e);
} finally {
// Always release the strategy when this task ends
strategy.release();
}
}
}
/**
 * Release all resources held by manager. This implementation does
 * nothing.
 */
public void release() {
    // intentionally empty
}
/**
 * Executes the named update that clears referrer day hits.
 *
 * @throws WebloggerException if raised by the persistence strategy
 */
protected void clearDayHits() throws WebloggerException {
    strategy.getNamedUpdate("WeblogReferrer.clearDayHits").executeUpdate();
}
/**
 * Executes the named update that clears referrer day hits for one weblog.
 *
 * @param website weblog bound as the update's only parameter
 * @throws WebloggerException if raised by the persistence strategy
 */
protected void clearDayHitsByWebsite(Weblog website) throws WebloggerException {
    final Query clearUpdate =
            strategy.getNamedUpdate("WeblogReferrer.clearDayHitsByWebsite");
    clearUpdate.setParameter(1, website);
    clearUpdate.executeUpdate();
}
/**
 * Finds all referers whose URL matches any word of the blacklist, using
 * the dynamic query built by getQueryStringForBlackList().
 *
 * @param blacklist words to match against referer URLs
 * @return the query's result list
 * @throws WebloggerException if raised by the persistence strategy
 */
protected List getBlackListedReferer(String[] blacklist) throws
        WebloggerException {
    final String jpql = getQueryStringForBlackList(blacklist).toString();
    final Query blacklisted = strategy.getDynamicQuery(jpql);
    return (List) blacklisted.getResultList();
}
/**
 * Finds the referers of one weblog whose URL matches any word of the
 * blacklist. The query built by getQueryStringForBlackList() is extended
 * with a condition on the weblog, bound as positional parameter 1.
 *
 * @param website   weblog whose referers are searched
 * @param blacklist words to match against referer URLs
 * @return the query's result list
 * @throws WebloggerException if raised by the persistence strategy
 */
protected List getBlackListedReferer(Weblog website, String[] blacklist)
        throws WebloggerException {
    final String jpql = getQueryStringForBlackList(blacklist)
            .append(" AND r.website = ?1 ")
            .toString();
    final Query blacklisted = strategy.getDynamicQuery(jpql);
    blacklisted.setParameter(1, website);
    return blacklisted.getResultList();
}
/**
 * Generates a JPQL query of the form
 * SELECT r FROM WeblogReferrer r WHERE
 * ( r.refererUrl like '%blacklist[0]%' OR ... OR r.refererUrl like '%blacklist[n-1]%' )
 *
 * Each word is trimmed, and single quotes in it are doubled so the word
 * cannot terminate the string literal it is embedded in. LIKE wildcards
 * ('%' and '_') inside a word are left as they are.
 *
 * @param blacklist words to match; must contain at least one element
 * @return buffer holding the query text, which callers may extend with
 *         further conditions
 */
private StringBuffer getQueryStringForBlackList(String[] blacklist) {
    assert blacklist.length > 0;
    StringBuffer queryString = new StringBuffer("SELECT r FROM WeblogReferrer r WHERE (");
    //Search for any matching entry from blacklist[]
    for (int i = 0; i < blacklist.length; i++) {
        if (i > 0) {
            queryString.append(" OR ");
        }
        //TODO: DataMapper port: original code use "like ignore case" as follows
        // or.add(Expression.ilike("refererUrl","%"+ignoreWord+"%"));
        // There is no equivalent for it in JPA
        // In a JPQL string literal a single quote is written as two single quotes.
        String ignoreWord = StringUtils.replace(blacklist[i].trim(), "'", "''");
        queryString.append("r.refererUrl like '%").append(ignoreWord).append("%'");
    }
    queryString.append(" ) ");
    return queryString;
}
}