/*
* Copyright 2010 Peter Karich jetwick_@_pannous_._info
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package de.jetwick.es;
import de.jetwick.config.Configuration;
import de.jetwick.data.JTag;
import de.jetwick.util.Helper;
import de.jetwick.util.MyDate;
import java.io.IOException;
import java.util.Collection;
import java.util.Collections;
import java.util.Date;
import java.util.LinkedHashMap;
import java.util.LinkedHashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.concurrent.BlockingQueue;
import java.util.concurrent.LinkedBlockingQueue;
import org.elasticsearch.action.deletebyquery.DeleteByQueryRequestBuilder;
import org.elasticsearch.action.search.SearchRequestBuilder;
import org.elasticsearch.client.Client;
import org.elasticsearch.common.xcontent.XContentBuilder;
import org.elasticsearch.common.xcontent.json.JsonXContent;
import org.elasticsearch.index.query.FilterBuilders;
import org.elasticsearch.index.query.NumericRangeFilterBuilder;
import org.elasticsearch.index.query.QueryBuilders;
import org.elasticsearch.index.query.RangeQueryBuilder;
import org.elasticsearch.search.sort.SortOrder;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/**
 * Stores and queries {@link JTag} objects in an Elasticsearch index.
 *
 * Besides the direct query methods, tags can be handed over via
 * {@link #queueObject(JTag)}; they are then written by a single background
 * thread named "TagQueue" which is started lazily on the first queued tag.
 *
 * @author Peter Karich, peat_hal 'at' users 'dot' sourceforge 'dot' net
 */
public class ElasticTagSearch extends AbstractElasticSearch<JTag> {

    public static final String Q_INTERVAL = "queryInterval";
    public static final String LAST_REQ = "lastRequest";
    public static final String TERM = "term";
    public static final String USER = "user";
    public static final String TWEETS_SEC = "tweetsPerSec";
    /** Maximum time forceCleanTagQueueAndRefresh waits for the queue to drain. */
    private static final long QUEUE_DRAIN_TIMEOUT_MILLIS = 1000L;
    private final Logger logger = LoggerFactory.getLogger(getClass());
    private String indexName = "tagindex";
    private String indexType = "tag";
    /** Tags waiting to be written by the worker; also used as the monitor for pendingTags. */
    private final BlockingQueue<JTag> todoTags = new LinkedBlockingQueue<JTag>();
    /**
     * Number of tags which were queued but whose update has not finished yet.
     * Guarded by the monitor of todoTags.
     */
    private int pendingTags = 0;
    private Thread todoTagsThread = new Thread("TagQueue") {

        @Override
        public void run() {
            while (!isInterrupted()) {
                JTag tag;
                try {
                    tag = todoTags.take();
                } catch (InterruptedException ex) {
                    logger.error(getName() + " was interrupted!!", ex);
                    // keep the interrupt status visible and stop working
                    Thread.currentThread().interrupt();
                    break;
                }

                try {
                    update(tag);
                } catch (Exception ex) {
                    // A single broken tag must not kill the worker: a terminated
                    // thread cannot be started again and the queue would never
                    // be processed any more.
                    logger.error("Could not update tag " + tag, ex);
                } finally {
                    markTagDone();
                }
            }
        }
    };

    /**
     * Creates the search using the tweet search url of the specified
     * configuration.
     */
    public ElasticTagSearch(Configuration config) {
        this(config.getTweetSearchUrl());
    }

    public ElasticTagSearch(String url) {
        super(url);
    }

    public ElasticTagSearch(Client client) {
        super(client);
    }

    @Override
    public String getIndexName() {
        return indexName;
    }

    @Override
    public void setIndexName(String indexName) {
        this.indexName = indexName;
    }

    public void setIndexType(String indexType) {
        this.indexType = indexType;
    }

    @Override
    public String getIndexType() {
        return indexType;
    }

    /**
     * Adds a new tag for every requested term which is not already returned
     * by a lookup of the specified terms.
     *
     * @param tagStringList the terms to add
     * @param refresh if true refresh() is called after the bulk update
     * @param ignoreSearchError if true a failing lookup is only logged and
     * all terms are treated as new; otherwise the failure is rethrown wrapped
     * in a RuntimeException
     */
    public void addAll(Collection<String> tagStringList, boolean refresh, boolean ignoreSearchError) throws IOException {
        Map<String, JTag> existingTags = Collections.emptyMap();
        try {
            existingTags = findByTerms(tagStringList);
        } catch (Exception ex) {
            if (!ignoreSearchError)
                throw new RuntimeException(ex);

            logger.warn("Ignoring failed lookup of existing tags", ex);
        }

        Set<JTag> newTags = new LinkedHashSet<JTag>();
        for (String requestedTag : tagStringList) {
            if (!existingTags.containsKey(requestedTag))
                newTags.add(new JTag(requestedTag));
        }

        bulkUpdate(newTags, getIndexName());
        if (refresh)
            refresh();
    }

    /**
     * Converts the tag into its JSON document. The user field is only written
     * (lower cased) if the tag has a non-empty user.
     */
    @Override
    public XContentBuilder createDoc(JTag tag) throws IOException {
        XContentBuilder b = JsonXContent.contentBuilder().startObject();
        b.field(TERM, tag.getTerm());
        b.field("lastMillis", tag.getLastMillis());
        b.field("maxCreateTime", tag.getMaxCreateTime());
        b.field(Q_INTERVAL, tag.getQueryInterval());
        b.field("pages", tag.getPages());
        b.field(LAST_REQ, tag.getLastRequest());
        b.field("requestCount", tag.getRequestCount());
        b.field(TWEETS_SEC, tag.getTweetsPerSec());
        if (!Helper.isEmpty(tag.getUser()))
            b.field(USER, tag.getUser().toLowerCase());
        return b;
    }

    /**
     * Creates a tag from the fields of a stored document. The fields
     * tweetsPerSec and user are optional, all other numeric fields are
     * expected to be present. The id and version arguments are not used.
     */
    @Override
    public JTag readDoc(String idAsStr, long version, Map<String, Object> doc) {
        JTag tag = new JTag();
        tag.setTerm((String) doc.get(TERM));
        tag.setLastMillis(((Number) doc.get("lastMillis")).longValue());
        tag.setMaxCreateTime(((Number) doc.get("maxCreateTime")).longValue());
        tag.setQueryInterval(((Number) doc.get(Q_INTERVAL)).longValue());
        tag.setPages(((Number) doc.get("pages")).intValue());
        tag.setLastRequest(Helper.toDateNoNPE(((String) doc.get(LAST_REQ))));
        tag.setRequestCount(((Number) doc.get("requestCount")).intValue());

        Number tweetsPerSec = (Number) doc.get(TWEETS_SEC);
        if (tweetsPerSec != null)
            tag.setTweetsPerSec(tweetsPerSec.doubleValue());

        String user = (String) doc.get(USER);
        if (!Helper.isEmpty(user))
            tag.setUser(user);
        return tag;
    }

    /**
     * @return the found tags keyed by their term, in the order of the search
     * result
     */
    private Map<String, JTag> findByTerms(Collection<String> tagStringList) {
        List<JTag> res = collectObjects(query(
                QueryBuilders.filteredQuery(QueryBuilders.matchAllQuery(),
                FilterBuilders.termsFilter(TERM, Helper.toStringArray(tagStringList)))));
        Map<String, JTag> tagsByTerm = new LinkedHashMap<String, JTag>();
        for (JTag t : res) {
            tagsByTerm.put(t.getTerm(), t);
        }
        return tagsByTerm;
    }

    /**
     * Finds tags filtered by an upper bound on tweetsPerSec, sorted ascending
     * by query interval.
     *
     * NOTE(review): the filter is built via lt(limit) followed by
     * includeUpper(true), so the bound is presumably inclusive - confirm.
     */
    public Collection<JTag> findLowFrequent(int from, int size, double limitOfTweetsPerSec) {
        SearchRequestBuilder srb = createSearchBuilder();
        srb.addSort(Q_INTERVAL, SortOrder.ASC);
        NumericRangeFilterBuilder rb = FilterBuilders.numericRangeFilter(TWEETS_SEC).lt(limitOfTweetsPerSec).includeUpper(true);
        srb.setQuery(QueryBuilders.filteredQuery(QueryBuilders.matchAllQuery(), rb));
        srb.setFrom(from);
        srb.setSize(size);
        return collectObjects(srb.execute().actionGet());
    }

    /**
     * @return a page of all tags sorted descending by their last request
     */
    public Collection<JTag> findSorted(int from, int size) {
        SearchRequestBuilder srb = createSearchBuilder();
        srb.addSort(LAST_REQ, SortOrder.DESC);
        srb.setQuery(QueryBuilders.matchAllQuery());
        srb.setFrom(from);
        srb.setSize(size);
        return collectObjects(srb.execute().actionGet());
    }

    /**
     * @return a page of all tags without an explicit sort
     */
    public Collection<JTag> findAll(int from, int size) {
        SearchRequestBuilder srb = createSearchBuilder();
        srb.setQuery(QueryBuilders.matchAllQuery());
        srb.setFrom(from);
        srb.setSize(size);
        return collectObjects(srb.execute().actionGet());
    }

    /**
     * Finds the tag with the specified term which has no user field.
     *
     * @return the tag or null if none was found
     * @throws IllegalStateException if more than one tag matches
     */
    public JTag findByTerm(String term) {
        term = JTag.toLowerCaseOnlyOnTerms(term);
        SearchRequestBuilder b = createSearchBuilder().setQuery(
                QueryBuilders.filteredQuery(QueryBuilders.matchAllQuery(),
                FilterBuilders.andFilter(FilterBuilders.termFilter(TERM, term),
                FilterBuilders.missingFilter(USER))));
        return getOne(b);
    }

    /**
     * Finds the tag matching term and user. Falls back to
     * {@link #findByTerm(String)} if the user is empty.
     *
     * @return the tag or null if none was found
     * @throws IllegalStateException if more than one tag matches
     */
    public JTag findByTermAndUser(String term, String user) {
        if (Helper.isEmpty(user))
            return findByTerm(term);

        term = JTag.toLowerCaseOnlyOnTerms(term);
        user = JTag.toLowerCaseOnlyOnTerms(user);
        SearchRequestBuilder b = createSearchBuilder().setQuery(
                QueryBuilders.filteredQuery(QueryBuilders.matchAllQuery(),
                FilterBuilders.andFilter(
                FilterBuilders.termFilter(USER, user),
                FilterBuilders.termFilter(TERM, term))));
        return getOne(b);
    }

    /**
     * Executes the search and expects at most one hit.
     */
    private JTag getOne(SearchRequestBuilder b) {
        List<JTag> tags = collectObjects(b.execute().actionGet());
        if (tags.isEmpty())
            return null;
        if (tags.size() > 1)
            throw new IllegalStateException("It should be only one tag but was:" + tags);
        return tags.get(0);
    }

    /**
     * Submits a delete-by-query for all tags with the specified term. The
     * result of the request is not inspected here.
     */
    public void deleteByName(String term) {
        DeleteByQueryRequestBuilder qb = client.prepareDeleteByQuery(getIndexName()).setTypes(getIndexType());
        qb.setQuery(QueryBuilders.filteredQuery(QueryBuilders.matchAllQuery(), FilterBuilders.termFilter(TERM, term)));
        client.deleteByQuery(qb.request());
    }

    /**
     * Submits a delete-by-query for all tags whose last request is older than
     * the specified number of hours. The result of the request is not
     * inspected here.
     */
    public void deleteOlderThan(int hours) {
        RangeQueryBuilder sqb = QueryBuilders.rangeQuery(LAST_REQ).lt(new MyDate().minusHours(hours).toDate());
        DeleteByQueryRequestBuilder dbq = client.prepareDeleteByQuery(getIndexName()).setQuery(sqb);
        client.deleteByQuery(dbq.request());
    }

    /**
     * Queues the specified tag for storing without touching its request
     * counter.
     *
     * @param tag the tag to store
     */
    public void queueObject(JTag tag) {
        queueObject(tag, false);
    }

    /**
     * Queues the specified tag for storing in the background thread, which is
     * started on first use.
     *
     * @param tag the tag to store
     * @param withUpdateCounter if true the request counter and the last
     * request date of the tag are updated before it is queued
     */
    public void queueObject(JTag tag, boolean withUpdateCounter) {
        if (withUpdateCounter)
            updateRequestCounter(tag);

        synchronized (todoTags) {
            pendingTags++;
            todoTags.add(tag);
            // Check and start under the lock so that concurrent callers cannot
            // start the thread twice. Only a thread in state NEW may be
            // started, everything else would throw IllegalThreadStateException.
            if (todoTagsThread.getState() == Thread.State.NEW)
                todoTagsThread.start();
        }
    }

    /**
     * Do not use directly. Use queueObject instead
     *
     * Terms containing " OR " are split and every non-empty part is queued as
     * its own tag. Otherwise the tag is merged into an already existing tag
     * with the same term and user (if any) and stored.
     *
     * @param tag the tag to store
     */
    void update(JTag tag) {
        String term = tag.getTerm();
        if (Helper.isEmpty(term) || JetwickQuery.containsForbiddenChars(term)) {
            logger.warn("Did not add term: " + tag);
            return;
        }

        if (term.contains(" OR ")) {
            for (String tmpTerm : term.split(" OR ")) {
                if (tmpTerm.isEmpty())
                    continue;

                // do not use default lastRequest (which is new Date())
                JTag tmp = new JTag(tmpTerm).setLastRequest(tag.getLastRequest());
                tmp.updateFrom(tag);
                queueObject(tmp);
            }
            return;
        }

        JTag existing = findByTermAndUser(tag.getTerm(), tag.getUser());
        if (existing != null) {
            existing.updateFrom(tag);
            tag = existing;
        }
        store(tag, false);
    }

    /**
     * Sets the request count of the specified tag to one more than the count
     * of the already stored tag (or to 1 if there is none) and sets its last
     * request to the current date.
     */
    public void updateRequestCounter(JTag tag) {
        JTag existing = findByTermAndUser(tag.getTerm(), tag.getUser());
        if (existing != null)
            tag.setRequestCount(existing.getRequestCount() + 1);
        else
            tag.setRequestCount(1);
        tag.setLastRequest(new Date());
    }

    /**
     * Called by the worker after a tag was processed. Wakes up the threads
     * waiting in forceCleanTagQueueAndRefresh once nothing is pending.
     */
    private void markTagDone() {
        synchronized (todoTags) {
            pendingTags--;
            if (pendingTags <= 0) {
                pendingTags = 0;
                todoTags.notifyAll();
            }
        }
    }

    /**
     * Blocks until all queued tags are processed or one second has elapsed,
     * then calls refresh. Use only for tests!
     *
     * @return true if all queued tags were processed before the timeout. At
     * least for some microseconds ;) Returns false on timeout, on
     * interruption or if the refresh failed.
     */
    public boolean forceCleanTagQueueAndRefresh() {
        try {
            boolean drained;
            synchronized (todoTags) {
                long deadline = System.currentTimeMillis() + QUEUE_DRAIN_TIMEOUT_MILLIS;
                // Loop on the condition: wait can return spuriously and the
                // notification may have been sent before we started waiting.
                while (pendingTags > 0) {
                    long remaining = deadline - System.currentTimeMillis();
                    if (remaining <= 0)
                        break;
                    todoTags.wait(remaining);
                }
                drained = pendingTags == 0;
            }
            refresh();
            return drained;
        } catch (InterruptedException ex) {
            Thread.currentThread().interrupt();
            return false;
        } catch (Exception ex) {
            logger.warn("Refresh after waiting for the tag queue failed", ex);
            return false;
        }
    }
}