Package io.lumify.core

Source Code of io.lumify.core.EntityHighlighter

package io.lumify.core;

import io.lumify.core.ingest.video.VideoFrameInfo;
import io.lumify.core.ingest.video.VideoPropertyHelper;
import io.lumify.core.ingest.video.VideoTranscript;
import io.lumify.core.model.textHighlighting.OffsetItem;
import io.lumify.core.model.textHighlighting.VertexOffsetItem;
import io.lumify.web.clientapi.model.SandboxStatus;
import io.lumify.core.util.GraphUtil;
import org.apache.commons.lang.StringEscapeUtils;
import org.apache.commons.lang.StringUtils;
import org.json.JSONException;
import org.json.JSONObject;
import org.securegraph.Authorizations;
import org.securegraph.Vertex;

import java.util.*;

public class EntityHighlighter {
    public String getHighlightedText(String text, Iterable<Vertex> termMentions, String workspaceId, Authorizations authorizations) {
        List<OffsetItem> offsetItems = convertTermMentionsToOffsetItems(termMentions, workspaceId, authorizations);
        return getHighlightedText(text, offsetItems);
    }

    // TODO: change to use an InputStream?
    public static String getHighlightedText(String text, List<OffsetItem> offsetItems) throws JSONException {
        Collections.sort(offsetItems);
        StringBuilder result = new StringBuilder();
        PriorityQueue<Integer> endOffsets = new PriorityQueue<Integer>();
        int lastStart = 0;
        for (int i = 0; i < offsetItems.size(); i++) {
            OffsetItem offsetItem = offsetItems.get(i);

            boolean overlapsPreviousItem = false;
            if (offsetItem instanceof VertexOffsetItem) {
                for (int j = 0; j < i; j++) {
                    OffsetItem compareItem = offsetItems.get(j);
                    if (compareItem instanceof VertexOffsetItem
                            && (OffsetItem.getOffset(compareItem.getEnd()) >= OffsetItem.getOffset(offsetItem.getEnd())
                            || OffsetItem.getOffset(compareItem.getEnd()) > OffsetItem.getOffset(offsetItem.getStart()))) {
                        overlapsPreviousItem = true;
                        offsetItems.remove(i--);
                        break;
                    }
                }
            }
            if (overlapsPreviousItem) {
                continue;
            }
            if (OffsetItem.getOffset(offsetItem.getStart()) < 0 || OffsetItem.getOffset(offsetItem.getEnd()) < 0) {
                continue;
            }
            if (!offsetItem.shouldHighlight()) {
                continue;
            }

            while (endOffsets.size() > 0 && endOffsets.peek() <= OffsetItem.getOffset(offsetItem.getStart())) {
                int end = endOffsets.poll();
                result.append(StringEscapeUtils.escapeHtml(safeSubstring(text, lastStart, end)));
                result.append("</span>");
                lastStart = end;
            }
            result.append(StringEscapeUtils.escapeHtml(safeSubstring(text, lastStart, (int) OffsetItem.getOffset(offsetItem.getStart()))));

            JSONObject infoJson = offsetItem.getInfoJson();

            result.append("<span");
            result.append(" class=\"");
            result.append(StringUtils.join(offsetItem.getCssClasses(), " "));
            result.append("\"");
            if (offsetItem.getTitle() != null) {
                result.append(" title=\"");
                result.append(StringEscapeUtils.escapeHtml(offsetItem.getTitle()));
                result.append("\"");
            }
            result.append(" data-info=\"");
            result.append(StringEscapeUtils.escapeHtml(infoJson.toString()));
            result.append("\"");
            result.append(">");
            endOffsets.add((int) OffsetItem.getOffset(offsetItem.getEnd()));
            lastStart = (int) OffsetItem.getOffset(offsetItem.getStart());
        }

        while (endOffsets.size() > 0) {
            int end = endOffsets.poll();
            result.append(StringEscapeUtils.escapeHtml(safeSubstring(text, lastStart, end)));
            result.append("</span>");
            lastStart = end;
        }
        result.append(StringEscapeUtils.escapeHtml(safeSubstring(text, lastStart)));

        return result.toString().replaceAll("&nbsp;", " ");
    }

    public VideoTranscript getHighlightedVideoTranscript(VideoTranscript videoTranscript, Iterable<Vertex> termMentions, String workspaceId, Authorizations authorizations) {
        List<OffsetItem> offsetItems = convertTermMentionsToOffsetItems(termMentions, workspaceId, authorizations);
        return getHighlightedVideoTranscript(videoTranscript, offsetItems);
    }

    private VideoTranscript getHighlightedVideoTranscript(VideoTranscript videoTranscript, List<OffsetItem> offsetItems) {
        Map<Integer, List<OffsetItem>> videoTranscriptOffsetItems = convertOffsetItemsToVideoTranscriptOffsetItems(videoTranscript, offsetItems);
        return getHighlightedVideoTranscript(videoTranscript, videoTranscriptOffsetItems);
    }

    private VideoTranscript getHighlightedVideoTranscript(VideoTranscript videoTranscript, Map<Integer, List<OffsetItem>> videoTranscriptOffsetItems) {
        VideoTranscript result = new VideoTranscript();
        int entryIndex = 0;
        for (VideoTranscript.TimedText videoTranscriptEntry : videoTranscript.getEntries()) {
            VideoTranscript.TimedText entry = videoTranscript.getEntries().get(entryIndex);

            List<OffsetItem> offsetItems = videoTranscriptOffsetItems.get(entryIndex);
            String highlightedText;
            if (offsetItems == null) {
                highlightedText = entry.getText();
            } else {
                highlightedText = getHighlightedText(entry.getText(), offsetItems);
            }
            result.add(videoTranscriptEntry.getTime(), highlightedText);
            entryIndex++;
        }
        return result;
    }

    private Map<Integer, List<OffsetItem>> convertOffsetItemsToVideoTranscriptOffsetItems(VideoTranscript videoTranscript, List<OffsetItem> offsetItems) {
        Map<Integer, List<OffsetItem>> results = new HashMap<Integer, List<OffsetItem>>();
        for (OffsetItem offsetItem : offsetItems) {
            Integer videoTranscriptEntryIndex = getVideoTranscriptEntryIndex(videoTranscript, offsetItem);

            List<OffsetItem> currentList = results.get(videoTranscriptEntryIndex);
            if (currentList == null) {
                currentList = new ArrayList<OffsetItem>();
                results.put(videoTranscriptEntryIndex, currentList);
            }
            currentList.add(offsetItem);
        }
        return results;
    }

    private static int getVideoTranscriptEntryIndex(VideoTranscript videoTranscript, OffsetItem offsetItem) {
        Integer videoTranscriptEntryIndex = null;
        VideoFrameInfo videoFrameInfo = VideoPropertyHelper.getVideoFrameInfo(offsetItem.getId());
        if (videoFrameInfo != null) {
            videoTranscriptEntryIndex = videoTranscript.findEntryIndexFromStartTime(videoFrameInfo.getFrameStartTime());
        }
        if (videoTranscriptEntryIndex == null) {
            videoTranscriptEntryIndex = offsetItem.getVideoTranscriptEntryIndex();
        }
        return videoTranscriptEntryIndex;
    }

    private static String safeSubstring(String text, int beginIndex) {
        beginIndex = Math.min(beginIndex, text.length());
        return text.substring(beginIndex);
    }

    private static String safeSubstring(String text, int beginIndex, int endIndex) {
        beginIndex = Math.min(beginIndex, text.length());
        endIndex = Math.min(endIndex, text.length());
        return text.substring(beginIndex, endIndex);
    }

    public List<OffsetItem> convertTermMentionsToOffsetItems(Iterable<Vertex> termMentions, String workspaceId, Authorizations authorizations) {
        ArrayList<OffsetItem> termMetadataOffsetItems = new ArrayList<OffsetItem>();
        for (Vertex termMention : termMentions) {
            String visibility = termMention.getVisibility().getVisibilityString();
            SandboxStatus sandboxStatus = GraphUtil.getSandboxStatusFromVisibilityString(visibility, workspaceId);
            termMetadataOffsetItems.add(new VertexOffsetItem(termMention, sandboxStatus, authorizations));
        }
        return termMetadataOffsetItems;
    }
}
TOP

Related Classes of io.lumify.core.EntityHighlighter

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.