// Source code of org.apache.lenya.cms.publication.xsp.DocumentReferencesHelper

/*
 * Copyright  1999-2004 The Apache Software Foundation
 *
 *  Licensed under the Apache License, Version 2.0 (the "License");
 *  you may not use this file except in compliance with the License.
 *  You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 *  Unless required by applicable law or agreed to in writing, software
 *  distributed under the License is distributed on an "AS IS" BASIS,
 *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 *  See the License for the specific language governing permissions and
 *  limitations under the License.
 *
 */


/* $Id: DocumentReferencesHelper.java 160151 2005-04-05 09:59:13Z michi $  */


package org.apache.lenya.cms.publication.xsp;


import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Map;
import java.util.regex.Pattern;


import org.apache.cocoon.ProcessingException;
import org.apache.lenya.cms.publication.Document;
import org.apache.lenya.cms.publication.DocumentBuildException;
import org.apache.lenya.cms.publication.DocumentBuilder;
import org.apache.lenya.cms.publication.DocumentDoesNotExistException;
import org.apache.lenya.cms.publication.DocumentIdToPathMapper;
import org.apache.lenya.cms.publication.PageEnvelope;
import org.apache.lenya.cms.publication.PageEnvelopeException;
import org.apache.lenya.cms.publication.PageEnvelopeFactory;
import org.apache.lenya.cms.publication.PathToDocumentIdMapper;
import org.apache.lenya.cms.publication.Publication;
import org.apache.lenya.cms.publication.SiteTree;
import org.apache.lenya.cms.publication.SiteTreeException;
import org.apache.lenya.cms.publication.SiteTreeNode;
import org.apache.lenya.search.Grep;
import org.apache.log4j.Category;


/**
 * Helper class for finding references to the current document.
 */
public class DocumentReferencesHelper {


    private static final Category log = Category.getInstance(DocumentReferencesHelper.class);


    private PageEnvelope pageEnvelope = null;


    /**
     * Create a new DocumentReferencesHelper
     * 
     * @param objectModel the objectModel
     * 
     * @throws ProcessingException if the page envelope could not be created.
     */
    public DocumentReferencesHelper(Map objectModel)
        throws ProcessingException {
        try {
            this.pageEnvelope =
                PageEnvelopeFactory.getInstance().getPageEnvelope(objectModel);
        } catch (PageEnvelopeException e) {
            throw new ProcessingException(e);
        }
    }


    /**
     * Construct a search string for the search of references, i.e.
     * links from other documents to the current document. This
     * is done using the assumption that internal links look as if
     * they were copied directly from the browser,
     * e.g. /lenya/default/authoring/doctypes/2columns.html
     * 
     * @return the search string
     */
    protected String getReferencesSearchString() {
        return "href\\s*=\\s*\""
            + pageEnvelope.getContext()
            + "/"
            + pageEnvelope.getPublication().getId()
            + "/"
            + pageEnvelope.getDocument().getArea()
            + pageEnvelope.getDocument().getId();
    }


    /**
     * Construct a search string for the search of internal references, 
     * i.e from the current document to others. This is done using 
     * the assumption that internal links look as if they were copied 
     * directly from the browser, e.g. 
     * /lenya/default/authoring/doctypes/2columns.html
     * 
     * @return the search string
     */
    protected Pattern getInternalLinkPattern() {
        // FIXME: The following method is not very robust and certainly 
        // will fail if the mapping between URL and document-id changes  


        // Link Management now assumes that internal links are of the
        // form
        // href="$CONTEXT_PREFIX/$PUBLICATION_ID/$AREA$DOCUMENT_ID(_[a-z][a-z])?.html
        // If there is a match in a document file it is assumed that
        // this is an internal link and is treated as such (warning if
        // publish with unpublished internal links and warning if
        // deactivate with internal references).


        // However this is not coordinated with the
        // DocumentToPathMapper and will probably fail if the URL
        // looks different.


        return Pattern.compile(
            "href\\s*=\\s*\""
                + pageEnvelope.getContext()
                + "/"
                + pageEnvelope.getPublication().getId()
                + "/"
                + pageEnvelope.getDocument().getArea()
                + "(/[-a-zA-Z0-9_/]+?)(_[a-z][a-z])?\\.html");
    }


    /**
     * Find a list of document-ids which have references to the current
     * document.
     * 
     * @return an <code>array</code> of documents if there are references, 
     * an empty <code>array</code> otherwise 
     * 
     * @throws ProcessingException if the search for references failed.
     */
    public Document[] getReferences(String area) throws ProcessingException {


        ArrayList documents = new ArrayList();
        Publication publication = pageEnvelope.getPublication();
        DocumentIdToPathMapper mapper = publication.getPathMapper();
        if (mapper instanceof PathToDocumentIdMapper) {
            PathToDocumentIdMapper fileMapper = (PathToDocumentIdMapper)mapper;
            String documentId = null;
            String language = null;
            DocumentBuilder builder = publication.getDocumentBuilder();
            File[] inconsistentFiles;
            try {
                inconsistentFiles =
                    Grep.find(
                        publication.getContentDirectory(area),
                        getReferencesSearchString());
                for (int i = 0; i < inconsistentFiles.length; i++) {
                    // for performance reasons the getReferencesSearchString() is 
                    // constructed in a way such that it will catch all files which 
                    // have a link to any language version of the current document.
                    // That's why we need to do some additional tests for each hit. 
                    String languageOfCurrentDocument =
                        pageEnvelope.getDocument().getLanguage();
                    String defaultLanguage =
                        pageEnvelope.getPublication().getDefaultLanguage();
                    Pattern referencesSearchStringWithLanguage =
                        Pattern.compile(
                            getReferencesSearchString()
                                + "_"
                                + languageOfCurrentDocument);
                    Pattern referencesSearchStringWithOutLanguage =
                        Pattern.compile(
                            getReferencesSearchString() + "\\.html");
                    log.debug(
                        "languageOfCurrentDocument: "
                            + languageOfCurrentDocument);
                    log.debug("defaultLanguage: " + defaultLanguage);
                    log.debug(
                        "referencesSearchStringWithOutLanguage: "
                            + referencesSearchStringWithOutLanguage.pattern());
                    log.debug(
                        "referencesSearchStringWithLanguage: "
                            + referencesSearchStringWithLanguage.pattern());
                    // a link is indeed to the current document if the following conditions
                    // are met:
                    // 1. the link is to foo_xx and the language of the current 
                    //    document is xx.
                    // 2. or the link is to foo.html and the language of the current 
                    //    document is the default language.
                    // Now negate the expression because we continue if above (1) and (2) are
                    // false, and you'll get the following if statement
                    if (!Grep
                        .containsPattern(
                            inconsistentFiles[i],
                            referencesSearchStringWithLanguage)
                        && !(Grep
                            .containsPattern(
                                inconsistentFiles[i],
                                referencesSearchStringWithOutLanguage)
                            && languageOfCurrentDocument.equals(
                                defaultLanguage))) {
                        // the reference foo_xx is neither to the language of the current 
                        // document.
                        // nor is the reference foo.html and the current document is in the 
                        // default language.
                        // So the reference is of no importance to us, skip 
                        continue;
                    }


                    documentId =
                        fileMapper.getDocumentId(
                            publication,
                            area,
                            inconsistentFiles[i]);
                    log.debug("documentId: " + documentId);
                    language = fileMapper.getLanguage(inconsistentFiles[i]);
                    log.debug("language: " + language);


                    String url = null;
                    if (language != null) {
                        url =
                            builder.buildCanonicalUrl(
                                publication,
                                area,
                                documentId,
                                language);
                        log.debug("url: " + url);
                    } else {
                        url =
                            builder.buildCanonicalUrl(
                                publication,
                                area,
                                documentId);
                        log.debug("url: " + url);
                    }
                    documents.add(builder.buildDocument(publication, url));
                }
            } catch (IOException e) {
                throw new ProcessingException(e);
            } catch (DocumentDoesNotExistException e) {
                throw new ProcessingException(e);
            } catch (DocumentBuildException e) {
                throw new ProcessingException(e);
            }
        }
        return (Document[])documents.toArray(new Document[documents.size()]);
    }


    /**
     * Find all internal references in the current document to documents which have
     * not been published yet.
     * 
     * @return an <code>array</code> of <code>Document</code> of references 
     * from the current document to documents which have not been published yet.
     *
     * @throws ProcessingException if the current document cannot be opened.
     */
    public Document[] getInternalReferences() throws ProcessingException {
        ArrayList unpublishedReferences = new ArrayList();
        SiteTree sitetree;
        Pattern internalLinkPattern = getInternalLinkPattern();
        Publication publication = pageEnvelope.getPublication();
        DocumentBuilder builder = publication.getDocumentBuilder();
        try {
            sitetree = publication.getTree(Publication.LIVE_AREA);
            String[] internalLinks =
                Grep.findPattern(
                    pageEnvelope.getDocument().getFile(),
                    internalLinkPattern,
                    1);
            String[] internalLinksLanguages =
                Grep.findPattern(
                    pageEnvelope.getDocument().getFile(),
                    internalLinkPattern,
                    2);


            for (int i = 0; i < internalLinks.length; i++) {
                String docId = internalLinks[i];
                String language = null;


                log.debug("docId: " + docId);
                if (internalLinksLanguages[i] != null) {
                    // trim the leading '_'
                    language = internalLinksLanguages[i].substring(1);
                }


                log.debug("language: " + language);
                SiteTreeNode documentNode = sitetree.getNode(docId);


                if (language == null) {
                    String url =
                        "/"
                            + publication.getId()
                            + "/"
                            + pageEnvelope.getDocument().getArea()
                            + docId
                            + ".html";
                    language =
                        builder.buildDocument(publication, url).getLanguage();
                }
                log.debug("language: " + language);
                if (documentNode == null
                    || documentNode.getLabel(language) == null) {
                    // the docId has not been published for the given language
                    String url = null;
                    if (language != null) {
                        url =
                            builder.buildCanonicalUrl(
                                publication,
                                Publication.AUTHORING_AREA,
                                docId,
                                language);
                        log.debug("url: " + url);
                    } else {
                        url =
                            builder.buildCanonicalUrl(
                                publication,
                                Publication.AUTHORING_AREA,
                                docId);
                        log.debug("url: " + url);
                    }
                    unpublishedReferences.add(
                        builder.buildDocument(publication, url));
                }
            }
        } catch (SiteTreeException e) {
            throw new ProcessingException(e);
        } catch (IOException e) {
            throw new ProcessingException(e);
        } catch (DocumentBuildException e) {
            throw new ProcessingException(e);
        }
        return (Document[])unpublishedReferences.toArray(
            new Document[unpublishedReferences.size()]);
    }
}
// Source code of org.apache.lenya.cms.publication.xsp.DocumentReferencesHelper

// Related classes of org.apache.lenya.cms.publication.xsp.DocumentReferencesHelper