// Source Code of com.google.caja.lexer.GuessContentType

// Copyright (C) 2009 Google Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//      http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.


package com.google.caja.lexer;


import javax.annotation.Nullable;


import com.google.caja.lexer.CharProducer;
import com.google.caja.lexer.CssLexer;
import com.google.caja.lexer.CssTokenType;
import com.google.caja.lexer.FilePosition;
import com.google.caja.lexer.ParseException;
import com.google.caja.lexer.Token;
import com.google.caja.reporting.DevNullMessageQueue;
import com.google.caja.util.ContentType;


/**
 * Guesses content type based on reported mime-type, file name, content of file.
 *
 * @author mikesamuel@gmail.com
 */
public final class GuessContentType {


  /**
   * Tries a number of heuristics to determine the type of content.
   * <ol>
   *   <li>if mimeType is provided and is recognized, uses it.
   *   <li>if the file path is available and has a recognized extension, uses
   *   it.
   *   <li>if the code is provided then applies some further heuristics;
   *     anything where the first non-whitespace char is {@code '<'} is markup.
   * </ol>
   *
   * @param mimeType null or a string like "text/plain" or
   *    "text/javascript; param=value"
   * @param path null or the path to the content.  Either a URL path
   *     (no trailing query or fragment) or a file system path will work.
   * @param code null or a representative prefix of the content.
   * @return a known {@link ContentType} or null if none could be found.
   */
  public static ContentType guess(
      @Nullable String mimeType, @Nullable String path,
      @Nullable CharSequence code) {
    ContentType contentType = null;
    if (mimeType != null) { contentType = ContentType.fromMimeType(mimeType); }
    if (contentType == null && path != null) {
      int dot = path.lastIndexOf('.');
      if (dot >= 0) {
        String ext = path.substring(dot + 1);
        for (ContentType candidate : ContentType.values()) {
          if (ext.equals(candidate.ext)) {
            contentType = candidate;
            break;
          }
        }
      }
    }
    if (contentType == null && code != null) {
      char ch = '\0';
      for (int i = 0, n = code.length(); i < n; ++i) {
        ch = code.charAt(i);
        if (!Character.isWhitespace(ch)) { break; }
      }
      switch (ch) {
        case '<':
          contentType = ContentType.HTML;
          break;
        case '@': case '.': case '#':
          contentType = ContentType.CSS;
          break;
      }
    }
    if (contentType == null && code != null) {
      // Try and lex and see what happens.
      CharProducer cp = CharProducer.Factory.fromString(
          code, FilePosition.UNKNOWN);
      boolean sawColon = false;
      try {
        CssLexer cssLexer = new CssLexer(
            cp, DevNullMessageQueue.singleton(), false);
        contentType = ContentType.CSS;
        while (cssLexer.hasNext()) {
          Token<CssTokenType> t = cssLexer.next();
          if ("if".equals(t.text) || "while".equals(t.text)
              || "for".equals(t.text) || "return".equals(t.text)
              || "?".equals(t.text)) {
            contentType = ContentType.JS;
            break;
          }
          if (!sawColon && ":".equals(t.text)) { sawColon = true; }
        }
        if (!sawColon) { contentType = ContentType.JS; }
      } catch (ParseException ex) {
        contentType = ContentType.JS;
      }
    }
    return contentType;
  }
}
// Source Code of com.google.caja.lexer.GuessContentType

// Related Classes of com.google.caja.lexer.GuessContentType