RegexpCheck.java
////////////////////////////////////////////////////////////////////////////////
// checkstyle: Checks Java source code for adherence to a set of rules.
// Copyright (C) 2001-2021 the original author or authors.
//
// This library is free software; you can redistribute it and/or
// modify it under the terms of the GNU Lesser General Public
// License as published by the Free Software Foundation; either
// version 2.1 of the License, or (at your option) any later version.
//
// This library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
// Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public
// License along with this library; if not, write to the Free Software
// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
////////////////////////////////////////////////////////////////////////////////
package com.puppycrawl.tools.checkstyle.checks.regexp;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import com.puppycrawl.tools.checkstyle.FileStatefulCheck;
import com.puppycrawl.tools.checkstyle.api.AbstractCheck;
import com.puppycrawl.tools.checkstyle.api.DetailAST;
import com.puppycrawl.tools.checkstyle.api.FileContents;
import com.puppycrawl.tools.checkstyle.api.FileText;
import com.puppycrawl.tools.checkstyle.api.LineColumn;
import com.puppycrawl.tools.checkstyle.utils.CommonUtil;
/**
* <p>
* Checks that a specified pattern exists, exists less than
* a set number of times, or does not exist in the file.
* </p>
* <p>
* This check combines all the functionality provided by
* <a href="https://checkstyle.org/config_header.html#RegexpHeader">RegexpHeader</a>
* except supplying the regular expression from a file.
* </p>
* <p>
* It differs from them in that it works in multiline mode. Its regular expression
* can span multiple lines and it checks this against the whole file at once.
* The others work in singleline mode. Their single or multiple regular expressions
* can only span one line. They check each of these against each line in the file in turn.
* </p>
* <p>
* <b>Note:</b> Because of the different mode of operation there may be some
* changes in the regular expressions used to achieve a particular end.
* </p>
* <p>
* In multiline mode...
* </p>
* <ul>
* <li>
* {@code ^} means the beginning of a line, as opposed to beginning of the input.
* </li>
* <li>
* For beginning of the input use {@code \A}.
* </li>
* <li>
* {@code $} means the end of a line, as opposed to the end of the input.
* </li>
* <li>
* For end of input use {@code \Z}.
* </li>
* <li>
* Each line in the file is terminated with a line feed character.
* </li>
* </ul>
* <p>
* <b>Note:</b> Not all regular expression engines are created equal.
* Some provide extra functions that others do not and some elements
* of the syntax may vary. This check makes use of the
* <a href="https://docs.oracle.com/en/java/javase/11/docs/api/java.base/java/util/regex/package-summary.html">
* java.util.regex package</a>; please check its documentation for details
* of how to construct a regular expression to achieve a particular goal.
* </p>
* <p>
* <b>Note:</b> When entering a regular expression as a parameter in
* the XML config file you must also take into account the XML rules. e.g.
* if you want to match a &lt; symbol you need to enter &amp;lt;.
* The regular expression should be entered on one line.
* </p>
* <ul>
* <li>
* Property {@code format} - Specify the pattern to match against.
* Type is {@code java.util.regex.Pattern}.
* Default value is {@code "^$"}.
* </li>
* <li>
* Property {@code message} - Specify message which is used to notify about
* violations, if empty then the default (hard-coded) message is used.
* Type is {@code java.lang.String}.
* Default value is {@code null}.
* </li>
* <li>
* Property {@code illegalPattern} - Control whether the pattern is required or illegal.
* Type is {@code boolean}.
* Default value is {@code false}.
* </li>
* <li>
* Property {@code duplicateLimit} - Control whether to check for duplicates
* of a required pattern, any negative value means no checking for duplicates,
* any positive value is used as the maximum number of allowed duplicates,
* if the limit is exceeded violations will be logged.
* Type is {@code int}.
* Default value is {@code 0}.
* </li>
* <li>
* Property {@code errorLimit} - Specify the maximum number of violations before
* the check will abort.
* Type is {@code int}.
* Default value is {@code 100}.
* </li>
* <li>
* Property {@code ignoreComments} - Control whether to ignore matches found within comments.
* Type is {@code boolean}.
* Default value is {@code false}.
* </li>
* </ul>
* <p>
* To configure the check:
* </p>
* <p>
* The following examples are mainly copied from the other 3 checks mentioned above,
* to show how the same results can be achieved using this check in place of them.
* </p>
* <p>
* <b>To use like Required Regexp check:</b>
* </p>
* <p>
* An example of how to configure the check to make sure a copyright statement
* is included in the file:
* </p>
* <p>
* The statement.
* </p>
* <pre>
* // This code is copyrighted
* </pre>
* <p>
* The check.
* </p>
* <pre>
* <module name="Regexp">
* <property name="format" value="// This code is copyrighted"/>
* </module>
* </pre>
* <p>
* Your statement may be multiline.
* </p>
* <pre>
* // This code is copyrighted
* // (c) MyCompany
* </pre>
* <p>
* Then the check would be.
* </p>
* <pre>
* <module name="Regexp">
* <property name="format" value="// This code is copyrighted\n// \(c\) MyCompany"/>
* </module>
* </pre>
* <p>
* <b>Note:</b> To search for parentheses () in a regular expression you must
* escape them like \(\). This is required by the regexp engine, otherwise it will
* think they are special instruction characters.
* </p>
* <p>
* And to make sure it appears only once:
* </p>
* <pre>
* <module name="Regexp">
* <property name="format" value="// This code is copyrighted\n// \(c\) MyCompany"/>
* <property name="duplicateLimit" value="0"/>
* </module>
* </pre>
* <p>
* It can also be useful to attach a meaningful message to the check:
* </p>
* <pre>
* <module name="Regexp">
* <property name="format" value="// This code is copyrighted\n// \(c\) MyCompany"/>
* <property name="message" value="Copyright"/>
* </module>
* </pre>
* <p>
* <b>To use like illegal regexp check:</b>
* </p>
* <p>
* An example of how to configure the check to make sure there are no calls to
* {@code System.out.println}:
* </p>
* <pre>
* <module name="Regexp">
* <!-- . matches any character, so we need to escape it and use \. to match dots. -->
* <property name="format" value="System\.out\.println"/>
* <property name="illegalPattern" value="true"/>
* </module>
* </pre>
* <p>
* You may want to make the above check ignore comments, like this:
* </p>
* <pre>
* <module name="Regexp">
* <property name="format" value="System\.out\.println"/>
* <property name="illegalPattern" value="true"/>
* <property name="ignoreComments" value="true"/>
* </module>
* </pre>
* <p>
* An example of how to configure the check to find trailing whitespace at the end of a line:
* </p>
* <pre>
* <module name="Regexp">
* <property name="format" value="[ \t]+$"/>
* <property name="illegalPattern" value="true"/>
* <property name="message" value="Trailing whitespace"/>
* </module>
* </pre>
* <p>
* An example of how to configure the check to find case-insensitive occurrences of "debug":
* </p>
* <pre>
* <module name="Regexp">
* <property name="format" value="(?i)debug"/>
* <property name="illegalPattern" value="true"/>
* </module>
* </pre>
* <p>
* <b>Note:</b> The (?i) at the beginning of the regular expression tells the
* regexp engine to ignore the case.
* </p>
* <p>
* There is also a feature to limit the number of violations reported.
* When the limit is reached the check aborts with a message reporting that
* the limit has been reached. The default limit setting is 100,
* but this can be changed as shown in the following example.
* </p>
* <pre>
* <module name="Regexp">
* <property name="format" value="(?i)debug"/>
* <property name="illegalPattern" value="true"/>
* <property name="errorLimit" value="1000"/>
* </module>
* </pre>
* <p>
* <b>To use like <a href="https://checkstyle.org/config_header.html#RegexpHeader">
* RegexpHeader</a>:</b>
* </p>
* <p>
* To configure the check to verify that each file starts with the following multiline header.
* </p>
* <p>
* Note the following:
* </p>
* <ul>
* <li>
* \A means the start of the file.
* </li>
* <li>
* The date can be any 4 digit number.
* </li>
* </ul>
* <pre>
* // Copyright (C) 2004 MyCompany
* // All rights reserved
* </pre>
* <pre>
* <module name="Regexp">
* <property
* name="format"
* value="\A// Copyright \(C\) \d\d\d\d MyCompany\n// All rights reserved"/>
* </module>
* </pre>
* <p>
* A more complex example. Note how the import and javadoc multilines are handled,
* there can be any number of them.
* </p>
* <pre>
* ///////////////////////////////////////////////////////////////////////
* // checkstyle:
* // Checks Java source code for adherence to a set of rules.
* // Copyright (C) 2004 Oliver Burn
* // Last modification by $Author A.N.Other$
* ///////////////////////////////////////////////////////////////////////
*
* package com.puppycrawl.checkstyle;
*
* import java.util.thing1;
* import java.util.thing2;
* import java.util.thing3;
*
* /**
* * javadoc line 1
* * javadoc line 2
* * javadoc line 3
* */
* </pre>
* <pre>
* <module name="Regexp">
* <property
* name="format"
* value="\A/{71}\n// checkstyle:\n// Checks Java source code for
* adherence to a set of rules\.\n// Copyright \(C\) \d\d\d\d Oliver Burn\n
* // Last modification by \$Author.*\$\n/{71}\n\npackage [\w\.]*;\n\n
* (import [\w\.]*;\n)*\n/\*\*\n( \*[^/]*\n)* \*/"/>
* </module>
* </pre>
* <p>
* <b>More examples:</b>
* </p>
* <p>
* The next 2 examples deal with the following example Java source file:
* </p>
* <pre>
* /*
* * PID.java
* *
* * Copyright (c) 2001 ACME
* * 123 Some St.
* * Somewhere.
* *
* * This software is the confidential and proprietary information of ACME.
* * ("Confidential Information"). You shall not disclose such
* * Confidential Information and shall use it only in accordance with
* * the terms of the license agreement you entered into with ACME.
* *
* * $Log: config_misc.xml,v $
* * Revision 1.7 2007/01/16 12:16:35 oburn
* * Removing all reference to mailing lists
* *
* * Revision 1.6 2005/12/25 16:13:10 o_sukhodolsky
* * Fix for rfe 1248106 (TYPECAST is now accepted by NoWhitespaceAfter)
* *
* * Fix for rfe 953266 (thanks to Paul Guyot (pguyot) for submitting patch)
* * IllegalType can be configured to accept some abstract classes which
* * matches to regexp of illegal type names (property legalAbstractClassNames)
* *
* * TrailingComment now can be configured to accept some trailing comments
* * (such as NOI18N) (property legalComment, rfe 1385344).
* *
* * Revision 1.5 2005/11/06 11:54:12 oburn
* * Incorporate excellent patch [ 1344344 ] Consolidation of regexp checks.
* *
* * Revision 1.3.8.1 2005/10/11 14:26:32 someone
* * Fix for bug 251. The broken bit is fixed
* */
*
* package com.acme.tools;
*
* import com.acme.thing1;
* import com.acme.thing2;
* import com.acme.thing3;
*
* /**
* *
* * <P>
* * <I>This software is the confidential and proprietary information of
* * ACME (<B>"Confidential Information"</B>). You shall not
* * disclose such Confidential Information and shall use it only in
* * accordance with the terms of the license agreement you entered into
* * with ACME.</I>
* * </P>
* *
* * &#169; copyright 2002 ACME
* *
* * @author Some Body
* */
* public class PID extends StateMachine implements WebObject.Constants {
*
* /** javadoc. */
* public static final int A_SETPOINT = 1;
* .
* .
* .
* } // class PID
* </pre>
* <p>
* This checks for the presence of the header, the first 16 lines.
* </p>
* <p>
* Note the following:
* </p>
* <ul>
* <li>
* Line 2 and 13 contain the file name. These are checked to make sure they
* are the same, and that they match the class name.
* </li>
* <li>
* The date can be any 4 digit number.
* </li>
* </ul>
* <pre>
* <module name="Regexp">
* <property
* name="format"
* value="\A/\*\n \* (\w*)\.java\n \*\n \* Copyright \(c\)
* \d\d\d\d ACME\n \* 123 Some St\.\n \* Somewhere\.\n \*\n
* \* This software is the confidential and proprietary information
* of ACME\.\n \* \(&quot;Confidential Information&quot;\)\. You
* shall not disclose such\n \* Confidential Information and shall
* use it only in accordance with\n \* the terms of the license
* agreement you entered into with ACME\.\n \*\n
* \* \$Log: config_misc\.xml,v $
* \* Revision 1\.7 2007/01/16 12:16:35 oburn
* \* Removing all reference to mailing lists
* \* \
* \* Revision 1.6 2005/12/25 16:13:10 o_sukhodolsky
* \* Fix for rfe 1248106 \(TYPECAST is now accepted by NoWhitespaceAfter\)
* \* \
* \* Fix for rfe 953266 \(thanks to Paul Guyot \(pguyot\) for submitting patch\)
* \* IllegalType can be configured to accept some abstract classes which
* \* matches to regexp of illegal type names \(property legalAbstractClassNames\)
* \*
* \* TrailingComment now can be configured to accept some trailing comments
* \* \(such as NOI18N\) \(property legalComment, rfe 1385344\).
* \*
* \* Revision 1.5 2005/11/06 11:54:12 oburn
* \* Incorporate excellent patch \[ 1344344 \] Consolidation of regexp checks.
* \* \\n(.*\n)*([\w|\s]*( class | interface )\1)"/>
* <property name="message" value="Correct header not found"/>
* </module>
* </pre>
* <p>
* This checks for the presence of a copyright notice within the class javadoc, lines 24 to 37.
* </p>
* <pre>
* <module name="Regexp">
* <property
* name="format"
* value="(/\*\*\n)( \*.*\n)*( \* <P>\n \* <I>
* This software is the confidential and proprietary information of\n
* \* ACME \(<B>&quot;Confidential Information&quot;</B>
* \)\. You shall not\n \* disclose such Confidential Information
* and shall use it only in\n \* accordance with the terms of the
* license agreement you entered into\n \* with ACME\.</I>\n
* \* </P>\n \*\n \* &#169; copyright \d\d\d\d ACME\n
* \*\n \* @author .*)(\n\s\*.*)*/\n[\w|\s]*( class | interface )"/>
* <property name="message"
* value="Copyright in class/interface Javadoc"/>
* <property name="duplicateLimit" value="0"/>
* </module>
* </pre>
* <p>
* <b>Note:</b> To search for things that mean something in XML, like &lt;
* you need to escape them like &amp;lt;. This is required so the XML parser
* does not act on them, but instead passes the correct character to the regexp engine.
* </p>
* <p>
* Parent is {@code com.puppycrawl.tools.checkstyle.TreeWalker}
* </p>
* <p>
* Violation Message Keys:
* </p>
* <ul>
* <li>
* {@code duplicate.regexp}
* </li>
* <li>
* {@code illegal.regexp}
* </li>
* <li>
* {@code required.regexp}
* </li>
* </ul>
*
* @since 4.0
*/
@FileStatefulCheck
public class RegexpCheck extends AbstractCheck {

    /**
     * A key is pointing to the warning message text in "messages.properties"
     * file.
     */
    public static final String MSG_ILLEGAL_REGEXP = "illegal.regexp";

    /**
     * A key is pointing to the warning message text in "messages.properties"
     * file.
     */
    public static final String MSG_REQUIRED_REGEXP = "required.regexp";

    /**
     * A key is pointing to the warning message text in "messages.properties"
     * file.
     */
    public static final String MSG_DUPLICATE_REGEXP = "duplicate.regexp";

    /**
     * Threshold used by {@link #setDuplicateLimit(int)}: duplicate checking is
     * enabled only for limits strictly greater than this value.
     */
    private static final int DEFAULT_DUPLICATE_LIMIT = -1;

    /** Default error report limit. */
    private static final int DEFAULT_ERROR_LIMIT = 100;

    /** Error count exceeded message, prepended to the regular message text. */
    private static final String ERROR_LIMIT_EXCEEDED_MESSAGE =
        "The error limit has been exceeded, "
        + "the check is aborting, there may be more unreported errors.";

    /**
     * Specify message which is used to notify about violations,
     * if empty then the default (hard-coded) message is used.
     */
    private String message;

    /** Control whether to ignore matches found within comments. */
    private boolean ignoreComments;

    /** Control whether the pattern is required or illegal. */
    private boolean illegalPattern;

    /** Specify the maximum number of violations before the check will abort. */
    private int errorLimit = DEFAULT_ERROR_LIMIT;

    /**
     * Control whether to check for duplicates of a required pattern,
     * any negative value means no checking for duplicates,
     * any non-negative value is used as the maximum number of allowed duplicates,
     * if the limit is exceeded violations will be logged.
     * Duplicate checking stays disabled until {@link #setDuplicateLimit(int)}
     * is called with a non-negative value.
     */
    private int duplicateLimit;

    /** Boolean to say if we should check for duplicates. */
    private boolean checkForDuplicates;

    /** Tracks number of (non-ignored) matches made in the current file. */
    private int matchCount;

    /** Tracks number of errors logged for the current file. */
    private int errorCount;

    /** Specify the pattern to match against. */
    private Pattern format = Pattern.compile("^$", Pattern.MULTILINE);

    /** The matcher over the full text of the file currently being checked. */
    private Matcher matcher;

    /**
     * Setter to specify message which is used to notify about violations,
     * if empty then the default (hard-coded) message is used.
     *
     * @param message custom message which should be used in report.
     */
    public void setMessage(String message) {
        this.message = message;
    }

    /**
     * Setter to control whether to ignore matches found within comments.
     *
     * @param ignoreComments True if comments should be ignored.
     */
    public void setIgnoreComments(boolean ignoreComments) {
        this.ignoreComments = ignoreComments;
    }

    /**
     * Setter to control whether the pattern is required or illegal.
     *
     * @param illegalPattern True if pattern is not allowed.
     */
    public void setIllegalPattern(boolean illegalPattern) {
        this.illegalPattern = illegalPattern;
    }

    /**
     * Setter to specify the maximum number of violations before the check will abort.
     *
     * @param errorLimit the number of errors to report.
     */
    public void setErrorLimit(int errorLimit) {
        this.errorLimit = errorLimit;
    }

    /**
     * Setter to control whether to check for duplicates of a required pattern,
     * any negative value means no checking for duplicates,
     * any non-negative value is used as the maximum number of allowed duplicates,
     * if the limit is exceeded violations will be logged.
     *
     * @param duplicateLimit negative values mean no duplicate checking,
     *     any non-negative value (including zero) is used as the limit.
     */
    public void setDuplicateLimit(int duplicateLimit) {
        this.duplicateLimit = duplicateLimit;
        checkForDuplicates = duplicateLimit > DEFAULT_DUPLICATE_LIMIT;
    }

    /**
     * Setter to specify the pattern to match against.
     * The pattern is recompiled with {@link Pattern#MULTILINE}.
     *
     * @param pattern the new pattern
     */
    public final void setFormat(Pattern pattern) {
        format = CommonUtil.createPattern(pattern.pattern(), Pattern.MULTILINE);
    }

    @Override
    public int[] getDefaultTokens() {
        return getRequiredTokens();
    }

    @Override
    public int[] getAcceptableTokens() {
        return getRequiredTokens();
    }

    @Override
    public int[] getRequiredTokens() {
        return CommonUtil.EMPTY_INT_ARRAY;
    }

    @Override
    public void beginTree(DetailAST rootAST) {
        // Reset the per-file state and run the whole search up front; the check
        // registers no tokens, so all of its work happens here.
        matcher = format.matcher(getFileContents().getText().getFullText());
        matchCount = 0;
        errorCount = 0;
        findMatch();
    }

    /**
     * Walks over the matches of the pattern in the file and logs violations.
     *
     * <p>Implemented as a loop rather than by recursion so that files with a
     * very large number of matches cannot exhaust the call stack.</p>
     */
    private void findMatch() {
        boolean searching = true;
        while (searching) {
            if (matcher.find()) {
                final FileText text = getFileContents().getText();
                final LineColumn start = text.lineColumn(matcher.start());
                final int startLine = start.getLine();
                final boolean ignore = isIgnore(startLine, text, start);
                if (!ignore) {
                    matchCount++;
                    // Every match of an illegal pattern is a violation; for a
                    // required pattern only the matches beyond the allowed
                    // number of duplicates are.
                    if (illegalPattern || checkForDuplicates
                            && matchCount - 1 > duplicateLimit) {
                        errorCount++;
                        logMessage(startLine);
                    }
                }
                searching = canContinueValidation(ignore);
            }
            else {
                // Input exhausted: a required pattern that never matched is
                // reported once, against line 0.
                if (!illegalPattern && matchCount == 0) {
                    logMessage(0);
                }
                searching = false;
            }
        }
    }

    /**
     * Check if the search for further matches should go on.
     *
     * @param ignore whether the most recent match was ignored
     * @return true if the error limit has not been reached and there is a
     *     reason to look at further matches (the last match was ignored, the
     *     pattern is illegal, or duplicates are being checked)
     */
    private boolean canContinueValidation(boolean ignore) {
        return errorCount <= errorLimit - 1
                && (ignore || illegalPattern || checkForDuplicates);
    }

    /**
     * Detect ignore situation.
     *
     * @param startLine line number of the start of the current match
     * @param text file text
     * @param start line and column of the start of the current match
     * @return true if {@code ignoreComments} is set and the current match
     *     intersects with a comment, false otherwise
     */
    private boolean isIgnore(int startLine, FileText text, LineColumn start) {
        boolean ignore = false;
        if (ignoreComments) {
            // The end position is only needed for the comment lookup, so it is
            // resolved only when that lookup actually happens.
            final LineColumn end;
            if (matcher.end() == 0) {
                end = text.lineColumn(0);
            }
            else {
                // matcher.end() is exclusive; step back to the last matched char.
                end = text.lineColumn(matcher.end() - 1);
            }
            final FileContents theFileContents = getFileContents();
            final int startColumn = start.getColumn();
            final int endLine = end.getLine();
            final int endColumn = end.getColumn();
            ignore = theFileContents.hasIntersectionWithComment(startLine,
                startColumn, endLine, endColumn);
        }
        return ignore;
    }

    /**
     * Displays the right message.
     *
     * <p>The text is the custom message if one is configured, otherwise the
     * pattern itself. Once the error limit has been reached the text is
     * prefixed with the "error limit exceeded" notice. The message key depends
     * on the mode: illegal pattern, duplicate of a required pattern
     * (line number greater than zero) or missing required pattern.</p>
     *
     * @param lineNumber the line number the message relates to.
     */
    private void logMessage(int lineNumber) {
        String msg;
        if (message == null || message.isEmpty()) {
            msg = format.pattern();
        }
        else {
            msg = message;
        }

        if (errorCount >= errorLimit) {
            msg = ERROR_LIMIT_EXCEEDED_MESSAGE + msg;
        }

        if (illegalPattern) {
            log(lineNumber, MSG_ILLEGAL_REGEXP, msg);
        }
        else if (lineNumber > 0) {
            log(lineNumber, MSG_DUPLICATE_REGEXP, msg);
        }
        else {
            log(lineNumber, MSG_REQUIRED_REGEXP, msg);
        }
    }

}