Newer
Older
error(filename, linenum, 'readability/braces', 5,
'If an else has a brace on one side, it should have it on both')
else: # common case: else not followed by a multi-line if
error(filename, linenum, 'readability/braces', 5,
'If an else has a brace on one side, it should have it on both')
# Likewise, an else should never have the else clause on the same line
if Search(r'\belse [^\s{]', line) and not Search(r'\belse if\b', line):
error(filename, linenum, 'whitespace/newline', 4,
'Else clause should never be on same line as else (use 2 lines)')
# In the same way, a do/while should never be on one line
if Match(r'\s*do [^\s{]', line):
error(filename, linenum, 'whitespace/newline', 4,
'do/while clauses should not be on a single line')
3017
3018
3019
3020
3021
3022
3023
3024
3025
3026
3027
3028
3029
3030
3031
3032
3033
3034
3035
3036
3037
3038
3039
3040
3041
3042
3043
3044
3045
3046
3047
3048
3049
3050
3051
3052
3053
3054
3055
3056
3057
3058
3059
3060
3061
3062
3063
3064
3065
3066
3067
3068
3069
3070
3071
3072
3073
3074
3075
3076
3077
3078
3079
3080
3081
3082
3083
3084
3085
3086
3087
3088
3089
3090
3091
3092
3093
3094
3095
3096
3097
3098
3099
3100
3101
3102
3103
3104
3105
3106
3107
3108
3109
3110
3111
3112
3113
3114
3115
3116
3117
3118
3119
3120
3121
3122
3123
3124
3125
3126
3127
3128
3129
# Block bodies should not be followed by a semicolon. Due to C++11
# brace initialization, there are more places where semicolons are
# required than not, so we use a whitelist approach to check these
# rather than a blacklist. These are the places where "};" should
# be replaced by just "}":
# 1. Some flavor of block following closing parenthesis:
# for (;;) {};
# while (...) {};
# switch (...) {};
# Function(...) {};
# if (...) {};
# if (...) else if (...) {};
#
# 2. else block:
# if (...) else {};
#
# 3. const member function:
# Function(...) const {};
#
# 4. Block following some statement:
# x = 42;
# {};
#
# 5. Block at the beginning of a function:
# Function(...) {
# {};
# }
#
# Note that naively checking for the preceding "{" will also match
# braces inside multi-dimensional arrays, but this is fine since
# that expression will not contain semicolons.
#
# 6. Block following another block:
# while (true) {}
# {};
#
# 7. End of namespaces:
# namespace {};
#
# These semicolons seems far more common than other kinds of
# redundant semicolons, possibly due to people converting classes
# to namespaces. For now we do not warn for this case.
#
# Try matching case 1 first.
match = Match(r'^(.*\)\s*)\{', line)
if match:
# Matched closing parenthesis (case 1). Check the token before the
# matching opening parenthesis, and don't warn if it looks like a
# macro. This avoids these false positives:
# - macro that defines a base class
# - multi-line macro that defines a base class
# - macro that defines the whole class-head
#
# But we still issue warnings for macros that we know are safe to
# warn, specifically:
# - TEST, TEST_F, TEST_P, MATCHER, MATCHER_P
# - TYPED_TEST
# - INTERFACE_DEF
# - EXCLUSIVE_LOCKS_REQUIRED, SHARED_LOCKS_REQUIRED, LOCKS_EXCLUDED:
#
# We implement a whitelist of safe macros instead of a blacklist of
# unsafe macros, even though the latter appears less frequently in
# google code and would have been easier to implement. This is because
# the downside for getting the whitelist wrong means some extra
# semicolons, while the downside for getting the blacklist wrong
# would result in compile errors.
#
# In addition to macros, we also don't want to warn on compound
# literals.
closing_brace_pos = match.group(1).rfind(')')
opening_parenthesis = ReverseCloseExpression(
clean_lines, linenum, closing_brace_pos)
if opening_parenthesis[2] > -1:
line_prefix = opening_parenthesis[0][0:opening_parenthesis[2]]
macro = Search(r'\b([A-Z_]+)\s*$', line_prefix)
if ((macro and
macro.group(1) not in (
'TEST', 'TEST_F', 'MATCHER', 'MATCHER_P', 'TYPED_TEST',
'EXCLUSIVE_LOCKS_REQUIRED', 'SHARED_LOCKS_REQUIRED',
'LOCKS_EXCLUDED', 'INTERFACE_DEF')) or
Search(r'\s+=\s*$', line_prefix)):
match = None
else:
# Try matching cases 2-3.
match = Match(r'^(.*(?:else|\)\s*const)\s*)\{', line)
if not match:
# Try matching cases 4-6. These are always matched on separate lines.
#
# Note that we can't simply concatenate the previous line to the
# current line and do a single match, otherwise we may output
# duplicate warnings for the blank line case:
# if (cond) {
# // blank line
# }
prevline = GetPreviousNonBlankLine(clean_lines, linenum)[0]
if prevline and Search(r'[;{}]\s*$', prevline):
match = Match(r'^(\s*)\{', line)
# Check matching closing brace
if match:
(endline, endlinenum, endpos) = CloseExpression(
clean_lines, linenum, len(match.group(1)))
if endpos > -1 and Match(r'^\s*;', endline[endpos:]):
# Current {} pair is eligible for semicolon check, and we have found
# the redundant semicolon, output warning here.
#
# Note: because we are scanning forward for opening braces, and
# outputting warnings for the matching closing brace, if there are
# nested blocks with trailing semicolons, we will get the error
# messages in reversed order.
error(filename, endlinenum, 'readability/braces', 4,
"You don't need a ; after a }")
def CheckEmptyBlockBody(filename, clean_lines, linenum, error):
"""Look for empty loop/conditional body with only a single semicolon.
Args:
filename: The name of the current file.
clean_lines: A CleansedLines instance containing the file.
linenum: The number of the line to check.
error: The function to call with any errors found.
"""
# Search for loop keywords at the beginning of the line. Because only
# whitespaces are allowed before the keywords, this will also ignore most
# do-while-loops, since those lines should start with closing brace.
#
# We also check "if" blocks here, since an empty conditional block
# is likely an error.
matched = Match(r'\s*(for|while|if)\s*\(', line)
if matched:
# Find the end of the conditional expression
(end_line, end_linenum, end_pos) = CloseExpression(
clean_lines, linenum, line.find('('))
# Output warning if what follows the condition expression is a semicolon.
# No warning for all other cases, including whitespace or newline, since we
# have a separate check for semicolons preceded by whitespace.
if end_pos >= 0 and Match(r';', end_line[end_pos:]):
if matched.group(1) == 'if':
error(filename, end_linenum, 'whitespace/empty_conditional_body', 5,
'Empty conditional bodies should use {}')
else:
error(filename, end_linenum, 'whitespace/empty_loop_body', 5,
'Empty loop bodies should use {} or continue')
def CheckCheck(filename, clean_lines, linenum, error):
"""Checks the use of CHECK and EXPECT macros.
Args:
filename: The name of the current file.
clean_lines: A CleansedLines instance containing the file.
linenum: The number of the line to check.
error: The function to call with any errors found.
"""
# Decide the set of replacement macros that should be suggested
lines = clean_lines.elided
check_macro = None
start_pos = -1
for macro in _CHECK_MACROS:
i = lines[linenum].find(macro)
if i >= 0:
check_macro = macro
# Find opening parenthesis. Do a regular expression match here
# to make sure that we are matching the expected CHECK macro, as
# opposed to some other macro that happens to contain the CHECK
# substring.
matched = Match(r'^(.*\b' + check_macro + r'\s*)\(', lines[linenum])
if not matched:
continue
start_pos = len(matched.group(1))
# Don't waste time here if line doesn't contain 'CHECK' or 'EXPECT'
return
3199
3200
3201
3202
3203
3204
3205
3206
3207
3208
3209
3210
3211
3212
3213
3214
3215
3216
3217
3218
3219
3220
3221
3222
3223
3224
3225
3226
3227
3228
3229
3230
3231
3232
3233
3234
3235
3236
3237
3238
3239
3240
3241
3242
3243
3244
3245
3246
3247
3248
3249
3250
3251
3252
3253
3254
3255
3256
3257
3258
3259
# Find end of the boolean expression by matching parentheses
(last_line, end_line, end_pos) = CloseExpression(
clean_lines, linenum, start_pos)
if end_pos < 0:
return
if linenum == end_line:
expression = lines[linenum][start_pos + 1:end_pos - 1]
else:
expression = lines[linenum][start_pos + 1:]
for i in xrange(linenum + 1, end_line):
expression += lines[i]
expression += last_line[0:end_pos - 1]
# Parse expression so that we can take parentheses into account.
# This avoids false positives for inputs like "CHECK((a < 4) == b)",
# which is not replaceable by CHECK_LE.
lhs = ''
rhs = ''
operator = None
while expression:
matched = Match(r'^\s*(<<|<<=|>>|>>=|->\*|->|&&|\|\||'
r'==|!=|>=|>|<=|<|\()(.*)$', expression)
if matched:
token = matched.group(1)
if token == '(':
# Parenthesized operand
expression = matched.group(2)
(end, _) = FindEndOfExpressionInLine(expression, 0, 1, '(', ')')
if end < 0:
return # Unmatched parenthesis
lhs += '(' + expression[0:end]
expression = expression[end:]
elif token in ('&&', '||'):
# Logical and/or operators. This means the expression
# contains more than one term, for example:
# CHECK(42 < a && a < b);
#
# These are not replaceable with CHECK_LE, so bail out early.
return
elif token in ('<<', '<<=', '>>', '>>=', '->*', '->'):
# Non-relational operator
lhs += token
expression = matched.group(2)
else:
# Relational operator
operator = token
rhs = matched.group(2)
break
else:
# Unparenthesized operand. Instead of appending to lhs one character
# at a time, we do another regular expression match to consume several
# characters at once if possible. Trivial benchmark shows that this
# is more efficient when the operands are longer than a single
# character, which is generally the case.
matched = Match(r'^([^-=!<>()&|]+)(.*)$', expression)
if not matched:
matched = Match(r'^(\s*\S)(.*)$', expression)
if not matched:
break
lhs += matched.group(1)
expression = matched.group(2)
3261
3262
3263
3264
3265
3266
3267
3268
3269
3270
3271
3272
3273
3274
3275
3276
3277
3278
3279
3280
3281
3282
3283
3284
3285
3286
3287
3288
3289
3290
3291
# Only apply checks if we got all parts of the boolean expression
if not (lhs and operator and rhs):
return
# Check that rhs do not contain logical operators. We already know
# that lhs is fine since the loop above parses out && and ||.
if rhs.find('&&') > -1 or rhs.find('||') > -1:
return
# At least one of the operands must be a constant literal. This is
# to avoid suggesting replacements for unprintable things like
# CHECK(variable != iterator)
#
# The following pattern matches decimal, hex integers, strings, and
# characters (in that order).
lhs = lhs.strip()
rhs = rhs.strip()
match_constant = r'^([-+]?(\d+|0[xX][0-9a-fA-F]+)[lLuU]{0,3}|".*"|\'.*\')$'
if Match(match_constant, lhs) or Match(match_constant, rhs):
# Note: since we know both lhs and rhs, we can provide a more
# descriptive error message like:
# Consider using CHECK_EQ(x, 42) instead of CHECK(x == 42)
# Instead of:
# Consider using CHECK_EQ instead of CHECK(a == b)
#
# We are still keeping the less descriptive message because if lhs
# or rhs gets long, the error message might become unreadable.
error(filename, linenum, 'readability/check', 2,
'Consider using %s instead of %s(a %s b)' % (
_CHECK_REPLACEMENT[check_macro][operator],
check_macro, operator))
3294
3295
3296
3297
3298
3299
3300
3301
3302
3303
3304
3305
3306
3307
3308
3309
3310
3311
3312
3313
3314
3315
3316
3317
3318
3319
3320
3321
3322
3323
3324
3325
def CheckAltTokens(filename, clean_lines, linenum, error):
"""Check alternative keywords being used in boolean expressions.
Args:
filename: The name of the current file.
clean_lines: A CleansedLines instance containing the file.
linenum: The number of the line to check.
error: The function to call with any errors found.
"""
line = clean_lines.elided[linenum]
# Avoid preprocessor lines
if Match(r'^\s*#', line):
return
# Last ditch effort to avoid multi-line comments. This will not help
# if the comment started before the current line or ended after the
# current line, but it catches most of the false positives. At least,
# it provides a way to workaround this warning for people who use
# multi-line comments in preprocessor macros.
#
# TODO(unknown): remove this once cpplint has better support for
# multi-line comments.
if line.find('/*') >= 0 or line.find('*/') >= 0:
return
for match in _ALT_TOKEN_REPLACEMENT_PATTERN.finditer(line):
error(filename, linenum, 'readability/alt_tokens', 2,
'Use operator %s instead of %s' % (
_ALT_TOKEN_REPLACEMENT[match.group(1)], match.group(1)))
3326
3327
3328
3329
3330
3331
3332
3333
3334
3335
3336
3337
3338
3339
3340
3341
3342
3343
3344
3345
3346
3347
def GetLineWidth(line):
"""Determines the width of the line in column positions.
Args:
line: A string, which may be a Unicode string.
Returns:
The width of the line in column positions, accounting for Unicode
combining characters and wide characters.
"""
if isinstance(line, unicode):
width = 0
for uc in unicodedata.normalize('NFC', line):
if unicodedata.east_asian_width(uc) in ('W', 'F'):
width += 2
elif not unicodedata.combining(uc):
width += 1
return width
else:
return len(line)
def CheckStyle(filename, clean_lines, linenum, file_extension, nesting_state,
error):
"""Checks rules from the 'C++ style rules' section of cppguide.html.
Most of these rules are hard to test (naming, comment style), but we
do what we can. In particular we check for 2-space indents, line lengths,
tab usage, spaces inside code, etc.
Args:
filename: The name of the current file.
clean_lines: A CleansedLines instance containing the file.
linenum: The number of the line to check.
file_extension: The extension (without the dot) of the filename.
nesting_state: A _NestingState instance which maintains information about
the current stack of nested blocks being parsed.
error: The function to call with any errors found.
"""
# Don't use "elided" lines here, otherwise we can't check commented lines.
# Don't want to use "raw" either, because we don't want to check inside C++11
# raw strings,
raw_lines = clean_lines.lines_without_raw_strings
3370
3371
3372
3373
3374
3375
3376
3377
3378
3379
3380
3381
3382
3383
3384
3385
3386
3387
3388
3389
3390
3391
3392
3393
3394
line = raw_lines[linenum]
if line.find('\t') != -1:
error(filename, linenum, 'whitespace/tab', 1,
'Tab found; better to use spaces')
# One or three blank spaces at the beginning of the line is weird; it's
# hard to reconcile that with 2-space indents.
# NOTE: here are the conditions rob pike used for his tests. Mine aren't
# as sophisticated, but it may be worth becoming so: RLENGTH==initial_spaces
# if(RLENGTH > 20) complain = 0;
# if(match($0, " +(error|private|public|protected):")) complain = 0;
# if(match(prev, "&& *$")) complain = 0;
# if(match(prev, "\\|\\| *$")) complain = 0;
# if(match(prev, "[\",=><] *$")) complain = 0;
# if(match($0, " <<")) complain = 0;
# if(match(prev, " +for \\(")) complain = 0;
# if(prevodd && match(prevprev, " +for \\(")) complain = 0;
initial_spaces = 0
cleansed_line = clean_lines.elided[linenum]
while initial_spaces < len(line) and line[initial_spaces] == ' ':
initial_spaces += 1
if line and line[-1].isspace():
error(filename, linenum, 'whitespace/end_of_line', 4,
'Line ends in whitespace. Consider deleting these extra spaces.')
# There are certain situations we allow one space, notably for section labels
3396
3397
3398
3399
3400
3401
3402
3403
3404
3405
3406
3407
3408
3409
3410
3411
3412
3413
3414
3415
3416
3417
3418
3419
3420
3421
elif ((initial_spaces == 1 or initial_spaces == 3) and
not Match(r'\s*\w+\s*:\s*$', cleansed_line)):
error(filename, linenum, 'whitespace/indent', 3,
'Weird number of spaces at line-start. '
'Are you using a 2-space indent?')
# Check if the line is a header guard.
is_header_guard = False
if file_extension == 'h':
cppvar = GetHeaderGuardCPPVariable(filename)
if (line.startswith('#ifndef %s' % cppvar) or
line.startswith('#define %s' % cppvar) or
line.startswith('#endif // %s' % cppvar)):
is_header_guard = True
# #include lines and header guards can be long, since there's no clean way to
# split them.
#
# URLs can be long too. It's possible to split these, but it makes them
# harder to cut&paste.
#
# The "$Id:...$" comment may also get very long without it being the
# developers fault.
if (not line.startswith('#include') and not is_header_guard and
not Match(r'^\s*//.*http(s?)://\S*$', line) and
not Match(r'^// \$Id:.*#[0-9]+ \$$', line)):
line_width = GetLineWidth(line)
extended_length = int((_line_length * 1.25))
if line_width > extended_length:
error(filename, linenum, 'whitespace/line_length', 4,
'Lines should very rarely be longer than %i characters' %
extended_length)
elif line_width > _line_length:
error(filename, linenum, 'whitespace/line_length', 2,
if (cleansed_line.count(';') > 1 and
# for loops are allowed two ;'s (and may run over two lines).
cleansed_line.find('for') == -1 and
(GetPreviousNonBlankLine(clean_lines, linenum)[0].find('for') == -1 or
GetPreviousNonBlankLine(clean_lines, linenum)[0].find(';') != -1) and
# It's ok to have many commands in a switch case that fits in 1 line
not ((cleansed_line.find('case ') != -1 or
cleansed_line.find('default:') != -1) and
cleansed_line.find('break;') != -1)):
'More than one command on the same line')
# Some more style checks
CheckBraces(filename, clean_lines, linenum, error)
CheckAccess(filename, clean_lines, linenum, nesting_state, error)
CheckSpacing(filename, clean_lines, linenum, nesting_state, error)
CheckCheck(filename, clean_lines, linenum, error)
CheckAltTokens(filename, clean_lines, linenum, error)
classinfo = nesting_state.InnermostClass()
if classinfo:
CheckSectionSpacing(filename, clean_lines, classinfo, linenum, error)
3453
3454
3455
3456
3457
3458
3459
3460
3461
3462
3463
3464
3465
3466
3467
3468
3469
3470
3471
3472
3473
3474
3475
3476
3477
3478
3479
3480
3481
3482
3483
3484
3485
3486
3487
3488
3489
3490
3491
3492
3493
3494
3495
3496
3497
3498
3499
3500
3501
3502
3503
3504
3505
3506
3507
3508
3509
3510
3511
3512
3513
3514
3515
3516
3517
3518
3519
3520
3521
3522
3523
3524
3525
3526
3527
3528
3529
3530
3531
3532
3533
3534
_RE_PATTERN_INCLUDE_NEW_STYLE = re.compile(r'#include +"[^/]+\.h"')
_RE_PATTERN_INCLUDE = re.compile(r'^\s*#\s*include\s*([<"])([^>"]*)[>"].*$')
# Matches the first component of a filename delimited by -s and _s. That is:
# _RE_FIRST_COMPONENT.match('foo').group(0) == 'foo'
# _RE_FIRST_COMPONENT.match('foo.cc').group(0) == 'foo'
# _RE_FIRST_COMPONENT.match('foo-bar_baz.cc').group(0) == 'foo'
# _RE_FIRST_COMPONENT.match('foo_bar-baz.cc').group(0) == 'foo'
_RE_FIRST_COMPONENT = re.compile(r'^[^-_.]+')
def _DropCommonSuffixes(filename):
"""Drops common suffixes like _test.cc or -inl.h from filename.
For example:
>>> _DropCommonSuffixes('foo/foo-inl.h')
'foo/foo'
>>> _DropCommonSuffixes('foo/bar/foo.cc')
'foo/bar/foo'
>>> _DropCommonSuffixes('foo/foo_internal.h')
'foo/foo'
>>> _DropCommonSuffixes('foo/foo_unusualinternal.h')
'foo/foo_unusualinternal'
Args:
filename: The input filename.
Returns:
The filename with the common suffix removed.
"""
for suffix in ('test.cc', 'regtest.cc', 'unittest.cc',
'inl.h', 'impl.h', 'internal.h'):
if (filename.endswith(suffix) and len(filename) > len(suffix) and
filename[-len(suffix) - 1] in ('-', '_')):
return filename[:-len(suffix) - 1]
return os.path.splitext(filename)[0]
def _IsTestFilename(filename):
"""Determines if the given filename has a suffix that identifies it as a test.
Args:
filename: The input filename.
Returns:
True if 'filename' looks like a test, False otherwise.
"""
if (filename.endswith('_test.cc') or
filename.endswith('_unittest.cc') or
filename.endswith('_regtest.cc')):
return True
else:
return False
def _ClassifyInclude(fileinfo, include, is_system):
"""Figures out what kind of header 'include' is.
Args:
fileinfo: The current file cpplint is running over. A FileInfo instance.
include: The path to a #included file.
is_system: True if the #include used <> rather than "".
Returns:
One of the _XXX_HEADER constants.
For example:
>>> _ClassifyInclude(FileInfo('foo/foo.cc'), 'stdio.h', True)
_C_SYS_HEADER
>>> _ClassifyInclude(FileInfo('foo/foo.cc'), 'string', True)
_CPP_SYS_HEADER
>>> _ClassifyInclude(FileInfo('foo/foo.cc'), 'foo/foo.h', False)
_LIKELY_MY_HEADER
>>> _ClassifyInclude(FileInfo('foo/foo_unknown_extension.cc'),
... 'bar/foo_other_ext.h', False)
_POSSIBLE_MY_HEADER
>>> _ClassifyInclude(FileInfo('foo/foo.cc'), 'foo/bar.h', False)
_OTHER_HEADER
"""
# This is a list of all standard c++ header files, except
# those already checked for above.
3536
3537
3538
3539
3540
3541
3542
3543
3544
3545
3546
3547
3548
3549
3550
3551
3552
3553
3554
3555
3556
3557
3558
3559
3560
3561
3562
3563
3564
3565
3566
3567
3568
3569
3570
3571
3572
3573
3574
3575
3576
3577
3578
3579
3580
3581
3582
3583
3584
3585
3586
3587
3588
3589
3590
3591
3592
3593
3594
3595
3596
3597
3598
3599
3600
3601
3602
3603
3604
3605
3606
3607
3608
3609
3610
3611
3612
3613
3614
3615
3616
3617
3618
3619
3620
3621
3622
if is_system:
if is_cpp_h:
return _CPP_SYS_HEADER
else:
return _C_SYS_HEADER
# If the target file and the include we're checking share a
# basename when we drop common extensions, and the include
# lives in . , then it's likely to be owned by the target file.
target_dir, target_base = (
os.path.split(_DropCommonSuffixes(fileinfo.RepositoryName())))
include_dir, include_base = os.path.split(_DropCommonSuffixes(include))
if target_base == include_base and (
include_dir == target_dir or
include_dir == os.path.normpath(target_dir + '/../public')):
return _LIKELY_MY_HEADER
# If the target and include share some initial basename
# component, it's possible the target is implementing the
# include, so it's allowed to be first, but we'll never
# complain if it's not there.
target_first_component = _RE_FIRST_COMPONENT.match(target_base)
include_first_component = _RE_FIRST_COMPONENT.match(include_base)
if (target_first_component and include_first_component and
target_first_component.group(0) ==
include_first_component.group(0)):
return _POSSIBLE_MY_HEADER
return _OTHER_HEADER
def CheckIncludeLine(filename, clean_lines, linenum, include_state, error):
"""Check rules that are applicable to #include lines.
Strings on #include lines are NOT removed from elided line, to make
certain tasks easier. However, to prevent false positives, checks
applicable to #include lines in CheckLanguage must be put here.
Args:
filename: The name of the current file.
clean_lines: A CleansedLines instance containing the file.
linenum: The number of the line to check.
include_state: An _IncludeState instance in which the headers are inserted.
error: The function to call with any errors found.
"""
fileinfo = FileInfo(filename)
line = clean_lines.lines[linenum]
# "include" should use the new style "foo/bar.h" instead of just "bar.h"
if _RE_PATTERN_INCLUDE_NEW_STYLE.search(line):
error(filename, linenum, 'build/include', 4,
'Include the directory when naming .h files')
# we shouldn't include a file more than once. actually, there are a
# handful of instances where doing so is okay, but in general it's
# not.
match = _RE_PATTERN_INCLUDE.search(line)
if match:
include = match.group(2)
is_system = (match.group(1) == '<')
if include in include_state:
error(filename, linenum, 'build/include', 4,
'"%s" already included at %s:%s' %
(include, filename, include_state[include]))
else:
include_state[include] = linenum
# We want to ensure that headers appear in the right order:
# 1) for foo.cc, foo.h (preferred location)
# 2) c system files
# 3) cpp system files
# 4) for foo.cc, foo.h (deprecated location)
# 5) other google headers
#
# We classify each include statement as one of those 5 types
# using a number of techniques. The include_state object keeps
# track of the highest type seen, and complains if we see a
# lower type after that.
error_message = include_state.CheckNextIncludeOrder(
_ClassifyInclude(fileinfo, include, is_system))
if error_message:
error(filename, linenum, 'build/include_order', 4,
'%s. Should be: %s.h, c system, c++ system, other.' %
(error_message, fileinfo.BaseName()))
canonical_include = include_state.CanonicalizeAlphabeticalOrder(include)
if not include_state.IsInAlphabeticalOrder(
clean_lines, linenum, canonical_include):
error(filename, linenum, 'build/include_alpha', 4,
'Include "%s" not in alphabetical order' % include)
# Look for any of the stream classes that are part of standard C++.
match = _RE_PATTERN_INCLUDE.match(line)
if match:
include = match.group(2)
if Match(r'(f|ind|io|i|o|parse|pf|stdio|str|)?stream$', include):
# Many unit tests use cout, so we exempt them.
if not _IsTestFilename(filename):
error(filename, linenum, 'readability/streams', 3,
'Streams are highly discouraged.')
def _GetTextInside(text, start_pattern):
r"""Retrieves all the text between matching open and close parentheses.
3643
3644
3645
3646
3647
3648
3649
3650
3651
3652
3653
3654
3655
3656
3657
3658
3659
3660
3661
3662
3663
3664
3665
3666
3667
3668
3669
3670
3671
3672
3673
3674
3675
3676
3677
3678
3679
3680
3681
3682
3683
3684
3685
3686
3687
3688
3689
3690
3691
3692
3693
3694
3695
3696
Given a string of lines and a regular expression string, retrieve all the text
following the expression and between opening punctuation symbols like
(, [, or {, and the matching close-punctuation symbol. This properly nested
occurrences of the punctuations, so for the text like
printf(a(), b(c()));
a call to _GetTextInside(text, r'printf\(') will return 'a(), b(c())'.
start_pattern must match string having an open punctuation symbol at the end.
Args:
text: The lines to extract text. Its comments and strings must be elided.
It can be single line and can span multiple lines.
start_pattern: The regexp string indicating where to start extracting
the text.
Returns:
The extracted text.
None if either the opening string or ending punctuation could not be found.
"""
# TODO(sugawarayu): Audit cpplint.py to see what places could be profitably
# rewritten to use _GetTextInside (and use inferior regexp matching today).
# Give opening punctuations to get the matching close-punctuations.
matching_punctuation = {'(': ')', '{': '}', '[': ']'}
closing_punctuation = set(matching_punctuation.itervalues())
# Find the position to start extracting text.
match = re.search(start_pattern, text, re.M)
if not match: # start_pattern not found in text.
return None
start_position = match.end(0)
assert start_position > 0, (
'start_pattern must ends with an opening punctuation.')
assert text[start_position - 1] in matching_punctuation, (
'start_pattern must ends with an opening punctuation.')
# Stack of closing punctuations we expect to have in text after position.
punctuation_stack = [matching_punctuation[text[start_position - 1]]]
position = start_position
while punctuation_stack and position < len(text):
if text[position] == punctuation_stack[-1]:
punctuation_stack.pop()
elif text[position] in closing_punctuation:
# A closing punctuation without matching opening punctuations.
return None
elif text[position] in matching_punctuation:
punctuation_stack.append(matching_punctuation[text[position]])
position += 1
if punctuation_stack:
# Opening punctuations left without matching close-punctuations.
return None
# punctuations match.
return text[start_position:position - 1]
3697
3698
3699
3700
3701
3702
3703
3704
3705
3706
3707
3708
3709
3710
3711
3712
3713
3714
3715
3716
3717
3718
3719
3720
3721
3722
3723
3724
# Patterns for matching call-by-reference parameters.
#
# Supports nested templates up to 2 levels deep using this messy pattern:
# < (?: < (?: < [^<>]*
# >
# | [^<>] )*
# >
# | [^<>] )*
# >
_RE_PATTERN_IDENT = r'[_a-zA-Z]\w*' # =~ [[:alpha:]][[:alnum:]]*
_RE_PATTERN_TYPE = (
r'(?:const\s+)?(?:typename\s+|class\s+|struct\s+|union\s+|enum\s+)?'
r'(?:\w|'
r'\s*<(?:<(?:<[^<>]*>|[^<>])*>|[^<>])*>|'
r'::)+')
# A call-by-reference parameter ends with '& identifier'.
_RE_PATTERN_REF_PARAM = re.compile(
r'(' + _RE_PATTERN_TYPE + r'(?:\s*(?:\bconst\b|[*]))*\s*'
r'&\s*' + _RE_PATTERN_IDENT + r')\s*(?:=[^,()]+)?[,)]')
# A call-by-const-reference parameter either ends with 'const& identifier'
# or looks like 'const type& identifier' when 'type' is atomic.
_RE_PATTERN_CONST_REF_PARAM = (
r'(?:.*\s*\bconst\s*&\s*' + _RE_PATTERN_IDENT +
r'|const\s+' + _RE_PATTERN_TYPE + r'\s*&\s*' + _RE_PATTERN_IDENT + r')')
def CheckLanguage(filename, clean_lines, linenum, file_extension,
include_state, nesting_state, error):
"""Checks rules from the 'C++ language rules' section of cppguide.html.
Some of these rules are hard to test (function overloading, using
uint32 inappropriately), but we do the best we can.
Args:
filename: The name of the current file.
clean_lines: A CleansedLines instance containing the file.
linenum: The number of the line to check.
file_extension: The extension (without the dot) of the filename.
include_state: An _IncludeState instance in which the headers are inserted.
nesting_state: A _NestingState instance which maintains information about
the current stack of nested blocks being parsed.
error: The function to call with any errors found.
"""
# If the line is empty or consists of entirely a comment, no need to
# check it.
line = clean_lines.elided[linenum]
if not line:
return
match = _RE_PATTERN_INCLUDE.search(line)
if match:
CheckIncludeLine(filename, clean_lines, linenum, include_state, error)
return
# Reset include state across preprocessor directives. This is meant
# to silence warnings for conditional includes.
if Match(r'^\s*#\s*(?:ifdef|elif|else|endif)\b', line):
include_state.ResetSection()
# Make Windows paths like Unix.
fullname = os.path.abspath(filename).replace('\\', '/')
# TODO(unknown): figure out if they're using default arguments in fn proto.
# Check to see if they're using an conversion function cast.
# I just try to capture the most common basic types, though there are more.
# Parameterless conversion functions, such as bool(), are allowed as they are
# probably a member operator declaration or default constructor.
match = Search(
r'(\bnew\s+)?\b' # Grab 'new' operator, if it's there
r'(int|float|double|bool|char|int32|uint32|int64|uint64)'
r'(\([^)].*)', line)
matched_new = match.group(1)
matched_type = match.group(2)
matched_funcptr = match.group(3)
# gMock methods are defined using some variant of MOCK_METHODx(name, type)
# where type may be float(), int(string), etc. Without context they are
# virtually indistinguishable from int(x) casts. Likewise, gMock's
# MockCallback takes a template parameter of the form return_type(arg_type),
# which looks much like the cast we're trying to detect.
#
# std::function<> wrapper has a similar problem.
#
# Return types for function pointers also look like casts if they
# don't have an extra space.
if (matched_new is None and # If new operator, then this isn't a cast
not (Match(r'^\s*MOCK_(CONST_)?METHOD\d+(_T)?\(', line) or
Search(r'\bMockCallback<.*>', line) or
Search(r'\bstd::function<.*>', line)) and
not (matched_funcptr and
Match(r'\((?:[^() ]+::\s*\*\s*)?[^() ]+\)\s*\(',
matched_funcptr))):
# Try a bit harder to catch gmock lines: the only place where
# something looks like an old-style cast is where we declare the
# return type of the mocked method, and the only time when we
# are missing context is if MOCK_METHOD was split across
# multiple lines. The missing MOCK_METHOD is usually one or two
# lines back, so scan back one or two lines.
#
# It's not possible for gmock macros to appear in the first 2
# lines, since the class head + section name takes up 2 lines.
if (linenum < 2 or
not (Match(r'^\s*MOCK_(?:CONST_)?METHOD\d+(?:_T)?\((?:\S+,)?\s*$',
clean_lines.elided[linenum - 1]) or
Match(r'^\s*MOCK_(?:CONST_)?METHOD\d+(?:_T)?\(\s*$',
clean_lines.elided[linenum - 2]))):
error(filename, linenum, 'readability/casting', 4,
'Using deprecated casting style. '
'Use static_cast<%s>(...) instead' %
CheckCStyleCast(filename, linenum, line, clean_lines.raw_lines[linenum],
'static_cast',
r'\((int|float|double|bool|char|u?int(16|32|64))\)', error)
# This doesn't catch all cases. Consider (const char * const)"hello".
#
# (char *) "foo" should always be a const_cast (reinterpret_cast won't
# compile).
if CheckCStyleCast(filename, linenum, line, clean_lines.raw_lines[linenum],
'const_cast', r'\((char\s?\*+\s?)\)\s*"', error):
pass
else:
# Check pointer casts for other than string constants
CheckCStyleCast(filename, linenum, line, clean_lines.raw_lines[linenum],
'reinterpret_cast', r'\((\w+\s?\*+\s?)\)', error)
# In addition, we look for people taking the address of a cast. This
# is dangerous -- casts can assign to temporaries, so the pointer doesn't
# point where you think.
match = Search(
r'(?:&\(([^)]+)\)[\w(])|'
r'(?:&(static|dynamic|down|reinterpret)_cast\b)', line)
if match and match.group(1) != '*':
error(filename, linenum, 'runtime/casting', 4,
('Are you taking an address of a cast? '
'This is dangerous: could be a temp var. '
'Take the address before doing the cast, rather than after'))
# Create an extended_line, which is the concatenation of the current and
# next lines, for more effective checking of code that may span more than one
# line.
if linenum + 1 < clean_lines.NumLines():
extended_line = line + clean_lines.elided[linenum + 1]
else:
extended_line = line
# Check for people declaring static/global STL strings at the top level.
# This is dangerous because the C++ language does not guarantee that
# globals with constructors are initialized before the first access.
match = Match(
r'((?:|static +)(?:|const +))string +([a-zA-Z0-9_:]+)\b(.*)',
line)
# Make sure it's not a function.
# Function template specialization looks like: "string foo<Type>(...".
# Class template definitions look like: "string Foo<Type>::Method(...".
#
# Also ignore things that look like operators. These are matched separately
# because operator names cross non-word boundaries. If we change the pattern
# above, we would decrease the accuracy of matching identifiers.
if (match and
not Search(r'\boperator\W', line) and
not Match(r'\s*(<.*>)?(::[a-zA-Z0-9_]+)?\s*\(([^"]|$)', match.group(3))):
3862
3863
3864
3865
3866
3867
3868
3869
3870
3871
3872
3873
3874
3875
3876
3877
3878
3879
3880
3881
3882
3883
3884
3885
3886
3887
3888
3889
3890
3891
3892
3893
3894
3895
3896
3897
3898
3899
3900
3901
3902
3903
3904
3905
3906
3907
3908
3909
3910
3911
3912
3913
3914
3915
3916
3917
3918
3919
3920
3921
3922
3923
3924
3925
3926
3927
3928
3929
3930
3931
3932
3933
error(filename, linenum, 'runtime/string', 4,
'For a static/global string constant, use a C style string instead: '
'"%schar %s[]".' %
(match.group(1), match.group(2)))
if Search(r'\b([A-Za-z0-9_]*_)\(\1\)', line):
error(filename, linenum, 'runtime/init', 4,
'You seem to be initializing a member variable with itself.')
if file_extension == 'h':
# TODO(unknown): check that 1-arg constructors are explicit.
# How to tell it's a constructor?
# (handled in CheckForNonStandardConstructs for now)
# TODO(unknown): check that classes have DISALLOW_EVIL_CONSTRUCTORS
# (level 1 error)
pass
# Check if people are using the verboten C basic types. The only exception
# we regularly allow is "unsigned short port" for port.
if Search(r'\bshort port\b', line):
if not Search(r'\bunsigned short port\b', line):
error(filename, linenum, 'runtime/int', 4,
'Use "unsigned short" for ports, not "short"')
else:
match = Search(r'\b(short|long(?! +double)|long long)\b', line)
if match:
error(filename, linenum, 'runtime/int', 4,
'Use int16/int64/etc, rather than the C type %s' % match.group(1))
# When snprintf is used, the second argument shouldn't be a literal.
match = Search(r'snprintf\s*\(([^,]*),\s*([0-9]*)\s*,', line)
if match and match.group(2) != '0':
# If 2nd arg is zero, snprintf is used to calculate size.
error(filename, linenum, 'runtime/printf', 3,
'If you can, use sizeof(%s) instead of %s as the 2nd arg '
'to snprintf.' % (match.group(1), match.group(2)))
# Check if some verboten C functions are being used.
if Search(r'\bsprintf\b', line):
error(filename, linenum, 'runtime/printf', 5,
'Never use sprintf. Use snprintf instead.')
match = Search(r'\b(strcpy|strcat)\b', line)
if match:
error(filename, linenum, 'runtime/printf', 4,
'Almost always, snprintf is better than %s' % match.group(1))
# Check if some verboten operator overloading is going on
# TODO(unknown): catch out-of-line unary operator&:
# class X {};
# int operator&(const X& x) { return 42; } // unary operator&
# The trick is it's hard to tell apart from binary operator&:
# class Y { int operator&(const Y& x) { return 23; } }; // binary operator&
if Search(r'\boperator\s*&\s*\(\s*\)', line):
error(filename, linenum, 'runtime/operator', 4,
'Unary operator& is dangerous. Do not use it.')
# Check for suspicious usage of "if" like
# } if (a == b) {
if Search(r'\}\s*if\s*\(', line):
error(filename, linenum, 'readability/braces', 4,
'Did you mean "else if"? If not, start a new line for "if".')
# Check for potential format string bugs like printf(foo).
# We constrain the pattern not to pick things like DocidForPrintf(foo).
# Not perfect but it can catch printf(foo.c_str()) and printf(foo->c_str())
# TODO(sugawarayu): Catch the following case. Need to change the calling
# convention of the whole function to process multiple line to handle it.
# printf(
# boy_this_is_a_really_long_variable_that_cannot_fit_on_the_prev_line);
printf_args = _GetTextInside(line, r'(?i)\b(string)?printf\s*\(')
if printf_args:
match = Match(r'([\w.\->()]+)$', printf_args)
3935
3936
3937
3938
3939
3940
3941
3942
3943
3944
3945
3946
3947
3948
3949
3950
3951
3952
3953
3954
3955
3956
3957
3958
3959
3960
3961
3962
3963
3964
3965
3966
3967
3968
3969
3970
3971
3972
3973
3974
3975
3976
3977
3978
3979
3980
3981
3982
3983
3984
3985
3986
3987
3988
3989
3990
3991
3992
3993
3994
3995
3996
3997
3998
3999
4000
function_name = re.search(r'\b((?:string)?printf)\s*\(',
line, re.I).group(1)
error(filename, linenum, 'runtime/printf', 4,
'Potential format string bug. Do %s("%%s", %s) instead.'
% (function_name, match.group(1)))
# Check for potential memset bugs like memset(buf, sizeof(buf), 0).
match = Search(r'memset\s*\(([^,]*),\s*([^,]*),\s*0\s*\)', line)
if match and not Match(r"^''|-?[0-9]+|0x[0-9A-Fa-f]$", match.group(2)):
error(filename, linenum, 'runtime/memset', 4,
'Did you mean "memset(%s, 0, %s)"?'
% (match.group(1), match.group(2)))
if Search(r'\busing namespace\b', line):
error(filename, linenum, 'build/namespaces', 5,
'Do not use namespace using-directives. '
'Use using-declarations instead.')
# Detect variable-length arrays.
match = Match(r'\s*(.+::)?(\w+) [a-z]\w*\[(.+)];', line)
if (match and match.group(2) != 'return' and match.group(2) != 'delete' and
match.group(3).find(']') == -1):
# Split the size using space and arithmetic operators as delimiters.
# If any of the resulting tokens are not compile time constants then
# report the error.
tokens = re.split(r'\s|\+|\-|\*|\/|<<|>>]', match.group(3))
is_const = True
skip_next = False
for tok in tokens:
if skip_next:
skip_next = False
continue
if Search(r'sizeof\(.+\)', tok): continue
if Search(r'arraysize\(\w+\)', tok): continue
tok = tok.lstrip('(')
tok = tok.rstrip(')')
if not tok: continue
if Match(r'\d+', tok): continue
if Match(r'0[xX][0-9a-fA-F]+', tok): continue
if Match(r'k[A-Z0-9]\w*', tok): continue
if Match(r'(.+::)?k[A-Z0-9]\w*', tok): continue
if Match(r'(.+::)?[A-Z][A-Z0-9_]*', tok): continue
# A catch all for tricky sizeof cases, including 'sizeof expression',
# 'sizeof(*type)', 'sizeof(const type)', 'sizeof(struct StructName)'
# requires skipping the next token because we split on ' ' and '*'.
if tok.startswith('sizeof'):
skip_next = True
continue
is_const = False
break
if not is_const:
error(filename, linenum, 'runtime/arrays', 1,
'Do not use variable-length arrays. Use an appropriately named '
"('k' followed by CamelCase) compile-time constant for the size.")
# If DISALLOW_EVIL_CONSTRUCTORS, DISALLOW_COPY_AND_ASSIGN, or
# DISALLOW_IMPLICIT_CONSTRUCTORS is present, then it should be the last thing
# in the class declaration.
match = Match(
(r'\s*'
r'(DISALLOW_(EVIL_CONSTRUCTORS|COPY_AND_ASSIGN|IMPLICIT_CONSTRUCTORS))'
r'\(.*\);$'),
line)
if match and linenum + 1 < clean_lines.NumLines():