Skip to content

Commit

Permalink
remove non-ascii-checker
Browse files Browse the repository at this point in the history
  • Loading branch information
CarliJoy committed Jan 6, 2022
1 parent 01326cd commit dcf244a
Show file tree
Hide file tree
Showing 54 changed files with 232 additions and 806 deletions.
4 changes: 0 additions & 4 deletions ChangeLog
Original file line number Diff line number Diff line change
Expand Up @@ -31,10 +31,6 @@ Release date: TBA

Closes #5281

* ``non-ascii-name`` check was rewritten to ensure that really __all__ Python names are
ASCII, as non-ASCII characters could be homoglyphs (look-alike characters) and are hard
to enter on a non-specialized keyboard.

* Fixed false positive ``consider-using-dict-comprehension`` when creating a dict
using a list of tuples where key AND value vary depending on the same condition.

Expand Down
203 changes: 203 additions & 0 deletions base_checker.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,203 @@
# Copyright (c) 2006-2014 LOGILAB S.A. (Paris, FRANCE) <[email protected]>
# Copyright (c) 2013-2014 Google, Inc.
# Copyright (c) 2013 [email protected] <[email protected]>
# Copyright (c) 2014-2020 Claudiu Popa <[email protected]>
# Copyright (c) 2014 Brett Cannon <[email protected]>
# Copyright (c) 2014 Arun Persaud <[email protected]>
# Copyright (c) 2015 Ionel Cristian Maries <[email protected]>
# Copyright (c) 2016 Moises Lopez <[email protected]>
# Copyright (c) 2017-2018 Bryce Guinta <[email protected]>
# Copyright (c) 2018-2021 Pierre Sassoulas <[email protected]>
# Copyright (c) 2018 ssolanki <[email protected]>
# Copyright (c) 2019 Bruno P. Kinoshita <[email protected]>
# Copyright (c) 2020 hippo91 <[email protected]>
# Copyright (c) 2021 Daniël van Noord <[email protected]>
# Copyright (c) 2021 bot <[email protected]>
# Copyright (c) 2021 Marc Mueller <[email protected]>

# Licensed under the GPL: https://www.gnu.org/licenses/old-licenses/gpl-2.0.html
# For details: https://github.com/PyCQA/pylint/blob/main/LICENSE
import functools
from inspect import cleandoc
from typing import Any, Optional

from astroid import nodes

from pylint.config import OptionsProviderMixIn
from pylint.constants import _MSG_ORDER, WarningScope
from pylint.exceptions import InvalidMessageError
from pylint.interfaces import Confidence, IRawChecker, ITokenChecker, implements
from pylint.message.message_definition import MessageDefinition
from pylint.utils import get_rst_section, get_rst_title


@functools.total_ordering
class BaseChecker(OptionsProviderMixIn):
    """Base class for pylint checkers.

    A checker declares its messages, options and reports as class attributes
    and forwards the messages it emits to the linter it was created with.
    """

    # checker name (you may reuse an existing one)
    name: str = ""
    # options level (0 will be displaying in --help, 1 in --long-help)
    level = 1
    # ordered list of options to control the checker behaviour
    options: Any = ()
    # messages issued by this checker
    msgs: Any = {}
    # reports issued by this checker
    reports: Any = ()
    # mark this checker as enabled or not.
    enabled: bool = True

    def __init__(self, linter=None):
        """checker instances should have the linter as argument

        :param ILinter linter: is an object implementing ILinter.
        """
        if self.name is not None:
            self.name = self.name.lower()
        super().__init__()
        self.linter = linter

    def __gt__(self, other):
        """Permit to sort a list of Checker by name.

        The messages mapping is appended to the name so that two checkers
        sharing a name still compare on their declared messages.
        """
        return f"{self.name}{self.msgs}" > f"{other.name}{other.msgs}"

    def __repr__(self):
        """Return a one-line summary: status, name and handled message ids."""
        status = "Checker" if self.enabled else "Disabled checker"
        msgs = "', '".join(self.msgs)
        return f"{status} '{self.name}' (responsible for '{msgs}')"

    def __str__(self):
        """This might be incomplete because multiple class inheriting BaseChecker
        can have the same name. Cf MessageHandlerMixIn.get_full_documentation()
        """
        return self.get_full_documentation(
            msgs=self.msgs, options=self.options_and_values(), reports=self.reports
        )

    def get_full_documentation(self, msgs, options, reports, doc=None, module=None):
        """Build the reStructuredText documentation of this checker.

        :param msgs: mapping of message id to message tuple to document.
        :param options: iterable of options handed to ``get_rst_section``.
        :param reports: iterable of report tuples; the first two items of
            each one are rendered.
        :param doc: optional free-form documentation, dedented with ``cleandoc``.
        :param module: optional module name, used as link anchor and provider.
        :returns: the documentation as a single string.
        """
        result = ""
        checker_title = f"{self.name.replace('_', ' ').title()} checker"
        if module:
            # Provide anchor to link against
            result += f".. _{module}:\n\n"
        result += f"{get_rst_title(checker_title, '~')}\n"
        if module:
            result += f"This checker is provided by ``{module}``.\n"
        result += f"Verbatim name of the checker is ``{self.name}``.\n\n"
        if doc:
            result += get_rst_title(f"{checker_title} Documentation", "^")
            result += f"{cleandoc(doc)}\n\n"
        # options might be an empty generator and not be False when cast to boolean
        options = list(options)
        if options:
            result += get_rst_title(f"{checker_title} Options", "^")
            result += f"{get_rst_section(None, options)}\n"
        if msgs:
            result += get_rst_title(f"{checker_title} Messages", "^")
            # Order by message category first, then by the message tuple itself.
            for msgid, msg_tuple in sorted(
                msgs.items(), key=lambda kv: (_MSG_ORDER.index(kv[0][0]), kv[1])
            ):
                definition = self.create_message_definition_from_tuple(
                    msgid, msg_tuple
                )
                result += f"{definition.format_help(checkerref=False)}\n"
            result += "\n"
        if reports:
            result += get_rst_title(f"{checker_title} Reports", "^")
            for report in reports:
                result += (
                    ":%s: %s\n" % report[:2]  # pylint: disable=consider-using-f-string
                )
            result += "\n"
        result += "\n"
        return result

    def add_message(
        self,
        msgid: str,
        line: Optional[int] = None,
        node: Optional[nodes.NodeNG] = None,
        args: Any = None,
        confidence: Optional[Confidence] = None,
        col_offset: Optional[int] = None,
        end_lineno: Optional[int] = None,
        end_col_offset: Optional[int] = None,
    ) -> None:
        """Forward a message to the linter this checker was created with."""
        self.linter.add_message(
            msgid, line, node, args, confidence, col_offset, end_lineno, end_col_offset
        )

    def check_consistency(self):
        """Check the consistency of msgid.

        msg ids for a checker should be a string of len 4, where the two first
        characters are the checker id and the two last the msg id in this
        checker.

        :raises InvalidMessageError: If the checker id in the messages are not
            always the same.
        """
        checker_id = None
        existing_ids = []
        for message in self.messages:
            if checker_id is not None and checker_id != message.msgid[1:3]:
                error_msg = "Inconsistent checker part in message id "
                error_msg += f"'{message.msgid}' (expected 'x{checker_id}xx' "
                error_msg += f"because we already had {existing_ids})."
                raise InvalidMessageError(error_msg)
            checker_id = message.msgid[1:3]
            existing_ids.append(message.msgid)

    def create_message_definition_from_tuple(self, msgid, msg_tuple):
        """Turn one ``msgs`` entry into a ``MessageDefinition``.

        :param msgid: the message id (the key of the ``msgs`` entry).
        :param msg_tuple: ``(msg, symbol, descr)`` or
            ``(msg, symbol, descr, options)``.
        :raises InvalidMessageError: if the tuple has fewer than three items.
        """
        if implements(self, (IRawChecker, ITokenChecker)):
            default_scope = WarningScope.LINE
        else:
            default_scope = WarningScope.NODE
        options = {}
        if len(msg_tuple) > 3:
            (msg, symbol, descr, options) = msg_tuple
            # Work on a copy: the dict belongs to the shared ``msgs``
            # declaration and must not be altered by the ``setdefault`` below.
            options = dict(options)
        elif len(msg_tuple) > 2:
            (msg, symbol, descr) = msg_tuple
        else:
            error_msg = """Messages should have a msgid and a symbol. Something like this :
"W1234": (
"message",
"message-symbol",
"Message description with detail.",
...
),
"""
            raise InvalidMessageError(error_msg)
        options.setdefault("scope", default_scope)
        return MessageDefinition(self, msgid, msg, descr, symbol, **options)

    @property
    def messages(self) -> list:
        """Message definitions of this checker, sorted by message id.

        The list is rebuilt from ``msgs`` on every access.
        """
        return [
            self.create_message_definition_from_tuple(msgid, msg_tuple)
            for msgid, msg_tuple in sorted(self.msgs.items())
        ]

    def get_message_definition(self, msgid):
        """Return the definition of the message whose id is ``msgid``.

        :raises InvalidMessageError: if this checker declares no such message.
        """
        # ``messages`` is rebuilt on each access, so fetch it only once.
        definitions = self.messages
        for message_definition in definitions:
            if message_definition.msgid == msgid:
                return message_definition
        error_msg = f"MessageDefinition for '{msgid}' does not exists. "
        error_msg += f"Choose from {[m.msgid for m in definitions]}."
        raise InvalidMessageError(error_msg)

    # dummy methods implementing the IChecker interface

    def open(self):
        """called before visiting project (i.e. set of modules)"""

    def close(self):
        """called after visiting project (i.e set of modules)"""


class BaseTokenChecker(BaseChecker):
    """Checker base for implementations that need the token stream."""

    def process_tokens(self, tokens):
        """Handle the token stream; concrete checkers must override this.

        :raises NotImplementedError: always, in this base implementation.
        """
        raise NotImplementedError
5 changes: 0 additions & 5 deletions doc/whatsnew/2.13.rst
Original file line number Diff line number Diff line change
Expand Up @@ -33,11 +33,6 @@ New checkers

Closes #5281


* ``non-ascii-name`` check was rewritten to ensure that really __all__ Python names are ASCII,
as non-ASCII characters could be homoglyphs (look-alike characters) and are hard to
enter on a non-specialized keyboard.

* ``unnecessary-ellipsis``: Emitted when the ellipsis constant is used unnecessarily.

Closes #5460
Expand Down
33 changes: 25 additions & 8 deletions pylint/checkers/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -78,7 +78,7 @@
import astroid
from astroid import nodes

from pylint import checkers, interfaces
from pylint import checkers, constants, interfaces
from pylint import utils as lint_utils
from pylint.checkers import utils
from pylint.checkers.utils import (
Expand Down Expand Up @@ -1690,7 +1690,7 @@ def visit_for(self, node: nodes.For) -> None:
def _create_naming_options():
name_options = []
for name_type in sorted(KNOWN_NAME_TYPES):
human_readable_name = HUMAN_READABLE_TYPES[name_type]
human_readable_name = constants.HUMAN_READABLE_TYPES[name_type]
default_style = DEFAULT_NAMING_STYLES[name_type]
name_type = name_type.replace("_", "-")
name_options.append(
Expand Down Expand Up @@ -1737,6 +1737,11 @@ class NameChecker(_BasicChecker):
]
},
),
"C0144": (
'%s name "%s" contains a non-ASCII unicode character',
"non-ascii-name",
"Used when the name contains at least one non-ASCII unicode character.",
),
"W0111": (
"Name %s will become a keyword in Python %s",
"assign-to-new-keyword",
Expand Down Expand Up @@ -1833,6 +1838,7 @@ def __init__(self, linter):
self._name_hints = {}
self._good_names_rgxs_compiled = []
self._bad_names_rgxs_compiled = []
self._non_ascii_rgx_compiled = re.compile("[^\u0000-\u007F]")

def open(self):
self.linter.stats.reset_bad_names()
Expand Down Expand Up @@ -1872,7 +1878,7 @@ def _create_naming_rules(self):

return regexps, hints

@utils.check_messages("disallowed-name", "invalid-name")
@utils.check_messages("disallowed-name", "invalid-name", "non-ascii-name")
def visit_module(self, node: nodes.Module) -> None:
self._check_name("module", node.name.split(".")[-1], node)
self._bad_names = {}
Expand All @@ -1898,15 +1904,19 @@ def leave_module(self, _: nodes.Module) -> None:
for args in warnings:
self._raise_name_warning(prevalent_group, *args)

@utils.check_messages("disallowed-name", "invalid-name", "assign-to-new-keyword")
@utils.check_messages(
"disallowed-name", "invalid-name", "assign-to-new-keyword", "non-ascii-name"
)
def visit_classdef(self, node: nodes.ClassDef) -> None:
self._check_assign_to_new_keyword_violation(node.name, node)
self._check_name("class", node.name, node)
for attr, anodes in node.instance_attrs.items():
if not any(node.instance_attr_ancestors(attr)):
self._check_name("attr", attr, anodes[0])

@utils.check_messages("disallowed-name", "invalid-name", "assign-to-new-keyword")
@utils.check_messages(
"disallowed-name", "invalid-name", "assign-to-new-keyword", "non-ascii-name"
)
def visit_functiondef(self, node: nodes.FunctionDef) -> None:
# Do not emit any warnings if the method is just an implementation
# of a base class method.
Expand Down Expand Up @@ -1934,12 +1944,14 @@ def visit_functiondef(self, node: nodes.FunctionDef) -> None:

visit_asyncfunctiondef = visit_functiondef

@utils.check_messages("disallowed-name", "invalid-name")
@utils.check_messages("disallowed-name", "invalid-name", "non-ascii-name")
def visit_global(self, node: nodes.Global) -> None:
for name in node.names:
self._check_name("const", name, node)

@utils.check_messages("disallowed-name", "invalid-name", "assign-to-new-keyword")
@utils.check_messages(
"disallowed-name", "invalid-name", "assign-to-new-keyword", "non-ascii-name"
)
def visit_assignname(self, node: nodes.AssignName) -> None:
"""check module level assigned names"""
self._check_assign_to_new_keyword_violation(node.name, node)
Expand Down Expand Up @@ -1999,7 +2011,7 @@ def _raise_name_warning(
confidence,
warning: str = "invalid-name",
) -> None:
type_label = HUMAN_READABLE_TYPES[node_type]
type_label = constants.HUMAN_READABLE_TYPES[node_type]
hint = self._name_hints[node_type]
if prevalent_group:
# This happens in the multi naming match case. The expected
Expand Down Expand Up @@ -2029,6 +2041,11 @@ def _name_disallowed_by_regex(self, name: str) -> bool:

def _check_name(self, node_type, name, node, confidence=interfaces.HIGH):
"""check for a name using the type's regexp"""
non_ascii_match = self._non_ascii_rgx_compiled.match(name)
if non_ascii_match is not None:
self._raise_name_warning(
None, node, node_type, name, confidence, warning="non-ascii-name"
)

def _should_exempt_from_invalid_name(node):
if node_type == "variable":
Expand Down
Loading

0 comments on commit dcf244a

Please sign in to comment.