Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Improve non ascii checker #5643

Merged
merged 17 commits into from
Jan 10, 2022
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions CONTRIBUTORS.txt
Original file line number Diff line number Diff line change
Expand Up @@ -595,3 +595,6 @@ contributors:
* Eero Vuojolahti: contributor

* Kian-Meng, Ang: contributor

* Carli* Freudenberg (CarliJoy): contributor
- Improve non-ascii-name checker
8 changes: 8 additions & 0 deletions ChangeLog
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,14 @@ Release date: TBA

Closes #5588

* Add ``non-ascii-identifier`` as a replacement for ``non-ascii-name``
to ensure that really **all** Python names are ASCII.
The checker now properly checks the names of imports (``non-ascii-module-import``) as
well as file names (``non-ascii-file-name``).
Non-ASCII characters could be homoglyphs (look-alike characters) and are hard to
enter on a non-specialized keyboard.
See `Confusable Characters in PEP 672 <https://www.python.org/dev/peps/pep-0672/#confusable-characters-in-identifiers>`_

* When run in parallel mode ``pylint`` now pickles the data passed to subprocesses with
the ``dill`` package. The ``dill`` package has therefore been added as a dependency.

Expand Down
9 changes: 9 additions & 0 deletions doc/whatsnew/2.13.rst
Original file line number Diff line number Diff line change
Expand Up @@ -14,8 +14,17 @@ New checkers

Closes #5460

* Add ``non-ascii-identifier`` as a replacement for ``non-ascii-name``
to ensure that really **all** Python names are ASCII.
The checker now properly checks the names of imports (``non-ascii-module-import``) as
well as file names (``non-ascii-file-name``).
Non-ASCII characters could be homoglyphs (look-alike characters) and are hard to
enter on a non-specialized keyboard.
See `Confusable Characters in PEP 672 <https://www.python.org/dev/peps/pep-0672/#confusable-characters-in-identifiers>`_

Removed checkers
================
* ``non-ascii-name`` has been replaced by ``non-ascii-identifier``

Extensions
==========
Expand Down
9 changes: 8 additions & 1 deletion pylint/checkers/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,14 @@
15: stdlib
16: python3
17: refactoring
18-50: not yet used: reserved for future internal checkers.
.
.
.
25: non-ascii-names
26-50: not yet used: reserved for future internal checkers.
This file is not updated. Use
script/get_unused_message_id_category.py
to get the next free checker id.
51-99: perhaps used: reserved for external checkers

The raw_metrics checker has no number associated since it doesn't emit any
Expand Down
64 changes: 32 additions & 32 deletions pylint/checkers/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -73,7 +73,7 @@
import itertools
import re
import sys
from typing import TYPE_CHECKING, Any, Dict, Iterator, Optional, Pattern, cast
from typing import TYPE_CHECKING, Any, Dict, Iterable, Iterator, Optional, Pattern, cast

import astroid
from astroid import nodes
Expand Down Expand Up @@ -1719,7 +1719,31 @@ def _create_naming_options():
return tuple(name_options)


class NameChecker(_BasicChecker):
class NameCheckerHelper:
"""Class containing functions required by NameChecker and NonAsciiNamesChecker"""

def _check_name(
self, node_type: str, name: str, node: nodes.NodeNG, optional_kwarg: Any = None
):
"""Only Dummy function will be overwritten by implementing classes

Note: kwarg arguments will be different in implementing classes
"""
raise NotImplementedError

def _recursive_check_names(self, args: Iterable[nodes.AssignName]):
"""check names in a possibly recursive list <arg>"""
for arg in args:
if isinstance(arg, nodes.AssignName):
self._check_name("argument", arg.name, arg)
else:
# pylint: disable-next=fixme
# TODO: Check if we can remove this if branch because of
# the up to date astroid version used
self._recursive_check_names(arg.elts)


class NameChecker(_BasicChecker, NameCheckerHelper):
msgs = {
"C0103": (
'%s name "%s" doesn\'t conform to %s',
Expand All @@ -1737,11 +1761,6 @@ class NameChecker(_BasicChecker):
]
},
),
"C0144": (
'%s name "%s" contains a non-ASCII unicode character',
"non-ascii-name",
"Used when the name contains at least one non-ASCII unicode character.",
),
"W0111": (
"Name %s will become a keyword in Python %s",
"assign-to-new-keyword",
Expand Down Expand Up @@ -1838,7 +1857,6 @@ def __init__(self, linter):
self._name_hints = {}
self._good_names_rgxs_compiled = []
self._bad_names_rgxs_compiled = []
self._non_ascii_rgx_compiled = re.compile("[^\u0000-\u007F]")

def open(self):
self.linter.stats.reset_bad_names()
Expand Down Expand Up @@ -1878,7 +1896,7 @@ def _create_naming_rules(self):

return regexps, hints

@utils.check_messages("disallowed-name", "invalid-name", "non-ascii-name")
@utils.check_messages("disallowed-name", "invalid-name")
def visit_module(self, node: nodes.Module) -> None:
self._check_name("module", node.name.split(".")[-1], node)
self._bad_names = {}
Expand All @@ -1904,19 +1922,15 @@ def leave_module(self, _: nodes.Module) -> None:
for args in warnings:
self._raise_name_warning(prevalent_group, *args)

@utils.check_messages(
"disallowed-name", "invalid-name", "assign-to-new-keyword", "non-ascii-name"
)
@utils.check_messages("disallowed-name", "invalid-name", "assign-to-new-keyword")
def visit_classdef(self, node: nodes.ClassDef) -> None:
self._check_assign_to_new_keyword_violation(node.name, node)
self._check_name("class", node.name, node)
for attr, anodes in node.instance_attrs.items():
if not any(node.instance_attr_ancestors(attr)):
self._check_name("attr", attr, anodes[0])

@utils.check_messages(
"disallowed-name", "invalid-name", "assign-to-new-keyword", "non-ascii-name"
)
@utils.check_messages("disallowed-name", "invalid-name", "assign-to-new-keyword")
def visit_functiondef(self, node: nodes.FunctionDef) -> None:
# Do not emit any warnings if the method is just an implementation
# of a base class method.
Expand Down Expand Up @@ -1944,14 +1958,12 @@ def visit_functiondef(self, node: nodes.FunctionDef) -> None:

visit_asyncfunctiondef = visit_functiondef

@utils.check_messages("disallowed-name", "invalid-name", "non-ascii-name")
@utils.check_messages("disallowed-name", "invalid-name")
def visit_global(self, node: nodes.Global) -> None:
for name in node.names:
self._check_name("const", name, node)

@utils.check_messages(
"disallowed-name", "invalid-name", "assign-to-new-keyword", "non-ascii-name"
)
@utils.check_messages("disallowed-name", "invalid-name", "assign-to-new-keyword")
def visit_assignname(self, node: nodes.AssignName) -> None:
"""check module level assigned names"""
self._check_assign_to_new_keyword_violation(node.name, node)
Expand Down Expand Up @@ -1991,14 +2003,6 @@ def visit_assignname(self, node: nodes.AssignName) -> None:
else:
self._check_name("class_attribute", node.name, node)

def _recursive_check_names(self, args):
"""check names in a possibly recursive list <arg>"""
for arg in args:
if isinstance(arg, nodes.AssignName):
self._check_name("argument", arg.name, arg)
else:
self._recursive_check_names(arg.elts)

def _find_name_group(self, node_type):
return self._name_group.get(node_type, node_type)

Expand Down Expand Up @@ -2039,13 +2043,9 @@ def _name_disallowed_by_regex(self, name: str) -> bool:
pattern.match(name) for pattern in self._bad_names_rgxs_compiled
)

# pylint: disable-next=arguments-renamed
def _check_name(self, node_type, name, node, confidence=interfaces.HIGH):
"""check for a name using the type's regexp"""
non_ascii_match = self._non_ascii_rgx_compiled.match(name)
if non_ascii_match is not None:
self._raise_name_warning(
None, node, node_type, name, confidence, warning="non-ascii-name"
)

def _should_exempt_from_invalid_name(node):
if node_type == "variable":
Expand Down
Loading