Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Implemented new check use-maxsplit-arg #4469

Merged
merged 27 commits into from
May 22, 2021
Merged
Changes from 1 commit
Commits
Show all changes
27 commits
Select commit Hold shift + click to select a range
94cc989
Implemented new check ``consider-using-str-partition``
yushao2 May 12, 2021
3173a45
Changed source of lint_module_test due to new checkers breaking
yushao2 May 12, 2021
3b36d2a
Improved help text for ``consider-using-str-partition``
yushao2 May 12, 2021
cc61bf5
Added additional tests
yushao2 May 12, 2021
eeb45d6
Improved on help text logic and accompanying tests
yushao2 May 12, 2021
943bd58
Update docs
yushao2 May 12, 2021
5963aac
Reworked consider-using-str-partition
yushao2 May 13, 2021
64cba0c
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] May 13, 2021
860784e
Made checking subscript compatible with < py3.9
yushao2 May 13, 2021
35854ad
Refactored subscript checking to a utils function
yushao2 May 13, 2021
e4b31b6
Reworked and simplified ``consider-using-str-partition``
yushao2 May 14, 2021
47c8d41
Merge branch 'master' into checkers-4440
Pierre-Sassoulas May 17, 2021
3c8b0f3
Update ChangeLog
yushao2 May 18, 2021
cd86ca0
Updated PR from review
yushao2 May 18, 2021
23e82eb
Refactored if-else logic in consider-using-str-partition
yushao2 May 18, 2021
90b9b8d
updated func tests output file
yushao2 May 18, 2021
91e443b
Changes from code review
yushao2 May 19, 2021
ad1284d
Reworked logic based on review
yushao2 May 20, 2021
99dbcaf
Reviewed PR based on comments
yushao2 May 21, 2021
f60042a
Edited helptext
yushao2 May 21, 2021
35e2674
Update tests/functional/c/consider/consider_using_maxsplit_arg.py
yushao2 May 21, 2021
4922651
Updated func test results
yushao2 May 21, 2021
a1f3337
Renamed check to use-maxsplit-arg
yushao2 May 21, 2021
d46b8f1
Small formatting changes
cdce8p May 21, 2021
22824dd
Fixed escaping special characters issue
yushao2 May 22, 2021
100b341
Edited logic for helptext based on PR review comments
yushao2 May 22, 2021
7e640f5
Last changes
cdce8p May 22, 2021
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
Reworked consider-using-str-partition
yushao2 committed May 13, 2021
commit 5963aac7a45fe2b37e9712cf9139b5c2802b64b3
236 changes: 133 additions & 103 deletions pylint/checkers/refactoring/recommendation_checker.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
# Licensed under the GPL: https://www.gnu.org/licenses/old-licenses/gpl-2.0.html
# For details: https://github.com/PyCQA/pylint/blob/master/LICENSE
import re
from typing import cast

import astroid
@@ -37,12 +36,14 @@ class RecommendationChecker(checkers.BaseChecker):
"method of the dictionary instead.",
),
"C0207": (
"Consider using %s[%d] instead",
"Consider using %s instead",
"consider-using-str-partition",
"Emitted when accessing only the first or last element of a str.split(sep). "
"Or when a str.split(sep,maxsplit=1) is used. "
"The first and last element can be accessed by using str.partition(sep)[0] "
"or str.rpartition(sep)[-1] instead, which is less computationally "
"expensive.",
"expensive and works the same as str.split() or str.rsplit() with a maxsplit "
"of 1",
),
}

@@ -53,8 +54,14 @@ def _is_builtin(node, function):
return False
return utils.is_builtin_object(inferred) and inferred.name == function

@utils.check_messages("consider-iterating-dictionary")
def visit_call(self, node):
@utils.check_messages(
"consider-iterating-dictionary", "consider-using-str-partition"
)
def visit_call(self, node: astroid.Call) -> None:
self._check_consider_iterating_dictionary(node)
self._check_consider_using_str_partition(node)

def _check_consider_iterating_dictionary(self, node: astroid.Call) -> None:
if not isinstance(node.func, astroid.Attribute):
return
if node.func.attrname != "keys":
@@ -71,6 +78,127 @@ def visit_call(self, node):
if isinstance(node.parent, (astroid.For, astroid.Comprehension)):
self.add_message("consider-iterating-dictionary", node=node)

def _check_consider_using_str_partition(self, node: astroid.Call) -> None:
"""Add message when accessing first or last elements of a str.split() or str.rsplit(),
or when split/rsplit with max_split=1 is used"""
yushao2 marked this conversation as resolved.
Show resolved Hide resolved

# Check if call is split() or rsplit()
if isinstance(node.func, astroid.Attribute) and node.func.attrname in (
"split",
"rsplit",
):
inferred_func = utils.safe_infer(node.func)
fn_name = node.func.attrname
node_name = node.as_string()
yushao2 marked this conversation as resolved.
Show resolved Hide resolved

if not isinstance(inferred_func, astroid.BoundMethod):
return
yushao2 marked this conversation as resolved.
Show resolved Hide resolved

try:
seperator = utils.get_argument_from_call(node, 0, "sep").value
except utils.NoSuchArgumentError:
return
# Check if maxsplit is set, and if it's == 1 (this can be replaced by partition)
try:
maxsplit = utils.get_argument_from_call(node, 1, "maxsplit").value
if maxsplit == 1:
self.add_message(
"consider-using-str-partition",
node=node,
args=(
f"{node.func.expr.as_string()}.{node.func.attrname.replace('split','partition')}('{seperator}')"
),
)
return

except utils.NoSuchArgumentError:
pass

# Check if it's immediately subscripted
if isinstance(node.parent, astroid.Subscript):
subscript_node = node.parent
# Check if subscripted with -1/0
yushao2 marked this conversation as resolved.
Show resolved Hide resolved
if not isinstance(
subscript_node.slice, (astroid.Const, astroid.UnaryOp)
):
return
subscript_value = utils.safe_infer(subscript_node.slice)
subscript_value = cast(astroid.Const, subscript_value)
subscript_value = subscript_value.value
if subscript_value in (-1, 0):
new_fn = "rpartition" if subscript_value == -1 else "partition"
new_fn = new_fn[::-1]
node_name = node_name[::-1]
fn_name = fn_name[::-1]
new_name = node_name.replace(fn_name, new_fn, 1)[::-1]
self.add_message(
"consider-using-str-partition", node=node, args=(new_name,)
)
return
# Check where name it's assigned to, then check all usage of name
assign_target = None
if isinstance(node.parent, astroid.Tuple):
assign_node = node.parent.parent
if not isinstance(assign_node, astroid.Assign):
return
idx = node.parent.elts.index(node)
if not isinstance(assign_node.targets[0], astroid.Tuple):
return
assign_target = assign_node.targets[0].elts[idx]
elif isinstance(node.parent, astroid.Assign):
assign_target = node.parent.targets[0]

if assign_target is None or not isinstance(
assign_target, astroid.AssignName
):
return

# Go to outer-most scope (module), then search the child for usage
module_node = node
while not isinstance(module_node, astroid.Module):
module_node = module_node.parent

subscript_usage = set()
for child in module_node.body:
for search_node in child.nodes_of_class(astroid.Name):
search_node = cast(astroid.Name, search_node)

last_definition = search_node.lookup(search_node.name)[1][-1]
if last_definition is not assign_target:
continue
if not isinstance(search_node.parent, astroid.Subscript):
continue
subscript_node = search_node.parent
if not isinstance(
subscript_node.slice, (astroid.Const, astroid.UnaryOp)
):
return
subscript_value = utils.safe_infer(subscript_node.slice)
subscript_value = cast(astroid.Const, subscript_value)
subscript_value = subscript_value.value
if subscript_value not in (-1, 0):
return
subscript_usage.add(subscript_value)
if not subscript_usage: # Not used
return
# Construct help text
help_text = ""
node_name = node_name[::-1]
fn_name = fn_name[::-1]
if 0 in subscript_usage:
new_fn = "partition"[::-1]
new_name = node_name.replace(fn_name, new_fn, 1)[::-1]
help_text += f"{new_name} to extract the first element of a .split()"

if -1 in subscript_usage:
new_fn = "rpartition"[::-1]
new_name = node_name.replace(fn_name, new_fn, 1)[::-1]
help_text += " and " if help_text != "" else ""
help_text += f"{new_name} to extract the last element of a .split()"
self.add_message(
"consider-using-str-partition", node=node, args=(help_text,)
)

@utils.check_messages("consider-using-enumerate", "consider-using-dict-items")
def visit_for(self, node: astroid.For) -> None:
self._check_consider_using_enumerate(node)
@@ -219,101 +347,3 @@ def visit_comprehension(self, node: astroid.Comprehension) -> None:

self.add_message("consider-using-dict-items", node=node)
return

@utils.check_messages("consider-using-str-partition")
def visit_subscript(self, node: astroid.Subscript) -> None:
"""Add message when accessing first or last elements of a str.split()."""
assignment = None
subscripted_object = node.value
if isinstance(subscripted_object, astroid.Name):
# Check assignment of this subscripted object
assignment_lookup_results = node.value.lookup(node.value.name)
if (
len(assignment_lookup_results) == 0
or len(assignment_lookup_results[-1]) == 0
):
return
assign_name = assignment_lookup_results[-1][-1]
if not isinstance(assign_name, astroid.AssignName):
return
assignment = assign_name.parent
if not isinstance(assignment, astroid.Assign):
return
# Set subscripted_object to the assignment RHS
subscripted_object = assignment.value

if isinstance(subscripted_object, astroid.Call):
# Check if call is .split() or .rsplit()
if isinstance(
subscripted_object.func, astroid.Attribute
) and subscripted_object.func.attrname in ["split", "rsplit"]:
inferred = utils.safe_infer(subscripted_object.func)
if not isinstance(inferred, astroid.BoundMethod):
return

# Check if subscript is 1 or -1
value = node.slice
if isinstance(value, astroid.Index):
value = value.value

if isinstance(value, (astroid.UnaryOp, astroid.Const)):
const = utils.safe_infer(value)
const = cast(astroid.Const, const)
p = re.compile(
r"tilpsr*\."
) # Match and replace in reverse to ensure last occurance replaced
if const.value == -1:
help_text = p.sub(
".rpartition"[::-1],
subscripted_object.as_string()[::-1],
count=1,
)[::-1]
self.add_message(
"consider-using-str-partition",
node=node,
args=(help_text, -1),
)
elif const.value == 0:
help_text = p.sub(
".partition"[::-1],
subscripted_object.as_string()[::-1],
count=1,
)[::-1]
self.add_message(
"consider-using-str-partition",
node=node,
args=(help_text, 0),
)
# Check if subscript is len(split) - 1
if (
isinstance(value, astroid.BinOp)
and value.left.func.name == "len"
and value.op == "-"
and value.right.value == 1
and assignment is not None
):
# Ensure that the len() is from builtins, not user re-defined
inferred_fn = utils.safe_infer(value.left.func)
if not (
isinstance(inferred_fn, astroid.FunctionDef)
and isinstance(inferred_fn.parent, astroid.Module)
and inferred_fn.parent.name == "builtins"
):
return

# Further check for argument within len(), and compare that to object being split
arg_within_len = value.left.args[0].name
if arg_within_len == node.value.name:
p = re.compile(
r"tilpsr*\."
) # Match and replace in reverse to ensure last occurance replaced
help_text = p.sub(
".rpartition"[::-1],
subscripted_object.as_string()[::-1],
count=1,
)[::-1]
self.add_message(
"consider-using-str-partition",
node=node,
args=(help_text, -1),
)
68 changes: 43 additions & 25 deletions tests/functional/c/consider/consider_using_str_partition.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,19 @@
"""Emit a message for iteration through dict keys and subscripting dict with key."""
# pylint: disable=line-too-long,missing-docstring,unsubscriptable-object,too-few-public-methods,invalid-name,redefined-builtin
from collections.abc import Iterable
from typing import Any


# Test varying maxsplit argument
bad_split = '1,2,3'.split(sep=',',maxsplit=1) # [consider-using-str-partition]
bad_split = '1,2,3'[::-1].split(',',1) # [consider-using-str-partition]

good_split = '1,2,3'[::-1].split(',',2)
good_split = '1,2,3'[::-1].split(',')

good_split = '1,2,3'[::-1].split(',',2)[0] #This is fine, we ignore cases where maxsplit > 1
good_split = '1,2,3'[::-1].split(',',2)[-1] #This is fine, we ignore cases where maxsplit > 1


# Test subscripting .split()
get_first = '1,2,3'.split(',')[0] # [consider-using-str-partition]
get_last = '1,2,3'[::-1].split(',')[0] # [consider-using-str-partition]

@@ -15,27 +26,38 @@
get_mid = SEQ.split(',')[1] # This is okay
get_mid = SEQ.split(',')[-2] # This is okay


# Test with storing splits as an object
split1 = SEQ.split(',')
get_first = split1[0] # [consider-using-str-partition]
get_last = split1[-1] # [consider-using-str-partition]
get_last = split1[len(split1)-1] # [consider-using-str-partition]
split1 = SEQ.split(',') # [consider-using-str-partition]
get_first = split1[0]
get_last = split1[-1]

split2 = SEQ.rsplit(',') # [consider-using-str-partition]
get_first = split2[0]
get_last = split2[-1]

split2 = SEQ.rsplit(',')
get_first = split2[0] # [consider-using-str-partition]
get_last = split2[-1] # [consider-using-str-partition]
get_last = split2[len(split2)-1] # [consider-using-str-partition]
split3 = SEQ.rsplit(',') # This is fine, split3 indexed with [1]
get_first = split3[0]
get_middle = split3[1]
get_last = split3[-1]

print(split1[0], split1[-1]) # [consider-using-str-partition, consider-using-str-partition]
split1, split2 = SEQ.split(','), SEQ.rsplit(',') # [consider-using-str-partition, consider-using-str-partition]
get_first = split1[0]
get_last = split1[-1]
get_first = split2[0]
get_last = split2[-1]

split1, split2 = SEQ.split(','), SEQ.rsplit(',') # This is fine, both splits are utilized
get_first = split1[0]
get_last = split1[-1]
get_first = split2[0]
get_last = split2[-1]
split1[1] = split2[1]

# Test when running len on another iterable
some_list = []
get_last = split1[len(split2)-1] # Should not throw an error

# Tests on class attributes
class Foo():
class_str = '1,2,3'

def __init__(self):
self.my_str = '1,2,3'

@@ -51,10 +73,10 @@ def get_string(self) -> str:
get_mid = Foo.class_str.split(',')[1]
get_mid = Foo.class_str.split(',')[-2]

split2 = Foo.class_str.split(',')
get_first = split2[0] # [consider-using-str-partition]
get_last = split2[-1] # [consider-using-str-partition]
get_last = split2[len(split2)-1] # [consider-using-str-partition]
split2 = Foo.class_str.split(',') # [consider-using-str-partition]
get_first = split2[0]
get_last = split2[-1]


# Test with accessors
bar = Foo()
@@ -64,13 +86,15 @@ def get_string(self) -> str:
get_mid = bar.get_string().split(',')[1]
get_mid = bar.get_string().split(',')[-2]


# Test with iterating over strings
list_of_strs = ["a", "b", "c", "d", "e", "f"]
for s in list_of_strs:
print(s.split(" ")[0]) # [consider-using-str-partition]
print(s.split(" ")[-1]) # [consider-using-str-partition]
print(s.split(" ")[-2])


# Test warning messages (matching and replacing .split / .rsplit)
class Bar():
split = '1,2,3'
@@ -79,9 +103,3 @@ class Bar():
print(Bar.split.split(",")[-1]) # [consider-using-str-partition] (Error message should show Bar.split.rpartition)
print(Bar.split.rsplit(",")[0]) # [consider-using-str-partition] (Error message should show Bar.split.partition)
print(Bar.split.rsplit(",")[-1]) # [consider-using-str-partition] (Error message should show Bar.split.rpartition)

# Test with user-defined len function
def len(x: Iterable[Any]) -> str:
return f"Hello, world! {x[2]}"

get_last = split2[len(split2)-1] # This won't throw the warning as the len function has been redefined
Loading