astral-sh · dhruvmanila · Dec 20, 2024 · Dec 20, 2024 · Dec 20, 2024 · Dec 20, 2024
diff --git a/crates/ruff/tests/snapshots/show_settings__display_default_settings.snap b/crates/ruff/tests/snapshots/show_settings__display_default_settings.snap
@@ -375,6 +375,7 @@ linter.pylint.max_locals = 15
 linter.pyupgrade.keep_runtime_typing = false
 linter.ruff.parenthesize_tuple_in_subscript = false
 linter.ruff.extend_markup_names = []
+linter.ruff.allowed_markup_calls = []
 
 # Formatter Settings
 formatter.exclude = []

diff --git a/crates/ruff_linter/resources/test/fixtures/ruff/RUF035_whitelisted_markup_calls.py b/crates/ruff_linter/resources/test/fixtures/ruff/RUF035_whitelisted_markup_calls.py
@@ -0,0 +1,9 @@
+from bleach import clean
+from markupsafe import Markup
+
+content = "<script>alert('Hello, world!')</script>"
+Markup(clean(content))
+
+# indirect assignments are currently not supported
+cleaned = clean(content)
+Markup(cleaned)  # RUF035
diff --git a/crates/ruff_linter/src/rules/ruff/mod.rs b/crates/ruff_linter/src/rules/ruff/mod.rs
@@ -91,6 +91,7 @@ mod tests {
                 ruff: super::settings::Settings {
                     parenthesize_tuple_in_subscript: true,
                     extend_markup_names: vec![],
+                    allowed_markup_calls: vec![],
                 },
                 ..LinterSettings::for_rule(Rule::IncorrectlyParenthesizedTupleInSubscript)
             },
@@ -107,6 +108,7 @@ mod tests {
                 ruff: super::settings::Settings {
                     parenthesize_tuple_in_subscript: false,
                     extend_markup_names: vec![],
+                    allowed_markup_calls: vec![],
                 },
                 target_version: PythonVersion::Py310,
                 ..LinterSettings::for_rule(Rule::IncorrectlyParenthesizedTupleInSubscript)
@@ -447,6 +449,30 @@ mod tests {
                 ruff: super::settings::Settings {
                     parenthesize_tuple_in_subscript: true,
                     extend_markup_names: vec!["webhelpers.html.literal".to_string()],
+                    allowed_markup_calls: vec![],
+                },
+                preview: PreviewMode::Enabled,
+                ..LinterSettings::for_rule(rule_code)
+            },
+        )?;
+        assert_messages!(snapshot, diagnostics);
+        Ok(())
+    }
+
+    #[test_case(Rule::UnsafeMarkupUse, Path::new("RUF035_whitelisted_markup_calls.py"))]
+    fn whitelisted_markup_calls(rule_code: Rule, path: &Path) -> Result<()> {
+        let snapshot = format!(
+            "whitelisted_markup_calls__{}_{}",
+            rule_code.noqa_code(),
+            path.to_string_lossy()
+        );
+        let diagnostics = test_path(
+            Path::new("ruff").join(path).as_path(),
+            &LinterSettings {
+                ruff: super::settings::Settings {
+                    parenthesize_tuple_in_subscript: true,
+                    extend_markup_names: vec![],
+                    allowed_markup_calls: vec!["bleach.clean".to_string()],
                 },
                 preview: PreviewMode::Enabled,
                 ..LinterSettings::for_rule(rule_code)

diff --git a/crates/ruff_linter/src/rules/ruff/rules/unsafe_markup_use.rs b/crates/ruff_linter/src/rules/ruff/rules/unsafe_markup_use.rs
@@ -1,9 +1,9 @@
-use ruff_python_ast::ExprCall;
+use ruff_python_ast::{Expr, ExprCall};
 
 use ruff_diagnostics::{Diagnostic, Violation};
 use ruff_macros::{derive_message_formats, ViolationMetadata};
 use ruff_python_ast::name::QualifiedName;
-use ruff_python_semantic::Modules;
+use ruff_python_semantic::{Modules, SemanticModel};
 use ruff_text_size::Ranged;
 
 use crate::{checkers::ast::Checker, settings::LinterSettings};
@@ -22,7 +22,9 @@ use crate::{checkers::ast::Checker, settings::LinterSettings};
 /// treated like [`markupsafe.Markup`].
 ///
 /// This rule was originally inspired by [flake8-markupsafe] but doesn't carve
-/// out any exceptions for i18n related calls.
+/// out any exceptions for i18n related calls by default.
+///
+/// You can use [`lint.ruff.allowed-markup-calls`] to specify exceptions.
 ///
 /// ## Example
 /// Given:
@@ -64,6 +66,7 @@ use crate::{checkers::ast::Checker, settings::LinterSettings};
 /// ```
 /// ## Options
 /// - `lint.ruff.extend-markup-names`
+/// - `lint.ruff.allowed-markup-calls`
 ///
 /// ## References
 /// - [MarkupSafe](https://pypi.org/project/MarkupSafe/)
@@ -95,7 +98,7 @@ pub(crate) fn unsafe_markup_call(checker: &mut Checker, call: &ExprCall) {
         return;
     }
 
-    if !is_unsafe_call(call) {
+    if !is_unsafe_call(call, checker.semantic(), checker.settings) {
         return;
     }
 
@@ -127,12 +130,29 @@ fn is_markup_call(qualified_name: &QualifiedName, settings: &LinterSettings) ->
         .any(|target| *qualified_name == target)
 }
 
-fn is_unsafe_call(call: &ExprCall) -> bool {
+fn is_unsafe_call(call: &ExprCall, semantic: &SemanticModel, settings: &LinterSettings) -> bool {
     // technically this could be circumvented by using a keyword argument
     // but without type-inference we can't really know which keyword argument
     // corresponds to the first positional argument and either way it is
     // unlikely that someone will actually use a keyword argument here
     // TODO: Eventually we may want to allow dynamic values, as long as they
     //       have a __html__ attribute, since that is part of the API
-    matches!(&*call.arguments.args, [first] if !first.is_string_literal_expr() && !first.is_bytes_literal_expr())
+    matches!(&*call.arguments.args, [first] if !first.is_string_literal_expr() && !first.is_bytes_literal_expr() && !is_whitelisted_call(first, semantic, settings))
+}
+
+fn is_whitelisted_call(expr: &Expr, semantic: &SemanticModel, settings: &LinterSettings) -> bool {
+    let Expr::Call(ExprCall { func, .. }) = expr else {
+        return false;
+    };
+
+    let Some(qualified_name) = semantic.resolve_qualified_name(func) else {
+        return false;
+    };
+
+    settings
+        .ruff
+        .allowed_markup_calls
+        .iter()
+        .map(|target| QualifiedName::from_dotted_name(target))
+        .any(|target| qualified_name == target)
 }
diff --git a/crates/ruff_linter/src/rules/ruff/settings.rs b/crates/ruff_linter/src/rules/ruff/settings.rs
@@ -8,6 +8,7 @@ use std::fmt;
 pub struct Settings {
     pub parenthesize_tuple_in_subscript: bool,
     pub extend_markup_names: Vec<String>,
+    pub allowed_markup_calls: Vec<String>,
 }
 
 impl fmt::Display for Settings {
@@ -18,6 +19,7 @@ impl fmt::Display for Settings {
             fields = [
                 self.parenthesize_tuple_in_subscript,
                 self.extend_markup_names | array,
+                self.allowed_markup_calls | array,
             ]
         }
         Ok(())

diff --git a/...es__ruff__tests__whitelisted_markup_calls__RUF035_RUF035_whitelisted_markup_calls.py.snap b/...es__ruff__tests__whitelisted_markup_calls__RUF035_RUF035_whitelisted_markup_calls.py.snap
@@ -0,0 +1,10 @@
+---
+source: crates/ruff_linter/src/rules/ruff/mod.rs
+---
+RUF035_whitelisted_markup_calls.py:9:1: RUF035 Unsafe use of `markupsafe.Markup` detected
+  |
+7 | # indirect assignments are currently not supported
+8 | cleaned = clean(content)
+9 | Markup(cleaned)  # RUF035
+  | ^^^^^^^^^^^^^^^ RUF035
+  |
diff --git a/crates/ruff_workspace/src/options.rs b/crates/ruff_workspace/src/options.rs
@@ -3091,6 +3091,41 @@ pub struct RuffOptions {
         example = "extend-markup-names = [\"webhelpers.html.literal\", \"my_package.Markup\"]"
     )]
     pub extend_markup_names: Option<Vec<String>>,
+
+    /// A list of callable names, whose result may be safely passed into [`markupsafe.Markup`].
+    ///
+    /// Expects to receive a list of fully-qualified names (e.g., [`bleach.clean`], rather than
+    /// `clean`).
+    ///
+    /// This setting helps you avoid false positives in code like:
+    ///
+    /// ```python
+    /// from bleach import clean
+    /// from markupsafe import Markup
+    ///
+    /// cleaned_markup = Markup(clean(some_user_input))
+    /// ```
+    ///
+    /// Where the use of [`bleach.clean`] usually ensures that there's no XSS vulnerability.
+    ///
+    /// Although it is not recommended, you may also use this setting to whitelist other
+    /// kinds of calls, e.g. calls to i18n translation functions, where how safe that is
+    /// will depend on the implementation and how well the translations are audited.
+    ///
+    /// Another common use-case is to wrap the output of functions that generate markup
+    /// like [`xml.etree.ElementTree.tostring`] or template rendering engines where
+    /// sanitization of potential user input is either already baked in or has to happen
+    /// before rendering.
+    ///
+    /// [bleach.clean]: https://bleach.readthedocs.io/en/latest/clean.html
+    /// [markupsafe.Markup]: https://markupsafe.palletsprojects.com/en/stable/escaping/#markupsafe.Markup
+    /// [xml.etree.ElementTree.tostring]: https://docs.python.org/3/library/xml.etree.elementtree.html#xml.etree.ElementTree.tostring
+    #[option(
+        default = "[]",
+        value_type = "list[str]",
+        example = "allowed-markup-calls = [\"bleach.clean\", \"my_package.sanitize\"]"
+    )]
+    pub allowed_markup_calls: Option<Vec<String>>,
 }
 
 impl RuffOptions {
@@ -3100,6 +3135,7 @@ impl RuffOptions {
                 .parenthesize_tuple_in_subscript
                 .unwrap_or_default(),
             extend_markup_names: self.extend_markup_names.unwrap_or_default(),
+            allowed_markup_calls: self.allowed_markup_calls.unwrap_or_default(),
         }
     }
 }

diff --git a/ruff.schema.json b/ruff.schema.json