From 8a9a8779307d5a0f410c3a6386108652eb114ae9 Mon Sep 17 00:00:00 2001 From: Andrew Gallant Date: Thu, 30 Nov 2023 20:41:14 -0500 Subject: [PATCH 1/6] ruff_python_formatter: fix bug in doctest formatting This fixes a bug where if the very last line in a docstring is a doctest, then it wasn't getting formatted. We do a little more than fix that bug in this commit. In particular, we industrialize how we handle actions by using a queue. Instead of trying to do too much with each action, we make each one a bit simpler and build the infrastructure necessary to permit more arbitrary composition. This in particular lets us handle weird corner cases like "the code example and the docstring end at the same time" without much fuss. In essence, actions like Format and Reset no longer carry the "original" line. Instead of doing that, we generate two actions: the first to format and the second (if necessary) to print the original line. This infrastructure will become more apparently useful when dealing with reStructuredText blocks. --- .../fixtures/ruff/docstring_code_examples.py | 21 ++ .../src/expression/string/docstring.rs | 206 +++++++++++------- .../format@docstring_code_examples.py.snap | 189 ++++++++++++++++ 3 files changed, 336 insertions(+), 80 deletions(-) diff --git a/crates/ruff_python_formatter/resources/test/fixtures/ruff/docstring_code_examples.py b/crates/ruff_python_formatter/resources/test/fixtures/ruff/docstring_code_examples.py index 6f5fc0b30bb61..7d827d2cda053 100644 --- a/crates/ruff_python_formatter/resources/test/fixtures/ruff/docstring_code_examples.py +++ b/crates/ruff_python_formatter/resources/test/fixtures/ruff/docstring_code_examples.py @@ -67,6 +67,27 @@ def doctest_last_line_continued(): pass +# Test that a doctest on the real last line of a docstring reformats +# correctly. +def doctest_really_last_line(): + """ + Do cool stuff. + + >>> cool_stuff( x )""" + pass + + +# Test that a continued doctest on the real last line of a docstring reformats +# correctly. +def doctest_really_last_line_continued(): + """ + Do cool stuff. + + >>> cool_stuff( x ) + ... more( y )""" + pass + + # Test that a doctest is correctly identified and formatted with a blank # continuation line. def doctest_blank_continued(): diff --git a/crates/ruff_python_formatter/src/expression/string/docstring.rs b/crates/ruff_python_formatter/src/expression/string/docstring.rs index bed24c34191c0..dfe58bc0f3fec 100644 --- a/crates/ruff_python_formatter/src/expression/string/docstring.rs +++ b/crates/ruff_python_formatter/src/expression/string/docstring.rs @@ -1,4 +1,8 @@ -use std::borrow::Cow; +// This gives tons of false positives in this file because of +// "reStructuredText." +#![allow(clippy::doc_markdown)] + +use std::{borrow::Cow, collections::VecDeque}; use ruff_python_trivia::PythonWhitespace; use { @@ -182,6 +186,7 @@ pub(super) fn format(normalized: &NormalizedString, f: &mut PyFormatter) -> Form DocstringLinePrinter { f, + action_queue: VecDeque::new(), offset, stripped_indentation_length, already_normalized, @@ -218,6 +223,17 @@ fn contains_unescaped_newline(haystack: &str) -> bool { /// An abstraction for printing each line of a docstring. struct DocstringLinePrinter<'ast, 'buf, 'fmt, 'src> { f: &'fmt mut PyFormatter<'ast, 'buf>, + /// A queue of actions to perform. + /// + /// Whenever we process a line, it is possible for it to generate multiple + /// actions to take. The most basic, and most common case, is for the line + /// to just simply be printed as-is. But in some cases, a line is part of + /// a code example that we'd like to reformat. In those cases, the actions + /// can be more complicated. + /// + /// Actions are pushed on to the end of the queue and popped from the + /// beginning. + action_queue: VecDeque>, /// The source offset of the beginning of the line that is currently being /// printed. offset: TextSize, @@ -253,7 +269,8 @@ impl<'ast, 'buf, 'fmt, 'src> DocstringLinePrinter<'ast, 'buf, 'fmt, 'src> { self.offset += line.line.text_len() + "\n".text_len(); self.add_one(line)?; } - Ok(()) + self.code_example.finish(&mut self.action_queue); + self.run_action_queue() } /// Adds the given line to this printer. @@ -273,34 +290,40 @@ impl<'ast, 'buf, 'fmt, 'src> DocstringLinePrinter<'ast, 'buf, 'fmt, 'src> { { return self.print_one(&line.as_output()); } - match self.code_example.add(line) { - CodeExampleAddAction::Print { original } => self.print_one(&original.as_output())?, - CodeExampleAddAction::Kept => {} - CodeExampleAddAction::Reset { code, original } => { - for codeline in code { - self.print_one(&codeline.original.as_output())?; + self.code_example.add(line, &mut self.action_queue); + self.run_action_queue() + } + + /// Process any actions in this printer's queue until the queue is empty. + fn run_action_queue(&mut self) -> FormatResult<()> { + while let Some(action) = self.action_queue.pop_front() { + match action { + CodeExampleAddAction::Print { original } => { + self.print_one(&original.as_output())?; } - self.print_one(&original.as_output())?; - } - CodeExampleAddAction::Format { mut kind, original } => { - let Some(formatted_lines) = self.format(kind.code())? else { - // If formatting failed in a way that should not be - // allowed, we back out what we're doing and print the - // original lines we found as-is as if we did nothing. - for codeline in kind.code() { + CodeExampleAddAction::Kept => {} + CodeExampleAddAction::Reset { code } => { + for codeline in code { self.print_one(&codeline.original.as_output())?; } - if let Some(original) = original { - self.print_one(&original.as_output())?; - } - return Ok(()); - }; - - self.already_normalized = false; - match kind { - CodeExampleKind::Doctest(CodeExampleDoctest { ps1_indent, .. }) => { - let mut lines = formatted_lines.into_iter(); - if let Some(first) = lines.next() { + } + CodeExampleAddAction::Format { mut kind } => { + let Some(formatted_lines) = self.format(kind.code())? else { + // Since we've failed to emit these lines, we need to + // put them back in the queue but have them jump to the + // front of the queue to get processed before any other + // action. + self.action_queue.push_front(CodeExampleAddAction::Reset { + code: kind.into_code(), + }); + continue; + }; + + self.already_normalized = false; + match kind { + CodeExampleKind::Doctest(CodeExampleDoctest { ps1_indent, .. }) => { + let mut lines = formatted_lines.into_iter(); + let Some(first) = lines.next() else { continue }; self.print_one( &first.map(|line| std::format!("{ps1_indent}>>> {line}")), )?; @@ -312,9 +335,6 @@ impl<'ast, 'buf, 'fmt, 'src> DocstringLinePrinter<'ast, 'buf, 'fmt, 'src> { } } } - if let Some(original) = original { - self.print_one(&original.as_output())?; - } } } Ok(()) @@ -568,38 +588,44 @@ impl<'src> CodeExample<'src> { /// Attempt to add an original line from a docstring to this code example. /// /// Based on the line and the internal state of whether a code example is - /// currently being collected or not, this will return an "action" for - /// the caller to perform. The typical case is a "print" action, which - /// instructs the caller to just print the line as though it were not part - /// of a code snippet. - fn add(&mut self, original: InputDocstringLine<'src>) -> CodeExampleAddAction<'src> { + /// currently being collected or not, this will push an "action" to the + /// given queue for the caller to perform. The typical case is a "print" + /// action, which instructs the caller to just print the line as though it + /// were not part of a code snippet. + fn add( + &mut self, + original: InputDocstringLine<'src>, + queue: &mut VecDeque>, + ) { match self.kind.take() { // There's no existing code example being built, so we look for // the start of one or otherwise tell the caller we couldn't find // anything. - None => match self.add_start(original) { - None => CodeExampleAddAction::Kept, - Some(original) => CodeExampleAddAction::Print { original }, - }, - Some(CodeExampleKind::Doctest(mut doctest)) => { - if doctest.add_code_line(original) { - // Stay with the doctest kind while we accumulate all - // PS2 prompts. - self.kind = Some(CodeExampleKind::Doctest(doctest)); - return CodeExampleAddAction::Kept; - } - let original = self.add_start(original); - CodeExampleAddAction::Format { - kind: CodeExampleKind::Doctest(doctest), - original, - } + None => { + self.add_start(original, queue); + } + Some(CodeExampleKind::Doctest(doctest)) => { + let Some(doctest) = doctest.add_code_line(original, queue) else { + self.add_start(original, queue); + return; + }; + self.kind = Some(CodeExampleKind::Doctest(doctest)); } } } + /// Finish the code example by generating any final actions if applicable. + /// + /// This typically adds an action when the end of a code example coincides + /// with the end of the docstring. + fn finish(&mut self, queue: &mut VecDeque>) { + let Some(kind) = self.kind.take() else { return }; + queue.push_back(CodeExampleAddAction::Format { kind }); + } + /// Looks for the start of a code example. If one was found, then the given /// line is kept and added as part of the code example. Otherwise, the line - /// is returned unchanged and no code example was found. + /// is pushed onto the queue unchanged to be printed as-is. /// /// # Panics /// @@ -609,13 +635,15 @@ impl<'src> CodeExample<'src> { fn add_start( &mut self, original: InputDocstringLine<'src>, - ) -> Option> { + queue: &mut VecDeque>, + ) { assert!(self.kind.is_none(), "expected no existing code example"); if let Some(doctest) = CodeExampleDoctest::new(original) { self.kind = Some(CodeExampleKind::Doctest(doctest)); - return None; + queue.push_back(CodeExampleAddAction::Kept); + } else { + queue.push_back(CodeExampleAddAction::Print { original }); } - Some(original) } } @@ -645,6 +673,18 @@ impl<'src> CodeExampleKind<'src> { CodeExampleKind::Doctest(ref doctest) => &doctest.lines, } } + + /// Consume this code example and return only the lines that have been + /// accrued so far. + /// + /// This is useful when the code example being collected has been + /// determined to be invalid, and one wants to "give up" and print the + /// original lines through unchanged without attempting formatting. + fn into_code(self) -> Vec> { + match self { + CodeExampleKind::Doctest(doctest) => doctest.lines, + } + } } /// State corresponding to a single doctest code example found in a docstring. @@ -681,19 +721,24 @@ impl<'src> CodeExampleDoctest<'src> { Some(doctest) } - /// Looks for a valid doctest PS2 prompt in the line given. + /// Looks for a valid doctest PS2 prompt in the line given. If one is + /// found, it is added to this code example and ownership of the example is + /// returned to the caller. In this case, callers should continue trying to + /// add PS2 prompt lines. /// - /// If one is found, then the code portion of the line following the PS2 prompt - /// is returned. + /// But if one isn't found, then the given line is not part of the code + /// example and ownership of this example is not returned. /// - /// Callers must provide a string containing the original indentation of the - /// PS1 prompt that started the doctest containing the potential PS2 prompt - /// in the line given. If the line contains a PS2 prompt, its indentation must - /// match the indentation used for the corresponding PS1 prompt (otherwise - /// `None` will be returned). - fn add_code_line(&mut self, original: InputDocstringLine<'src>) -> bool { + /// In either case, relevant actions will be added to the given queue to + /// process. + fn add_code_line( + mut self, + original: InputDocstringLine<'src>, + queue: &mut VecDeque>, + ) -> Option> { let Some((ps2_indent, ps2_after)) = original.line.split_once("...") else { - return false; + queue.push_back(self.into_format_action()); + return None; }; // PS2 prompts must have the same indentation as their // corresponding PS1 prompt.[1] While the 'doctest' Python @@ -702,7 +747,8 @@ impl<'src> CodeExampleDoctest<'src> { // // [1]: https://github.com/python/cpython/blob/0ff6368519ed7542ad8b443de01108690102420a/Lib/doctest.py#L733 if self.ps1_indent != ps2_indent { - return false; + queue.push_back(self.into_format_action()); + return None; } // PS2 prompts must be followed by an ASCII space character unless // it's an otherwise empty line[1]. @@ -710,11 +756,22 @@ impl<'src> CodeExampleDoctest<'src> { // [1]: https://github.com/python/cpython/blob/0ff6368519ed7542ad8b443de01108690102420a/Lib/doctest.py#L809-L812 let code = match ps2_after.strip_prefix(' ') { None if ps2_after.is_empty() => "", - None => return false, + None => { + queue.push_back(self.into_format_action()); + return None; + } Some(code) => code, }; self.lines.push(CodeExampleLine { original, code }); - true + queue.push_back(CodeExampleAddAction::Kept); + Some(self) + } + + /// Consume this doctest and turn it into a formatting action. + fn into_format_action(self) -> CodeExampleAddAction<'src> { + CodeExampleAddAction::Format { + kind: CodeExampleKind::Doctest(self), + } } } @@ -763,20 +820,12 @@ enum CodeExampleAddAction<'src> { Kept, /// The line added indicated that the code example is finished and should /// be formatted and printed. The line added is not treated as part of - /// the code example. If the line added indicated the start of another - /// code example, then is won't be returned to the caller here. Otherwise, - /// callers should pass it through to the formatter as-is. + /// the code example. Format { /// The kind of code example that was found. /// /// This is guaranteed to have a non-empty code snippet. kind: CodeExampleKind<'src>, - /// When set, the line is considered not part of any code example and - /// should be formatted as if the [`Print`] action were returned. - /// Otherwise, if there is no line, then either one does not exist - /// or it is part of another code example and should be treated as a - /// [`Kept`] action. - original: Option>, }, /// This occurs when adding a line to an existing code example /// results in that code example becoming invalid. In this case, @@ -787,9 +836,6 @@ enum CodeExampleAddAction<'src> { /// The lines of code that we collected but should be printed back to /// the docstring as-is and not formatted. code: Vec>, - /// The line that was added and triggered this reset to occur. It - /// should be written back to the docstring as-is after the code lines. - original: InputDocstringLine<'src>, }, } diff --git a/crates/ruff_python_formatter/tests/snapshots/format@docstring_code_examples.py.snap b/crates/ruff_python_formatter/tests/snapshots/format@docstring_code_examples.py.snap index 5eeafa914fb93..3d945be424e8a 100644 --- a/crates/ruff_python_formatter/tests/snapshots/format@docstring_code_examples.py.snap +++ b/crates/ruff_python_formatter/tests/snapshots/format@docstring_code_examples.py.snap @@ -73,6 +73,27 @@ def doctest_last_line_continued(): pass +# Test that a doctest on the real last line of a docstring reformats +# correctly. +def doctest_really_last_line(): + """ + Do cool stuff. + + >>> cool_stuff( x )""" + pass + + +# Test that a continued doctest on the real last line of a docstring reformats +# correctly. +def doctest_really_last_line_continued(): + """ + Do cool stuff. + + >>> cool_stuff( x ) + ... more( y )""" + pass + + # Test that a doctest is correctly identified and formatted with a blank # continuation line. def doctest_blank_continued(): @@ -414,6 +435,27 @@ def doctest_last_line_continued(): pass +# Test that a doctest on the real last line of a docstring reformats +# correctly. +def doctest_really_last_line(): + """ + Do cool stuff. + + >>> cool_stuff( x )""" + pass + + +# Test that a continued doctest on the real last line of a docstring reformats +# correctly. +def doctest_really_last_line_continued(): + """ + Do cool stuff. + + >>> cool_stuff( x ) + ... more( y )""" + pass + + # Test that a doctest is correctly identified and formatted with a blank # continuation line. def doctest_blank_continued(): @@ -758,6 +800,27 @@ def doctest_last_line_continued(): pass +# Test that a doctest on the real last line of a docstring reformats +# correctly. +def doctest_really_last_line(): + """ + Do cool stuff. + + >>> cool_stuff( x )""" + pass + + +# Test that a continued doctest on the real last line of a docstring reformats +# correctly. +def doctest_really_last_line_continued(): + """ + Do cool stuff. + + >>> cool_stuff( x ) + ... more( y )""" + pass + + # Test that a doctest is correctly identified and formatted with a blank # continuation line. def doctest_blank_continued(): @@ -1102,6 +1165,27 @@ def doctest_last_line_continued(): pass +# Test that a doctest on the real last line of a docstring reformats +# correctly. +def doctest_really_last_line(): + """ + Do cool stuff. + + >>> cool_stuff( x )""" + pass + + +# Test that a continued doctest on the real last line of a docstring reformats +# correctly. +def doctest_really_last_line_continued(): + """ + Do cool stuff. + + >>> cool_stuff( x ) + ... more( y )""" + pass + + # Test that a doctest is correctly identified and formatted with a blank # continuation line. def doctest_blank_continued(): @@ -1446,6 +1530,27 @@ def doctest_last_line_continued(): pass +# Test that a doctest on the real last line of a docstring reformats +# correctly. +def doctest_really_last_line(): + """ + Do cool stuff. + + >>> cool_stuff( x )""" + pass + + +# Test that a continued doctest on the real last line of a docstring reformats +# correctly. +def doctest_really_last_line_continued(): + """ + Do cool stuff. + + >>> cool_stuff( x ) + ... more( y )""" + pass + + # Test that a doctest is correctly identified and formatted with a blank # continuation line. def doctest_blank_continued(): @@ -1790,6 +1895,27 @@ def doctest_last_line_continued(): pass +# Test that a doctest on the real last line of a docstring reformats +# correctly. +def doctest_really_last_line(): + """ + Do cool stuff. + + >>> cool_stuff(x)""" + pass + + +# Test that a continued doctest on the real last line of a docstring reformats +# correctly. +def doctest_really_last_line_continued(): + """ + Do cool stuff. + + >>> cool_stuff(x) + ... more(y)""" + pass + + # Test that a doctest is correctly identified and formatted with a blank # continuation line. def doctest_blank_continued(): @@ -2134,6 +2260,27 @@ def doctest_last_line_continued(): pass +# Test that a doctest on the real last line of a docstring reformats +# correctly. +def doctest_really_last_line(): + """ + Do cool stuff. + + >>> cool_stuff(x)""" + pass + + +# Test that a continued doctest on the real last line of a docstring reformats +# correctly. +def doctest_really_last_line_continued(): + """ + Do cool stuff. + + >>> cool_stuff(x) + ... more(y)""" + pass + + # Test that a doctest is correctly identified and formatted with a blank # continuation line. def doctest_blank_continued(): @@ -2478,6 +2625,27 @@ def doctest_last_line_continued(): pass +# Test that a doctest on the real last line of a docstring reformats +# correctly. +def doctest_really_last_line(): + """ + Do cool stuff. + + >>> cool_stuff(x)""" + pass + + +# Test that a continued doctest on the real last line of a docstring reformats +# correctly. +def doctest_really_last_line_continued(): + """ + Do cool stuff. + + >>> cool_stuff(x) + ... more(y)""" + pass + + # Test that a doctest is correctly identified and formatted with a blank # continuation line. def doctest_blank_continued(): @@ -2822,6 +2990,27 @@ def doctest_last_line_continued(): pass +# Test that a doctest on the real last line of a docstring reformats +# correctly. +def doctest_really_last_line(): + """ + Do cool stuff. + + >>> cool_stuff(x)""" + pass + + +# Test that a continued doctest on the real last line of a docstring reformats +# correctly. +def doctest_really_last_line_continued(): + """ + Do cool stuff. + + >>> cool_stuff(x) + ... more(y)""" + pass + + # Test that a doctest is correctly identified and formatted with a blank # continuation line. def doctest_blank_continued(): From 6483d894d8f0910de184d3c36228c113bff2380e Mon Sep 17 00:00:00 2001 From: Andrew Gallant Date: Thu, 30 Nov 2023 20:49:48 -0500 Subject: [PATCH 2/6] ruff_python_formatter: simplify `format`'s contract There was really no good reason to have it panic if the code block given is empty. Instead, we can just return `None` and everything will get handled correctly. (It will turn into a `Reset` with zero lines, which will in turn be a no-op.) In practice I do believe empty code blocks are not possible, but I felt like this makes the code a bit more robust. --- .../src/expression/string/docstring.rs | 25 +++++-------------- 1 file changed, 6 insertions(+), 19 deletions(-) diff --git a/crates/ruff_python_formatter/src/expression/string/docstring.rs b/crates/ruff_python_formatter/src/expression/string/docstring.rs index dfe58bc0f3fec..ae84dda69a7c6 100644 --- a/crates/ruff_python_formatter/src/expression/string/docstring.rs +++ b/crates/ruff_python_formatter/src/expression/string/docstring.rs @@ -415,26 +415,15 @@ impl<'ast, 'buf, 'fmt, 'src> DocstringLinePrinter<'ast, 'buf, 'fmt, 'src> { /// routine is silent about it. So from the user's perspective, this will /// fail silently. Ideally, this would at least emit a warning message, /// but at time of writing, it wasn't clear to me how to best do that. - /// - /// # Panics - /// - /// This panics when the given slice is empty. fn format( &mut self, code: &[CodeExampleLine<'_>], ) -> FormatResult>>> { use ruff_python_parser::AsMode; - let offset = code - .get(0) - .expect("code blob must be non-empty") - .original - .offset; - let last_line_is_last = code - .last() - .expect("code blob must be non-empty") - .original - .is_last(); + let (Some(unformatted_first), Some(unformatted_last)) = (code.first(), code.last()) else { + return Ok(None); + }; let codeblob = code .iter() .map(|line| line.code) @@ -481,12 +470,12 @@ impl<'ast, 'buf, 'fmt, 'src> DocstringLinePrinter<'ast, 'buf, 'fmt, 'src> { .lines() .map(|line| OutputDocstringLine { line: Cow::Owned(line.to_string()), - offset, + offset: unformatted_first.original.offset, is_last: false, }) .collect::>(); - if let Some(last) = lines.last_mut() { - last.is_last = last_line_is_last; + if let Some(reformatted_last) = lines.last_mut() { + reformatted_last.is_last = unformatted_last.original.is_last(); } Ok(Some(lines)) } @@ -823,8 +812,6 @@ enum CodeExampleAddAction<'src> { /// the code example. Format { /// The kind of code example that was found. - /// - /// This is guaranteed to have a non-empty code snippet. kind: CodeExampleKind<'src>, }, /// This occurs when adding a line to an existing code example From 6cebe63da9e97c186c9693c55d12572bee42558d Mon Sep 17 00:00:00 2001 From: Andrew Gallant Date: Thu, 30 Nov 2023 22:48:42 -0500 Subject: [PATCH 3/6] ruff_python_formatter: force space indent in code snippets The comment in the source code should explain, but basically, if the user has tab indentation enabled, then this winds up screwing with code snippet formatting. The essential problem seems to be that docstring normalization strips tabs, and this interplay between code snippet reformatting with tabs winds up making formatting non-idempotent. We justify this hard-coded option by pointing out that tabs get stripped as part of docstring normalization anyway. --- .../src/expression/string/docstring.rs | 24 ++++++-- .../format@docstring_code_examples.py.snap | 60 +++++++++---------- 2 files changed, 50 insertions(+), 34 deletions(-) diff --git a/crates/ruff_python_formatter/src/expression/string/docstring.rs b/crates/ruff_python_formatter/src/expression/string/docstring.rs index ae84dda69a7c6..c396e810cd3ba 100644 --- a/crates/ruff_python_formatter/src/expression/string/docstring.rs +++ b/crates/ruff_python_formatter/src/expression/string/docstring.rs @@ -6,7 +6,7 @@ use std::{borrow::Cow, collections::VecDeque}; use ruff_python_trivia::PythonWhitespace; use { - ruff_formatter::{write, Printed}, + ruff_formatter::{write, IndentStyle, Printed}, ruff_source_file::Locator, ruff_text_size::{Ranged, TextLen, TextRange, TextSize}, }; @@ -429,7 +429,23 @@ impl<'ast, 'buf, 'fmt, 'src> DocstringLinePrinter<'ast, 'buf, 'fmt, 'src> { .map(|line| line.code) .collect::>() .join("\n"); - let printed = match docstring_format_source(self.f.options(), self.quote_style, &codeblob) { + let options = self + .f + .options() + .clone() + // It's perhaps a little odd to be hard-coding the indent + // style here, but I believe it is necessary as a result + // of the whitespace normalization otherwise done in + // docstrings. Namely, tabs are rewritten with ASCII + // spaces. If code examples in docstrings are formatted + // with tabs and those tabs end up getting rewritten, this + // winds up screwing with the indentation in ways that + // results in formatting no longer being idempotent. Since + // tabs will get erased anyway, we just clobber them here + // instead of later, and as a result, get more consistent + // results. + .with_indent_style(IndentStyle::Space); + let printed = match docstring_format_source(options, self.quote_style, &codeblob) { Ok(printed) => printed, Err(FormatModuleError::FormatError(err)) => return Err(err), Err( @@ -837,7 +853,7 @@ enum CodeExampleAddAction<'src> { /// explicitly sets the context to indicate that formatting is taking place /// inside of a docstring. fn docstring_format_source( - options: &crate::PyFormatOptions, + options: crate::PyFormatOptions, docstring_quote_style: QuoteStyle, source: &str, ) -> Result { @@ -851,7 +867,7 @@ fn docstring_format_source( let comments = crate::Comments::from_ast(&module, source_code, &comment_ranges); let locator = Locator::new(source); - let ctx = PyFormatContext::new(options.clone(), locator.contents(), comments) + let ctx = PyFormatContext::new(options, locator.contents(), comments) .in_docstring(docstring_quote_style); let formatted = crate::format!(ctx, [module.format()])?; formatted diff --git a/crates/ruff_python_formatter/tests/snapshots/format@docstring_code_examples.py.snap b/crates/ruff_python_formatter/tests/snapshots/format@docstring_code_examples.py.snap index 3d945be424e8a..82e2d7cff4efb 100644 --- a/crates/ruff_python_formatter/tests/snapshots/format@docstring_code_examples.py.snap +++ b/crates/ruff_python_formatter/tests/snapshots/format@docstring_code_examples.py.snap @@ -2583,7 +2583,7 @@ def doctest_simple_continued(): Do cool stuff. >>> def cool_stuff(x): - ... print(f"hi {x}") + ... print(f"hi {x}") hi 2 """ pass @@ -2620,7 +2620,7 @@ def doctest_last_line_continued(): Do cool stuff. >>> def cool_stuff(x): - ... print(f"hi {x}") + ... print(f"hi {x}") """ pass @@ -2653,9 +2653,9 @@ def doctest_blank_continued(): Do cool stuff. >>> def cool_stuff(x): - ... print(x) + ... print(x) ... - ... print(x) + ... print(x) """ pass @@ -2668,8 +2668,8 @@ def doctest_blank_end(): Do cool stuff. >>> def cool_stuff(x): - ... print(x) - ... print(x) + ... print(x) + ... print(x) """ pass @@ -2681,8 +2681,8 @@ def doctest_blank_end_then_some_text(): Do cool stuff. >>> def cool_stuff(x): - ... print(x) - ... print(x) + ... print(x) + ... print(x) And say something else. """ @@ -2754,11 +2754,11 @@ def doctest_nested_doctest_not_formatted(): Do cool stuff. >>> def nested(x): - ... """ - ... Do nested cool stuff. - ... >>> func_call( 5 ) - ... """ - ... pass + ... """ + ... Do nested cool stuff. + ... >>> func_call( 5 ) + ... """ + ... pass ''' pass @@ -2799,7 +2799,7 @@ def doctest_long_lines(): But this one is long enough to get wrapped. >>> foo, bar, quux = this_is_a_long_line( - ... lion, giraffe, hippo, zeba, lemur, penguin, monkey, spider, bear, leopard + ... lion, giraffe, hippo, zeba, lemur, penguin, monkey, spider, bear, leopard ... ) """ # This demostrates a normal line that will get wrapped but won't @@ -2855,7 +2855,7 @@ def doctest_skipped_partial_inconsistent_indent(): Do cool stuff. >>> def cool_stuff(x): - ... print(x) + ... print(x) ... print( f"hi {x}" ); hi 2 """ @@ -2948,7 +2948,7 @@ def doctest_simple_continued(): Do cool stuff. >>> def cool_stuff(x): - ... print(f"hi {x}") + ... print(f"hi {x}") hi 2 """ pass @@ -2985,7 +2985,7 @@ def doctest_last_line_continued(): Do cool stuff. >>> def cool_stuff(x): - ... print(f"hi {x}") + ... print(f"hi {x}") """ pass @@ -3018,9 +3018,9 @@ def doctest_blank_continued(): Do cool stuff. >>> def cool_stuff(x): - ... print(x) + ... print(x) ... - ... print(x) + ... print(x) """ pass @@ -3033,8 +3033,8 @@ def doctest_blank_end(): Do cool stuff. >>> def cool_stuff(x): - ... print(x) - ... print(x) + ... print(x) + ... print(x) """ pass @@ -3046,8 +3046,8 @@ def doctest_blank_end_then_some_text(): Do cool stuff. >>> def cool_stuff(x): - ... print(x) - ... print(x) + ... print(x) + ... print(x) And say something else. """ @@ -3119,11 +3119,11 @@ def doctest_nested_doctest_not_formatted(): Do cool stuff. >>> def nested(x): - ... """ - ... Do nested cool stuff. - ... >>> func_call( 5 ) - ... """ - ... pass + ... """ + ... Do nested cool stuff. + ... >>> func_call( 5 ) + ... """ + ... pass ''' pass @@ -3164,7 +3164,7 @@ def doctest_long_lines(): But this one is long enough to get wrapped. >>> foo, bar, quux = this_is_a_long_line( - ... lion, giraffe, hippo, zeba, lemur, penguin, monkey, spider, bear, leopard + ... lion, giraffe, hippo, zeba, lemur, penguin, monkey, spider, bear, leopard ... ) """ # This demostrates a normal line that will get wrapped but won't @@ -3220,7 +3220,7 @@ def doctest_skipped_partial_inconsistent_indent(): Do cool stuff. >>> def cool_stuff(x): - ... print(x) + ... print(x) ... print( f"hi {x}" ); hi 2 """ From 148372c172102882b164305727d43a97a3dcfc59 Mon Sep 17 00:00:00 2001 From: Andrew Gallant Date: Mon, 4 Dec 2023 14:14:59 -0500 Subject: [PATCH 4/6] ruff_python_formatter: group imports Small style tweak. . o O { too bad `imports_granularity` isn't stable in rustfmt yet } --- crates/ruff_python_formatter/src/expression/string/docstring.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/crates/ruff_python_formatter/src/expression/string/docstring.rs b/crates/ruff_python_formatter/src/expression/string/docstring.rs index c396e810cd3ba..b982cf77337e7 100644 --- a/crates/ruff_python_formatter/src/expression/string/docstring.rs +++ b/crates/ruff_python_formatter/src/expression/string/docstring.rs @@ -4,9 +4,9 @@ use std::{borrow::Cow, collections::VecDeque}; -use ruff_python_trivia::PythonWhitespace; use { ruff_formatter::{write, IndentStyle, Printed}, + ruff_python_trivia::{is_python_whitespace, PythonWhitespace}, ruff_source_file::Locator, ruff_text_size::{Ranged, TextLen, TextRange, TextSize}, }; From e53053ef8044ded534b04d3fff3defce22110e09 Mon Sep 17 00:00:00 2001 From: Andrew Gallant Date: Thu, 30 Nov 2023 21:34:46 -0500 Subject: [PATCH 5/6] ruff_python_formatter: add reStructuredText docstring formatting support This commit makes use of the refactoring done in prior commits to slot in reStructuredText support. Essentially, we add a new type of code example and look for *both* literal blocks and code block directives. Literal blocks are treated as Python by default because it seems to be a common practice[1]. That is, literal blocks like this: ``` def example(): """ Here's an example:: foo( 1 ) All done. """ pass ``` Will get reformatted. And code blocks (via reStructuredText directives) will also get reformatted: ``` def example(): """ Here's an example: .. code-block:: python foo( 1 ) All done. """ pass ``` When looking for a code block, it is possible for it to become invalid. In which case, we back out of looking for a code example and print the lines out as they are. As with doctest formatting, if reformatting the code would result in invalid Python or if the code collected from the block is invalid, then formatting is also skipped. A number of tests have been added to check both the formatting and resetting behavior. Mixed indentation is also tested a fair bit, since one of my initial attempts at dealing with mixed indentation ended up not working. Closes #8859 [1]: https://github.com/adamchainz/blacken-docs/issues/195 --- crates/ruff_python_formatter/Cargo.toml | 1 + .../fixtures/ruff/docstring_code_examples.py | 484 ++ .../src/expression/string/docstring.rs | 413 ++ .../ruff_python_formatter/tests/normalizer.rs | 19 +- .../format@docstring_code_examples.py.snap | 4458 ++++++++++++++++- 5 files changed, 5327 insertions(+), 48 deletions(-) diff --git a/crates/ruff_python_formatter/Cargo.toml b/crates/ruff_python_formatter/Cargo.toml index 5bc38d4960f15..1c8825b3aff35 100644 --- a/crates/ruff_python_formatter/Cargo.toml +++ b/crates/ruff_python_formatter/Cargo.toml @@ -28,6 +28,7 @@ countme = "3.0.1" itertools = { workspace = true } memchr = { workspace = true } once_cell = { workspace = true } +regex = { workspace = true } rustc-hash = { workspace = true } serde = { workspace = true, optional = true } schemars = { workspace = true, optional = true } diff --git a/crates/ruff_python_formatter/resources/test/fixtures/ruff/docstring_code_examples.py b/crates/ruff_python_formatter/resources/test/fixtures/ruff/docstring_code_examples.py index 7d827d2cda053..296da3d816545 100644 --- a/crates/ruff_python_formatter/resources/test/fixtures/ruff/docstring_code_examples.py +++ b/crates/ruff_python_formatter/resources/test/fixtures/ruff/docstring_code_examples.py @@ -344,3 +344,487 @@ def doctest_invalid_skipped_with_triple_double_in_single_quote_string(): >>> x = '\"\"\"' """ pass + + +############################################################################### +# reStructuredText CODE EXAMPLES +# +# This section shows examples of docstrings that contain code snippets in +# reStructuredText formatted code blocks. +# +# See: https://www.sphinx-doc.org/en/master/usage/restructuredtext/basics.html#literal-blocks +# See: https://www.sphinx-doc.org/en/master/usage/restructuredtext/directives.html#directive-code-block +# See: https://docutils.sourceforge.io/docs/ref/rst/restructuredtext.html#literal-blocks +# See: https://docutils.sourceforge.io/docs/ref/rst/restructuredtext.html#toc-entry-30 +# See: https://docutils.sourceforge.io/docs/ref/rst/restructuredtext.html#toc-entry-38 +############################################################################### + + +def rst_literal_simple(): + """ + Do cool stuff:: + + cool_stuff( 1 ) + + Done. + """ + pass + + +def rst_literal_simple_continued(): + """ + Do cool stuff:: + + def cool_stuff( x ): + print( f"hi {x}" ); + + Done. + """ + pass + + +# Tests that we can end the literal block on the second +# to last line of the docstring. +def rst_literal_second_to_last(): + """ + Do cool stuff:: + + cool_stuff( 1 ) + """ + pass + + +# Tests that we can end the literal block on the actual +# last line of the docstring. +def rst_literal_actually_last(): + """ + Do cool stuff:: + + cool_stuff( 1 )""" + pass + + +def rst_literal_with_blank_lines(): + """ + Do cool stuff:: + + def cool_stuff( x ): + print( f"hi {x}" ); + + def other_stuff( y ): + print( y ) + + Done. + """ + pass + + +# Extra blanks should be preserved. +def rst_literal_extra_blanks(): + """ + Do cool stuff:: + + + + cool_stuff( 1 ) + + + + Done. + """ + pass + + +# If a literal block is never properly ended (via a non-empty unindented line), +# then the end of the block should be the last non-empty line. And subsequent +# empty lines should be preserved as-is. +def rst_literal_extra_blanks_at_end(): + """ + Do cool stuff:: + + + cool_stuff( 1 ) + + + + """ + pass + + +# A literal block can contain many empty lines and it should not end the block +# if it continues. +def rst_literal_extra_blanks_in_snippet(): + """ + Do cool stuff:: + + cool_stuff( 1 ) + + + cool_stuff( 2 ) + + Done. + """ + pass + + +# This tests that a unindented line appearing after an indented line (but where +# the indent is still beyond the minimum) gets formatted properly. +def rst_literal_subsequent_line_not_indented(): + """ + Do cool stuff:: + + if True: + cool_stuff( ''' + hiya''' ) + + Done. + """ + pass + + +# This checks that if the first line in a code snippet has been indented with +# tabs, then so long as its "indentation length" is considered bigger than the +# line with `::`, it is reformatted as code. +# +# (If your tabwidth is set to 4, then it looks like the code snippet +# isn't indented at all, which is perhaps counter-intuitive. Indeed, reST +# itself also seems to recognize this as a code block, although it appears +# under-specified.) +def rst_literal_first_line_indent_uses_tabs_4spaces(): + """ + Do cool stuff:: + + cool_stuff( 1 ) + + Done. + """ + pass + + +# Like the test above, but with multiple lines. +def rst_literal_first_line_indent_uses_tabs_4spaces_multiple(): + """ + Do cool stuff:: + + cool_stuff( 1 ) + cool_stuff( 2 ) + + Done. + """ + pass + + +# Another test with tabs, except in this case, if your tabwidth is less than +# 8, than the code snippet actually looks like its indent is *less* than the +# opening line with a `::`. One might presume this means that the code snippet +# is not treated as a literal block and thus not reformatted, but since we +# assume all tabs have tabwidth=8 when computing indentation length, the code +# snippet is actually seen as being more indented than the opening `::` line. +# As with the above example, reST seems to behave the same way here. +def rst_literal_first_line_indent_uses_tabs_8spaces(): + """ + Do cool stuff:: + + cool_stuff( 1 ) + + Done. + """ + pass + + +# Like the test above, but with multiple lines. +def rst_literal_first_line_indent_uses_tabs_8spaces_multiple(): + """ + Do cool stuff:: + + cool_stuff( 1 ) + cool_stuff( 2 ) + + Done. + """ + pass + + +# Tests that if two lines in a literal block are indented to the same level +# but by different means (tabs versus spaces), then we correctly recognize the +# block and format it. +def rst_literal_first_line_tab_second_line_spaces(): + """ + Do cool stuff:: + + cool_stuff( 1 ) + cool_stuff( 2 ) + + Done. + """ + pass + + +# Tests that when two lines in a code snippet have weird and inconsistent +# indentation, the code still gets formatted so long as the indent is greater +# than the indent of the `::` line. +# +# In this case, the minimum indent is 5 spaces (from the second line) where as +# the first line has an indent of 8 spaces via a tab (by assuming tabwidth=8). +# The minimum indent is stripped from each code line. Since tabs aren't +# divisible, the entire tab is stripped, which means the first and second lines +# wind up with the same level of indentation. +# +# An alternative behavior here would be that the tab is replaced with 3 spaces +# instead of being stripped entirely. The code snippet itself would then have +# inconsistent indentation to the point of being invalid Python, and thus code +# formatting would be skipped. +# +# I decided on the former behavior because it seems a bit easier to implement, +# but we might want to switch to the alternative if cases like this show up in +# the real world. ---AG +def rst_literal_odd_indentation(): + """ + Do cool stuff:: + + cool_stuff( 1 ) + cool_stuff( 2 ) + + Done. + """ + pass + + +# Tests that having a line with a lone `::` works as an introduction of a +# literal block. +def rst_literal_lone_colon(): + """ + Do cool stuff. + + :: + + cool_stuff( 1 ) + + Done. + """ + pass + + +def rst_directive_simple(): + """ + .. code-block:: python + + cool_stuff( 1 ) + + Done. + """ + pass + + +def rst_directive_case_insensitive(): + """ + .. cOdE-bLoCk:: python + + cool_stuff( 1 ) + + Done. + """ + pass + + +def rst_directive_sourcecode(): + """ + .. sourcecode:: python + + cool_stuff( 1 ) + + Done. + """ + pass + + +def rst_directive_options(): + """ + .. code-block:: python + :linenos: + :emphasize-lines: 2,3 + :name: blah blah + + cool_stuff( 1 ) + cool_stuff( 2 ) + cool_stuff( 3 ) + cool_stuff( 4 ) + + Done. + """ + pass + + +# In this case, since `pycon` isn't recognized as a Python code snippet, the +# docstring reformatter ignores it. But it then picks up the doctest and +# reformats it. +def rst_directive_doctest(): + """ + .. code-block:: pycon + + >>> cool_stuff( 1 ) + + Done. + """ + pass + + +# This checks that if the first non-empty line after the start of a literal +# block is not indented more than the line containing the `::`, then it is not +# treated as a code snippet. +def rst_literal_skipped_first_line_not_indented(): + """ + Do cool stuff:: + + cool_stuff( 1 ) + + Done. + """ + pass + + +# Like the test above, but inserts an indented line after the un-indented one. +# This should not cause the literal block to be resumed. +def rst_literal_skipped_first_line_not_indented_then_indented(): + """ + Do cool stuff:: + + cool_stuff( 1 ) + cool_stuff( 2 ) + + Done. + """ + pass + + +# This also checks that a code snippet is not reformatted when the indentation +# of the first line is not more than the line with `::`, but this uses tabs to +# make it a little more confounding. It relies on the fact that indentation +# length is computed by assuming a tabwidth equal to 8. reST also rejects this +# and doesn't treat it as a literal block. +def rst_literal_skipped_first_line_not_indented_tab(): + """ + Do cool stuff:: + + cool_stuff( 1 ) + + Done. + """ + pass + + +# Like the previous test, but adds a second line. +def rst_literal_skipped_first_line_not_indented_tab_multiple(): + """ + Do cool stuff:: + + cool_stuff( 1 ) + cool_stuff( 2 ) + + Done. + """ + pass + + +# Tests that a code block with a second line that is not properly indented gets +# skipped. A valid code block needs to have an empty line separating these. +# +# One trick here is that we need to make sure the Python code in the snippet is +# valid, otherwise it would be skipped because of invalid Python. +def rst_literal_skipped_subsequent_line_not_indented(): + """ + Do cool stuff:: + + if True: + cool_stuff( ''' + hiya''' ) + + Done. + """ + pass + + +# In this test, we write what looks like a code-block, but it should be treated +# as invalid due to the missing `language` argument. +# +# It does still look like it could be a literal block according to the literal +# rules, but we currently consider the `.. ` prefix to indicate that it is not +# a literal block. +def rst_literal_skipped_not_directive(): + """ + .. code-block:: + + cool_stuff( 1 ) + + Done. + """ + pass + + +# In this test, we start a line with `.. `, which makes it look like it might +# be a directive. But instead continue it as if it was just some periods from +# the previous line, and then try to end it by starting a literal block. +# +# But because of the `.. ` in the beginning, we wind up not treating this as a +# code snippet. The reST render I was using to test things does actually treat +# this as a code block, so we may be out of conformance here. +def rst_literal_skipped_possible_false_negative(): + """ + This is a test. + .. This is a test:: + + cool_stuff( 1 ) + + Done. + """ + pass + + +# This tests that a doctest inside of a reST literal block doesn't get +# reformatted. It's plausible this isn't the right behavior, but it also seems +# like it might be the right behavior since it is a literal block. (The doctest +# makes the Python code invalid.) +def rst_literal_skipped_doctest(): + """ + Do cool stuff:: + + >>> cool_stuff( 1 ) + + Done. + """ + pass + + +def rst_directive_skipped_not_indented(): + """ + .. code-block:: python + + cool_stuff( 1 ) + + Done. + """ + pass + + +def rst_directive_skipped_wrong_language(): + """ + .. code-block:: rust + + cool_stuff( 1 ) + + Done. + """ + pass + + +# This gets skipped for the same reason that the doctest in a literal block +# gets skipped. +def rst_directive_skipped_doctest(): + """ + .. code-block:: python + + >>> cool_stuff( 1 ) + + Done. + """ + pass diff --git a/crates/ruff_python_formatter/src/expression/string/docstring.rs b/crates/ruff_python_formatter/src/expression/string/docstring.rs index b982cf77337e7..dbd01a9f8c6f2 100644 --- a/crates/ruff_python_formatter/src/expression/string/docstring.rs +++ b/crates/ruff_python_formatter/src/expression/string/docstring.rs @@ -4,6 +4,8 @@ use std::{borrow::Cow, collections::VecDeque}; +use {once_cell::sync::Lazy, regex::Regex}; + use { ruff_formatter::{write, IndentStyle, Printed}, ruff_python_trivia::{is_python_whitespace, PythonWhitespace}, @@ -333,6 +335,19 @@ impl<'ast, 'buf, 'fmt, 'src> DocstringLinePrinter<'ast, 'buf, 'fmt, 'src> { )?; } } + CodeExampleKind::Rst(litblock) => { + let Some(min_indent) = litblock.min_indent else { + continue; + }; + // This looks suspicious, but it's consistent with the whitespace + // normalization that will occur anyway. + let indent = " ".repeat(min_indent.to_usize()); + for docline in formatted_lines { + self.print_one( + &docline.map(|line| std::format!("{indent}{line}")), + )?; + } + } } } } @@ -616,6 +631,13 @@ impl<'src> CodeExample<'src> { }; self.kind = Some(CodeExampleKind::Doctest(doctest)); } + Some(CodeExampleKind::Rst(litblock)) => { + let Some(litblock) = litblock.add_code_line(original, queue) else { + self.add_start(original, queue); + return; + }; + self.kind = Some(CodeExampleKind::Rst(litblock)); + } } } @@ -646,6 +668,9 @@ impl<'src> CodeExample<'src> { if let Some(doctest) = CodeExampleDoctest::new(original) { self.kind = Some(CodeExampleKind::Doctest(doctest)); queue.push_back(CodeExampleAddAction::Kept); + } else if let Some(litblock) = CodeExampleRst::new(original) { + self.kind = Some(CodeExampleKind::Rst(litblock)); + queue.push_back(CodeExampleAddAction::Print { original }); } else { queue.push_back(CodeExampleAddAction::Print { original }); } @@ -666,6 +691,12 @@ enum CodeExampleKind<'src> { /// /// [regex matching]: https://github.com/python/cpython/blob/0ff6368519ed7542ad8b443de01108690102420a/Lib/doctest.py#L611-L622 Doctest(CodeExampleDoctest<'src>), + /// Code found from a reStructuredText "[literal block]" or "[code block + /// directive]". + /// + /// [literal block]: https://docutils.sourceforge.io/docs/ref/rst/restructuredtext.html#literal-blocks + /// [code block directive]: https://www.sphinx-doc.org/en/master/usage/restructuredtext/directives.html#directive-code-block + Rst(CodeExampleRst<'src>), } impl<'src> CodeExampleKind<'src> { @@ -676,6 +707,7 @@ impl<'src> CodeExampleKind<'src> { fn code(&mut self) -> &[CodeExampleLine<'src>] { match *self { CodeExampleKind::Doctest(ref doctest) => &doctest.lines, + CodeExampleKind::Rst(ref mut litblock) => litblock.indented_code(), } } @@ -688,6 +720,7 @@ impl<'src> CodeExampleKind<'src> { fn into_code(self) -> Vec> { match self { CodeExampleKind::Doctest(doctest) => doctest.lines, + CodeExampleKind::Rst(litblock) => litblock.lines, } } } @@ -780,6 +813,333 @@ impl<'src> CodeExampleDoctest<'src> { } } +/// State corresponding to a single reStructuredText literal block or +/// code-block directive. +/// +/// While a literal block and code-block directive are technically two +/// different reStructuredText constructs, we use one type to represent +/// both because they are exceptionally similar. Basically, they are +/// the same with two main differences: +/// +/// 1. Literal blocks are began with a line that ends with `::`. Code block +/// directives are began with a line like `.. code-block:: python`. +/// 2. Code block directives permit a list of options as a "field list" +/// immediately after the opening line. Literal blocks have no options. +/// +/// Otherwise, everything else, including the indentation structure, is the +/// same. +#[derive(Debug)] +struct CodeExampleRst<'src> { + /// The lines that have been seen so far that make up the block. + lines: Vec>, + /// The indent of the line "opening" this block. It can either be the + /// indent of a line ending with `::` (for a literal block) or the indent + /// of a line starting with `.. ` (a directive). + /// + /// The content body of a block needs to be indented more than the line + /// opening the block, so we use this indentation to look for indentation + /// that is "more than" it. + opening_indent: TextSize, + /// The minimum indent of the block. + /// + /// This is `None` until the first such line is seen. If no such line is + /// found, then we consider it an invalid block and bail out of trying to + /// find a code snippet. Otherwise, we update this indentation as we see + /// lines in the block with less indentation. (Usually, the minimum is the + /// indentation of the first block, but this is not required.) + /// + /// By construction, all lines part of the block must have at least this + /// indentation. Additionally, it is guaranteed that the indentation length + /// of the opening indent is strictly less than the indentation of the + /// minimum indent. Namely, the block ends once we find a line that has + /// been unindented to at most the indent of the opening line. + /// + /// When the code snippet has been extracted, it is re-built before being + /// reformatted. The minimum indent is stripped from each line when it is + /// re-built. + min_indent: Option, + /// Whether this is a directive block or not. When not a directive, this is + /// a literal block. The main difference between them is that they start + /// differently. A literal block is started merely by trailing a line with + /// `::`. A directive block is started with `.. code-block:: python`. + /// + /// The other difference is that directive blocks can have options + /// (represented as a reStructuredText "field list") after the beginning of + /// the directive and before the body content of the directive. + is_directive: bool, +} + +impl<'src> CodeExampleRst<'src> { + /// Looks for the start of a reStructuredText [literal block] or [code + /// block directive]. + /// + /// If the start of a block is found, then this returns a correctly + /// initialized reStructuredText block. Callers should print the line as + /// given as it is not retained as part of the block. + /// + /// [literal block]: https://docutils.sourceforge.io/docs/ref/rst/restructuredtext.html#literal-blocks + /// [code block directive]: https://www.sphinx-doc.org/en/master/usage/restructuredtext/directives.html#directive-code-block + fn new(original: InputDocstringLine<'src>) -> Option { + let (opening_indent, rest) = indent_with_suffix(original.line); + if rest.starts_with(".. ") { + if let Some(litblock) = CodeExampleRst::new_code_block(original) { + return Some(litblock); + } + // In theory, we could still have something that looks like a literal block, + // but if the line starts with `.. `, then it seems like it probably shouldn't + // be a literal block. For example: + // + // .. code-block:: + // + // cool_stuff( 1 ) + // + // The above is not valid because the `language` argument is missing from + // the `code-block` directive. Because of how we handle it here, the above + // is not treated as a code snippet. + return None; + } + // At this point, we know we didn't find a code block, so the only + // thing we can hope for is a literal block which must end with a `::`. + if !rest.trim_end().ends_with("::") { + return None; + } + Some(CodeExampleRst { + lines: vec![], + opening_indent: indentation_length(opening_indent), + min_indent: None, + is_directive: false, + }) + } + + /// Attempts to create a new reStructuredText code example from a + /// `code-block` or `sourcecode` directive. If one couldn't be found, then + /// `None` is returned. + fn new_code_block(original: InputDocstringLine<'src>) -> Option { + // This regex attempts to parse the start of a reStructuredText code + // block [directive]. From the reStructuredText spec: + // + // > Directives are indicated by an explicit markup start (".. ") + // > followed by the directive type, two colons, and whitespace + // > (together called the "directive marker"). Directive types + // > are case-insensitive single words (alphanumerics plus + // > isolated internal hyphens, underscores, plus signs, colons, + // > and periods; no whitespace). + // + // The language names matched here (e.g., `python` or `py`) are taken + // from the [Pygments lexer names], which is referenced from the docs + // for the [code-block] directive. + // + // [directives]: https://docutils.sourceforge.io/docs/ref/rst/restructuredtext.html#directives + // [Pygments lexer names]: https://pygments.org/docs/lexers/ + // [code-block]: https://www.sphinx-doc.org/en/master/usage/restructuredtext/directives.html#directive-code-block + static DIRECTIVE_START: Lazy = Lazy::new(|| { + Regex::new( + r"(?m)^\s*\.\. \s*(?i:code-block|sourcecode)::\s*(?i:python|py|python3|py3)$", + ) + .unwrap() + }); + if !DIRECTIVE_START.is_match(original.line) { + return None; + } + Some(CodeExampleRst { + lines: vec![], + opening_indent: indentation_length(original.line), + min_indent: None, + is_directive: true, + }) + } + + /// Returns the code collected in this example as a sequence of lines. + /// + /// The lines returned have the minimum indentation stripped from their + /// prefix in-place. Based on the definition of minimum indentation, this + /// implies there is at least one line in the slice returned with no + /// whitespace prefix. + fn indented_code(&mut self) -> &[CodeExampleLine<'src>] { + let Some(min_indent) = self.min_indent else { + return &[]; + }; + for line in &mut self.lines { + line.code = if line.original.line.trim().is_empty() { + "" + } else { + indentation_trim(min_indent, line.original.line) + }; + } + &self.lines + } + + /// Attempts to add the given line from a docstring to the reStructuredText + /// code snippet being collected. + /// + /// This takes ownership of `self`, and if ownership is returned to the + /// caller, that means the caller should continue trying to add lines to + /// this code snippet. Otherwise, if ownership is not returned, then this + /// implies at least one action was added to the give queue to either reset + /// the code block or format. That is, the code snippet was either found to + /// be invalid or it was completed and should be reformatted. + /// + /// Note that actions may be added even if ownership is returned. For + /// example, empty lines immediately preceding the actual code snippet will + /// be returned back as an action to print them verbatim, but the caller + /// should still continue to try to add lines to this code snippet. + fn add_code_line( + mut self, + original: InputDocstringLine<'src>, + queue: &mut VecDeque>, + ) -> Option> { + // If we haven't started populating the minimum indent yet, then + // we haven't found the first code line and may need to find and + // pass through leading empty lines. + let Some(min_indent) = self.min_indent else { + return self.add_first_line(original, queue); + }; + let (indent, rest) = indent_with_suffix(original.line); + if rest.is_empty() { + // This is the standard way we close a block: when we see + // an empty line followed by an unindented non-empty line. + if let Some(next) = original.next { + let (next_indent, next_rest) = indent_with_suffix(next); + if !next_rest.is_empty() && indentation_length(next_indent) <= self.opening_indent { + self.push_format_action(queue); + return None; + } + } else { + self.push_format_action(queue); + return None; + } + self.push(original); + queue.push_back(CodeExampleAddAction::Kept); + return Some(self); + } + let indent_len = indentation_length(indent); + if indent_len <= self.opening_indent { + // If we find an unindented non-empty line at the same (or less) + // indentation of the opening line at this point, then we know it + // must be wrong because we didn't see it immediately following an + // empty line. + queue.push_back(self.into_reset_action()); + return None; + } else if indent_len < min_indent { + // While the minimum indent is usually the indentation of the first + // line in a code snippet, it is not guaranteed to be the case. + // And indeed, reST is happy to let blocks have a first line whose + // indentation is greater than a subsequent line in the block. The + // only real restriction is that every line in the block must be + // indented at least past the indentation of the `::` line. + self.min_indent = Some(indent_len); + } + self.push(original); + queue.push_back(CodeExampleAddAction::Kept); + Some(self) + } + + /// Looks for the first line in a literal or code block. + /// + /// If a first line is found, then this returns true. Otherwise, an empty + /// line has been found and the caller should pass it through to the + /// docstring unchanged. (Empty lines are allowed to precede a + /// block. And there must be at least one of them.) + /// + /// If the given line is invalid for a reStructuredText block (i.e., no + /// empty lines seen between the opening line), then an error variant is + /// returned. In this case, callers should bail out of parsing this code + /// example. + /// + /// When this returns `true`, it is guaranteed that `self.min_indent` is + /// set to a non-None value. + /// + /// # Panics + /// + /// Callers must only call this when the first indentation has not yet been + /// found. If it has, then this panics. + fn add_first_line( + mut self, + original: InputDocstringLine<'src>, + queue: &mut VecDeque>, + ) -> Option> { + assert!(self.min_indent.is_none()); + + // While the rst spec isn't completely clear on this point, through + // experimentation, I found that multiple empty lines before the first + // non-empty line are ignored. + let (indent, rest) = indent_with_suffix(original.line); + if rest.is_empty() { + queue.push_back(CodeExampleAddAction::Print { original }); + return Some(self); + } + // Ignore parameters in field lists. These can only occur in + // directives, not literal blocks. + if self.is_directive && is_rst_option(rest) { + queue.push_back(CodeExampleAddAction::Print { original }); + return Some(self); + } + let min_indent = indentation_length(indent); + // At this point, we found a non-empty line. The only thing we require + // is that its indentation is strictly greater than the indentation of + // the line containing the `::`. Otherwise, we treat this as an invalid + // block and bail. + if min_indent <= self.opening_indent { + queue.push_back(self.into_reset_action()); + return None; + } + self.min_indent = Some(min_indent); + self.push(original); + queue.push_back(CodeExampleAddAction::Kept); + Some(self) + } + + /// Pushes the given line as part of this code example. + fn push(&mut self, original: InputDocstringLine<'src>) { + // N.B. We record the code portion as identical to the original line. + // When we go to reformat the code lines, we change them by removing + // the `min_indent`. This design is necessary because the true value of + // `min_indent` isn't known until the entire block has been parsed. + let code = original.line; + self.lines.push(CodeExampleLine { original, code }); + } + + /// Consume this block and add actions to the give queue for formatting. + /// + /// This may trim lines from the end of the block and add them to the queue + /// for printing as-is. For example, this happens when there are trailing + /// empty lines, as we would like to preserve those since they aren't + /// generally treated as part of the code block. + fn push_format_action(mut self, queue: &mut VecDeque>) { + let has_non_whitespace = |line: &CodeExampleLine| { + line.original + .line + .chars() + .any(|ch| !is_python_whitespace(ch)) + }; + let first_trailing_empty_line = self + .lines + .iter() + .rposition(has_non_whitespace) + .map_or(0, |i| i + 1); + let trailing_lines = self.lines.split_off(first_trailing_empty_line); + queue.push_back(CodeExampleAddAction::Format { + kind: CodeExampleKind::Rst(self), + }); + queue.extend( + trailing_lines + .into_iter() + .map(|line| CodeExampleAddAction::Print { + original: line.original, + }), + ); + } + + /// Consume this block and turn it into a reset action. + /// + /// This occurs when we started collecting a code example from something + /// that looked like a block, but later determined that it wasn't a valid + /// block. + fn into_reset_action(self) -> CodeExampleAddAction<'src> { + CodeExampleAddAction::Reset { code: self.lines } + } +} + /// A single line in a code example found in a docstring. /// /// A code example line exists prior to formatting, and is thus in full @@ -904,6 +1264,59 @@ fn indentation_length(line: &str) -> TextSize { TextSize::new(indentation) } +/// Trims at most `indent_len` indentation from the beginning of `line`. +/// +/// This treats indentation in precisely the same way as `indentation_length`. +/// As such, it is expected that `indent_len` is computed from +/// `indentation_length`. This is useful when one needs to trim some minimum +/// level of indentation from a code snippet collected from a docstring before +/// attempting to reformat it. +fn indentation_trim(indent_len: TextSize, line: &str) -> &str { + let mut seen_indent_len = 0u32; + let mut trimmed = line; + for char in line.chars() { + if seen_indent_len >= indent_len.to_u32() { + return trimmed; + } + if char == '\t' { + // Pad to the next multiple of tab_width + seen_indent_len += 8 - (seen_indent_len.rem_euclid(8)); + trimmed = &trimmed[1..]; + } else if char.is_whitespace() { + seen_indent_len += u32::from(char.text_len()); + trimmed = &trimmed[char.len_utf8()..]; + } else { + break; + } + } + line +} + +/// Returns the indentation of the given line and everything following it. +fn indent_with_suffix(line: &str) -> (&str, &str) { + let suffix = line.trim_whitespace_start(); + let indent_len = line + .len() + .checked_sub(suffix.len()) + .expect("suffix <= line"); + let indent = &line[..indent_len]; + (indent, suffix) +} + +/// Returns true if this line looks like a reStructuredText option in a +/// field list. +/// +/// That is, a line that looks like `:name: optional-value`. +fn is_rst_option(line: &str) -> bool { + let line = line.trim_start(); + if !line.starts_with(':') { + return false; + } + line.chars() + .take_while(|&ch| !is_python_whitespace(ch)) + .any(|ch| ch == ':') +} + #[cfg(test)] mod tests { use ruff_text_size::TextSize; diff --git a/crates/ruff_python_formatter/tests/normalizer.rs b/crates/ruff_python_formatter/tests/normalizer.rs index d0f2ba0b9c234..8f01694468dcb 100644 --- a/crates/ruff_python_formatter/tests/normalizer.rs +++ b/crates/ruff_python_formatter/tests/normalizer.rs @@ -60,7 +60,7 @@ impl Transformer for Normalizer { } fn visit_string_literal(&self, string_literal: &mut ast::StringLiteral) { - static STRIP_CODE_SNIPPETS: Lazy = Lazy::new(|| { + static STRIP_DOC_TESTS: Lazy = Lazy::new(|| { Regex::new( r#"(?mx) ( @@ -75,14 +75,27 @@ impl Transformer for Normalizer { ) .unwrap() }); + static STRIP_RST_BLOCKS: Lazy = Lazy::new(|| { + // This is kind of unfortunate, but it's pretty tricky (likely + // impossible) to detect a reStructuredText block with a simple + // regex. So we just look for the start of a block and remove + // everything after it. Talk about a hammer. + Regex::new(r#"::(?s:.*)"#).unwrap() + }); // Start by (1) stripping everything that looks like a code // snippet, since code snippets may be completely reformatted if // they are Python code. - string_literal.value = STRIP_CODE_SNIPPETS + string_literal.value = STRIP_DOC_TESTS + .replace_all( + &string_literal.value, + "\n", + ) + .into_owned(); + string_literal.value = STRIP_RST_BLOCKS .replace_all( &string_literal.value, - "\n", + "\n", ) .into_owned(); // Normalize a string by (2) stripping any leading and trailing space from each diff --git a/crates/ruff_python_formatter/tests/snapshots/format@docstring_code_examples.py.snap b/crates/ruff_python_formatter/tests/snapshots/format@docstring_code_examples.py.snap index 82e2d7cff4efb..cae50ad8c6d91 100644 --- a/crates/ruff_python_formatter/tests/snapshots/format@docstring_code_examples.py.snap +++ b/crates/ruff_python_formatter/tests/snapshots/format@docstring_code_examples.py.snap @@ -350,6 +350,490 @@ def doctest_invalid_skipped_with_triple_double_in_single_quote_string(): >>> x = '\"\"\"' """ pass + + +############################################################################### +# reStructuredText CODE EXAMPLES +# +# This section shows examples of docstrings that contain code snippets in +# reStructuredText formatted code blocks. +# +# See: https://www.sphinx-doc.org/en/master/usage/restructuredtext/basics.html#literal-blocks +# See: https://www.sphinx-doc.org/en/master/usage/restructuredtext/directives.html#directive-code-block +# See: https://docutils.sourceforge.io/docs/ref/rst/restructuredtext.html#literal-blocks +# See: https://docutils.sourceforge.io/docs/ref/rst/restructuredtext.html#toc-entry-30 +# See: https://docutils.sourceforge.io/docs/ref/rst/restructuredtext.html#toc-entry-38 +############################################################################### + + +def rst_literal_simple(): + """ + Do cool stuff:: + + cool_stuff( 1 ) + + Done. + """ + pass + + +def rst_literal_simple_continued(): + """ + Do cool stuff:: + + def cool_stuff( x ): + print( f"hi {x}" ); + + Done. + """ + pass + + +# Tests that we can end the literal block on the second +# to last line of the docstring. +def rst_literal_second_to_last(): + """ + Do cool stuff:: + + cool_stuff( 1 ) + """ + pass + + +# Tests that we can end the literal block on the actual +# last line of the docstring. +def rst_literal_actually_last(): + """ + Do cool stuff:: + + cool_stuff( 1 )""" + pass + + +def rst_literal_with_blank_lines(): + """ + Do cool stuff:: + + def cool_stuff( x ): + print( f"hi {x}" ); + + def other_stuff( y ): + print( y ) + + Done. + """ + pass + + +# Extra blanks should be preserved. +def rst_literal_extra_blanks(): + """ + Do cool stuff:: + + + + cool_stuff( 1 ) + + + + Done. + """ + pass + + +# If a literal block is never properly ended (via a non-empty unindented line), +# then the end of the block should be the last non-empty line. And subsequent +# empty lines should be preserved as-is. +def rst_literal_extra_blanks_at_end(): + """ + Do cool stuff:: + + + cool_stuff( 1 ) + + + + """ + pass + + +# A literal block can contain many empty lines and it should not end the block +# if it continues. +def rst_literal_extra_blanks_in_snippet(): + """ + Do cool stuff:: + + cool_stuff( 1 ) + + + cool_stuff( 2 ) + + Done. + """ + pass + + +# This tests that a unindented line appearing after an indented line (but where +# the indent is still beyond the minimum) gets formatted properly. +def rst_literal_subsequent_line_not_indented(): + """ + Do cool stuff:: + + if True: + cool_stuff( ''' + hiya''' ) + + Done. + """ + pass + + +# This checks that if the first line in a code snippet has been indented with +# tabs, then so long as its "indentation length" is considered bigger than the +# line with `::`, it is reformatted as code. +# +# (If your tabwidth is set to 4, then it looks like the code snippet +# isn't indented at all, which is perhaps counter-intuitive. Indeed, reST +# itself also seems to recognize this as a code block, although it appears +# under-specified.) +def rst_literal_first_line_indent_uses_tabs_4spaces(): + """ + Do cool stuff:: + + cool_stuff( 1 ) + + Done. + """ + pass + + +# Like the test above, but with multiple lines. +def rst_literal_first_line_indent_uses_tabs_4spaces_multiple(): + """ + Do cool stuff:: + + cool_stuff( 1 ) + cool_stuff( 2 ) + + Done. + """ + pass + + +# Another test with tabs, except in this case, if your tabwidth is less than +# 8, than the code snippet actually looks like its indent is *less* than the +# opening line with a `::`. One might presume this means that the code snippet +# is not treated as a literal block and thus not reformatted, but since we +# assume all tabs have tabwidth=8 when computing indentation length, the code +# snippet is actually seen as being more indented than the opening `::` line. +# As with the above example, reST seems to behave the same way here. +def rst_literal_first_line_indent_uses_tabs_8spaces(): + """ + Do cool stuff:: + + cool_stuff( 1 ) + + Done. + """ + pass + + +# Like the test above, but with multiple lines. +def rst_literal_first_line_indent_uses_tabs_8spaces_multiple(): + """ + Do cool stuff:: + + cool_stuff( 1 ) + cool_stuff( 2 ) + + Done. + """ + pass + + +# Tests that if two lines in a literal block are indented to the same level +# but by different means (tabs versus spaces), then we correctly recognize the +# block and format it. +def rst_literal_first_line_tab_second_line_spaces(): + """ + Do cool stuff:: + + cool_stuff( 1 ) + cool_stuff( 2 ) + + Done. + """ + pass + + +# Tests that when two lines in a code snippet have weird and inconsistent +# indentation, the code still gets formatted so long as the indent is greater +# than the indent of the `::` line. +# +# In this case, the minimum indent is 5 spaces (from the second line) where as +# the first line has an indent of 8 spaces via a tab (by assuming tabwidth=8). +# The minimum indent is stripped from each code line. Since tabs aren't +# divisible, the entire tab is stripped, which means the first and second lines +# wind up with the same level of indentation. +# +# An alternative behavior here would be that the tab is replaced with 3 spaces +# instead of being stripped entirely. The code snippet itself would then have +# inconsistent indentation to the point of being invalid Python, and thus code +# formatting would be skipped. +# +# I decided on the former behavior because it seems a bit easier to implement, +# but we might want to switch to the alternative if cases like this show up in +# the real world. ---AG +def rst_literal_odd_indentation(): + """ + Do cool stuff:: + + cool_stuff( 1 ) + cool_stuff( 2 ) + + Done. + """ + pass + + +# Tests that having a line with a lone `::` works as an introduction of a +# literal block. +def rst_literal_lone_colon(): + """ + Do cool stuff. + + :: + + cool_stuff( 1 ) + + Done. + """ + pass + + +def rst_directive_simple(): + """ + .. code-block:: python + + cool_stuff( 1 ) + + Done. + """ + pass + + +def rst_directive_case_insensitive(): + """ + .. cOdE-bLoCk:: python + + cool_stuff( 1 ) + + Done. + """ + pass + + +def rst_directive_sourcecode(): + """ + .. sourcecode:: python + + cool_stuff( 1 ) + + Done. + """ + pass + + +def rst_directive_options(): + """ + .. code-block:: python + :linenos: + :emphasize-lines: 2,3 + :name: blah blah + + cool_stuff( 1 ) + cool_stuff( 2 ) + cool_stuff( 3 ) + cool_stuff( 4 ) + + Done. + """ + pass + + +# In this case, since `pycon` isn't recognized as a Python code snippet, the +# docstring reformatter ignores it. But it then picks up the doctest and +# reformats it. +def rst_directive_doctest(): + """ + .. code-block:: pycon + + >>> cool_stuff( 1 ) + + Done. + """ + pass + + +# This checks that if the first non-empty line after the start of a literal +# block is not indented more than the line containing the `::`, then it is not +# treated as a code snippet. +def rst_literal_skipped_first_line_not_indented(): + """ + Do cool stuff:: + + cool_stuff( 1 ) + + Done. + """ + pass + + +# Like the test above, but inserts an indented line after the un-indented one. +# This should not cause the literal block to be resumed. +def rst_literal_skipped_first_line_not_indented_then_indented(): + """ + Do cool stuff:: + + cool_stuff( 1 ) + cool_stuff( 2 ) + + Done. + """ + pass + + +# This also checks that a code snippet is not reformatted when the indentation +# of the first line is not more than the line with `::`, but this uses tabs to +# make it a little more confounding. It relies on the fact that indentation +# length is computed by assuming a tabwidth equal to 8. reST also rejects this +# and doesn't treat it as a literal block. +def rst_literal_skipped_first_line_not_indented_tab(): + """ + Do cool stuff:: + + cool_stuff( 1 ) + + Done. + """ + pass + + +# Like the previous test, but adds a second line. +def rst_literal_skipped_first_line_not_indented_tab_multiple(): + """ + Do cool stuff:: + + cool_stuff( 1 ) + cool_stuff( 2 ) + + Done. + """ + pass + + +# Tests that a code block with a second line that is not properly indented gets +# skipped. A valid code block needs to have an empty line separating these. +# +# One trick here is that we need to make sure the Python code in the snippet is +# valid, otherwise it would be skipped because of invalid Python. +def rst_literal_skipped_subsequent_line_not_indented(): + """ + Do cool stuff:: + + if True: + cool_stuff( ''' + hiya''' ) + + Done. + """ + pass + + +# In this test, we write what looks like a code-block, but it should be treated +# as invalid due to the missing `language` argument. +# +# It does still look like it could be a literal block according to the literal +# rules, but we currently consider the `.. ` prefix to indicate that it is not +# a literal block. +def rst_literal_skipped_not_directive(): + """ + .. code-block:: + + cool_stuff( 1 ) + + Done. + """ + pass + + +# In this test, we start a line with `.. `, which makes it look like it might +# be a directive. But instead continue it as if it was just some periods from +# the previous line, and then try to end it by starting a literal block. +# +# But because of the `.. ` in the beginning, we wind up not treating this as a +# code snippet. The reST render I was using to test things does actually treat +# this as a code block, so we may be out of conformance here. +def rst_literal_skipped_possible_false_negative(): + """ + This is a test. + .. This is a test:: + + cool_stuff( 1 ) + + Done. + """ + pass + + +# This tests that a doctest inside of a reST literal block doesn't get +# reformatted. It's plausible this isn't the right behavior, but it also seems +# like it might be the right behavior since it is a literal block. (The doctest +# makes the Python code invalid.) +def rst_literal_skipped_doctest(): + """ + Do cool stuff:: + + >>> cool_stuff( 1 ) + + Done. + """ + pass + + +def rst_directive_skipped_not_indented(): + """ + .. code-block:: python + + cool_stuff( 1 ) + + Done. + """ + pass + + +def rst_directive_skipped_wrong_language(): + """ + .. code-block:: rust + + cool_stuff( 1 ) + + Done. + """ + pass + + +# This gets skipped for the same reason that the doctest in a literal block +# gets skipped. +def rst_directive_skipped_doctest(): + """ + .. code-block:: python + + >>> cool_stuff( 1 ) + + Done. + """ + pass ``` ## Outputs @@ -715,35 +1199,519 @@ def doctest_invalid_skipped_with_triple_double_in_single_quote_string(): >>> x = '\"\"\"' """ pass -``` -### Output 2 -``` -indent-style = space -line-width = 88 -indent-width = 2 -quote-style = Double -line-ending = LineFeed -magic-trailing-comma = Respect -docstring-code = Disabled -preview = Disabled -``` - -```python ############################################################################### -# DOCTEST CODE EXAMPLES +# reStructuredText CODE EXAMPLES # # This section shows examples of docstrings that contain code snippets in -# Python's "doctest" format. +# reStructuredText formatted code blocks. # -# See: https://docs.python.org/3/library/doctest.html +# See: https://www.sphinx-doc.org/en/master/usage/restructuredtext/basics.html#literal-blocks +# See: https://www.sphinx-doc.org/en/master/usage/restructuredtext/directives.html#directive-code-block +# See: https://docutils.sourceforge.io/docs/ref/rst/restructuredtext.html#literal-blocks +# See: https://docutils.sourceforge.io/docs/ref/rst/restructuredtext.html#toc-entry-30 +# See: https://docutils.sourceforge.io/docs/ref/rst/restructuredtext.html#toc-entry-38 ############################################################################### -# The simplest doctest to ensure basic formatting works. -def doctest_simple(): - """ - Do cool stuff. + +def rst_literal_simple(): + """ + Do cool stuff:: + + cool_stuff( 1 ) + + Done. + """ + pass + + +def rst_literal_simple_continued(): + """ + Do cool stuff:: + + def cool_stuff( x ): + print( f"hi {x}" ); + + Done. + """ + pass + + +# Tests that we can end the literal block on the second +# to last line of the docstring. +def rst_literal_second_to_last(): + """ + Do cool stuff:: + + cool_stuff( 1 ) + """ + pass + + +# Tests that we can end the literal block on the actual +# last line of the docstring. +def rst_literal_actually_last(): + """ + Do cool stuff:: + + cool_stuff( 1 )""" + pass + + +def rst_literal_with_blank_lines(): + """ + Do cool stuff:: + + def cool_stuff( x ): + print( f"hi {x}" ); + + def other_stuff( y ): + print( y ) + + Done. + """ + pass + + +# Extra blanks should be preserved. +def rst_literal_extra_blanks(): + """ + Do cool stuff:: + + + + cool_stuff( 1 ) + + + + Done. + """ + pass + + +# If a literal block is never properly ended (via a non-empty unindented line), +# then the end of the block should be the last non-empty line. And subsequent +# empty lines should be preserved as-is. +def rst_literal_extra_blanks_at_end(): + """ + Do cool stuff:: + + + cool_stuff( 1 ) + + + + """ + pass + + +# A literal block can contain many empty lines and it should not end the block +# if it continues. +def rst_literal_extra_blanks_in_snippet(): + """ + Do cool stuff:: + + cool_stuff( 1 ) + + + cool_stuff( 2 ) + + Done. + """ + pass + + +# This tests that a unindented line appearing after an indented line (but where +# the indent is still beyond the minimum) gets formatted properly. +def rst_literal_subsequent_line_not_indented(): + """ + Do cool stuff:: + + if True: + cool_stuff( ''' + hiya''' ) + + Done. + """ + pass + + +# This checks that if the first line in a code snippet has been indented with +# tabs, then so long as its "indentation length" is considered bigger than the +# line with `::`, it is reformatted as code. +# +# (If your tabwidth is set to 4, then it looks like the code snippet +# isn't indented at all, which is perhaps counter-intuitive. Indeed, reST +# itself also seems to recognize this as a code block, although it appears +# under-specified.) +def rst_literal_first_line_indent_uses_tabs_4spaces(): + """ + Do cool stuff:: + + cool_stuff( 1 ) + + Done. + """ + pass + + +# Like the test above, but with multiple lines. +def rst_literal_first_line_indent_uses_tabs_4spaces_multiple(): + """ + Do cool stuff:: + + cool_stuff( 1 ) + cool_stuff( 2 ) + + Done. + """ + pass + + +# Another test with tabs, except in this case, if your tabwidth is less than +# 8, than the code snippet actually looks like its indent is *less* than the +# opening line with a `::`. One might presume this means that the code snippet +# is not treated as a literal block and thus not reformatted, but since we +# assume all tabs have tabwidth=8 when computing indentation length, the code +# snippet is actually seen as being more indented than the opening `::` line. +# As with the above example, reST seems to behave the same way here. +def rst_literal_first_line_indent_uses_tabs_8spaces(): + """ + Do cool stuff:: + + cool_stuff( 1 ) + + Done. + """ + pass + + +# Like the test above, but with multiple lines. +def rst_literal_first_line_indent_uses_tabs_8spaces_multiple(): + """ + Do cool stuff:: + + cool_stuff( 1 ) + cool_stuff( 2 ) + + Done. + """ + pass + + +# Tests that if two lines in a literal block are indented to the same level +# but by different means (tabs versus spaces), then we correctly recognize the +# block and format it. +def rst_literal_first_line_tab_second_line_spaces(): + """ + Do cool stuff:: + + cool_stuff( 1 ) + cool_stuff( 2 ) + + Done. + """ + pass + + +# Tests that when two lines in a code snippet have weird and inconsistent +# indentation, the code still gets formatted so long as the indent is greater +# than the indent of the `::` line. +# +# In this case, the minimum indent is 5 spaces (from the second line) where as +# the first line has an indent of 8 spaces via a tab (by assuming tabwidth=8). +# The minimum indent is stripped from each code line. Since tabs aren't +# divisible, the entire tab is stripped, which means the first and second lines +# wind up with the same level of indentation. +# +# An alternative behavior here would be that the tab is replaced with 3 spaces +# instead of being stripped entirely. The code snippet itself would then have +# inconsistent indentation to the point of being invalid Python, and thus code +# formatting would be skipped. +# +# I decided on the former behavior because it seems a bit easier to implement, +# but we might want to switch to the alternative if cases like this show up in +# the real world. ---AG +def rst_literal_odd_indentation(): + """ + Do cool stuff:: + + cool_stuff( 1 ) + cool_stuff( 2 ) + + Done. + """ + pass + + +# Tests that having a line with a lone `::` works as an introduction of a +# literal block. +def rst_literal_lone_colon(): + """ + Do cool stuff. + + :: + + cool_stuff( 1 ) + + Done. + """ + pass + + +def rst_directive_simple(): + """ + .. code-block:: python + + cool_stuff( 1 ) + + Done. + """ + pass + + +def rst_directive_case_insensitive(): + """ + .. cOdE-bLoCk:: python + + cool_stuff( 1 ) + + Done. + """ + pass + + +def rst_directive_sourcecode(): + """ + .. sourcecode:: python + + cool_stuff( 1 ) + + Done. + """ + pass + + +def rst_directive_options(): + """ + .. code-block:: python + :linenos: + :emphasize-lines: 2,3 + :name: blah blah + + cool_stuff( 1 ) + cool_stuff( 2 ) + cool_stuff( 3 ) + cool_stuff( 4 ) + + Done. + """ + pass + + +# In this case, since `pycon` isn't recognized as a Python code snippet, the +# docstring reformatter ignores it. But it then picks up the doctest and +# reformats it. +def rst_directive_doctest(): + """ + .. code-block:: pycon + + >>> cool_stuff( 1 ) + + Done. + """ + pass + + +# This checks that if the first non-empty line after the start of a literal +# block is not indented more than the line containing the `::`, then it is not +# treated as a code snippet. +def rst_literal_skipped_first_line_not_indented(): + """ + Do cool stuff:: + + cool_stuff( 1 ) + + Done. + """ + pass + + +# Like the test above, but inserts an indented line after the un-indented one. +# This should not cause the literal block to be resumed. +def rst_literal_skipped_first_line_not_indented_then_indented(): + """ + Do cool stuff:: + + cool_stuff( 1 ) + cool_stuff( 2 ) + + Done. + """ + pass + + +# This also checks that a code snippet is not reformatted when the indentation +# of the first line is not more than the line with `::`, but this uses tabs to +# make it a little more confounding. It relies on the fact that indentation +# length is computed by assuming a tabwidth equal to 8. reST also rejects this +# and doesn't treat it as a literal block. +def rst_literal_skipped_first_line_not_indented_tab(): + """ + Do cool stuff:: + + cool_stuff( 1 ) + + Done. + """ + pass + + +# Like the previous test, but adds a second line. +def rst_literal_skipped_first_line_not_indented_tab_multiple(): + """ + Do cool stuff:: + + cool_stuff( 1 ) + cool_stuff( 2 ) + + Done. + """ + pass + + +# Tests that a code block with a second line that is not properly indented gets +# skipped. A valid code block needs to have an empty line separating these. +# +# One trick here is that we need to make sure the Python code in the snippet is +# valid, otherwise it would be skipped because of invalid Python. +def rst_literal_skipped_subsequent_line_not_indented(): + """ + Do cool stuff:: + + if True: + cool_stuff( ''' + hiya''' ) + + Done. + """ + pass + + +# In this test, we write what looks like a code-block, but it should be treated +# as invalid due to the missing `language` argument. +# +# It does still look like it could be a literal block according to the literal +# rules, but we currently consider the `.. ` prefix to indicate that it is not +# a literal block. +def rst_literal_skipped_not_directive(): + """ + .. code-block:: + + cool_stuff( 1 ) + + Done. + """ + pass + + +# In this test, we start a line with `.. `, which makes it look like it might +# be a directive. But instead continue it as if it was just some periods from +# the previous line, and then try to end it by starting a literal block. +# +# But because of the `.. ` in the beginning, we wind up not treating this as a +# code snippet. The reST render I was using to test things does actually treat +# this as a code block, so we may be out of conformance here. +def rst_literal_skipped_possible_false_negative(): + """ + This is a test. + .. This is a test:: + + cool_stuff( 1 ) + + Done. + """ + pass + + +# This tests that a doctest inside of a reST literal block doesn't get +# reformatted. It's plausible this isn't the right behavior, but it also seems +# like it might be the right behavior since it is a literal block. (The doctest +# makes the Python code invalid.) +def rst_literal_skipped_doctest(): + """ + Do cool stuff:: + + >>> cool_stuff( 1 ) + + Done. + """ + pass + + +def rst_directive_skipped_not_indented(): + """ + .. code-block:: python + + cool_stuff( 1 ) + + Done. + """ + pass + + +def rst_directive_skipped_wrong_language(): + """ + .. code-block:: rust + + cool_stuff( 1 ) + + Done. + """ + pass + + +# This gets skipped for the same reason that the doctest in a literal block +# gets skipped. +def rst_directive_skipped_doctest(): + """ + .. code-block:: python + + >>> cool_stuff( 1 ) + + Done. + """ + pass +``` + + +### Output 2 +``` +indent-style = space +line-width = 88 +indent-width = 2 +quote-style = Double +line-ending = LineFeed +magic-trailing-comma = Respect +docstring-code = Disabled +preview = Disabled +``` + +```python +############################################################################### +# DOCTEST CODE EXAMPLES +# +# This section shows examples of docstrings that contain code snippets in +# Python's "doctest" format. +# +# See: https://docs.python.org/3/library/doctest.html +############################################################################### + +# The simplest doctest to ensure basic formatting works. +def doctest_simple(): + """ + Do cool stuff. >>> cool_stuff( 1 ) 2 @@ -1075,9 +2043,493 @@ def doctest_skipped_fstring(): # correctly, but at time of writing it does not. def doctest_invalid_skipped_with_triple_double_in_single_quote_string(): """ - Do cool stuff. + Do cool stuff. + + >>> x = '\"\"\"' + """ + pass + + +############################################################################### +# reStructuredText CODE EXAMPLES +# +# This section shows examples of docstrings that contain code snippets in +# reStructuredText formatted code blocks. +# +# See: https://www.sphinx-doc.org/en/master/usage/restructuredtext/basics.html#literal-blocks +# See: https://www.sphinx-doc.org/en/master/usage/restructuredtext/directives.html#directive-code-block +# See: https://docutils.sourceforge.io/docs/ref/rst/restructuredtext.html#literal-blocks +# See: https://docutils.sourceforge.io/docs/ref/rst/restructuredtext.html#toc-entry-30 +# See: https://docutils.sourceforge.io/docs/ref/rst/restructuredtext.html#toc-entry-38 +############################################################################### + + +def rst_literal_simple(): + """ + Do cool stuff:: + + cool_stuff( 1 ) + + Done. + """ + pass + + +def rst_literal_simple_continued(): + """ + Do cool stuff:: + + def cool_stuff( x ): + print( f"hi {x}" ); + + Done. + """ + pass + + +# Tests that we can end the literal block on the second +# to last line of the docstring. +def rst_literal_second_to_last(): + """ + Do cool stuff:: + + cool_stuff( 1 ) + """ + pass + + +# Tests that we can end the literal block on the actual +# last line of the docstring. +def rst_literal_actually_last(): + """ + Do cool stuff:: + + cool_stuff( 1 )""" + pass + + +def rst_literal_with_blank_lines(): + """ + Do cool stuff:: + + def cool_stuff( x ): + print( f"hi {x}" ); + + def other_stuff( y ): + print( y ) + + Done. + """ + pass + + +# Extra blanks should be preserved. +def rst_literal_extra_blanks(): + """ + Do cool stuff:: + + + + cool_stuff( 1 ) + + + + Done. + """ + pass + + +# If a literal block is never properly ended (via a non-empty unindented line), +# then the end of the block should be the last non-empty line. And subsequent +# empty lines should be preserved as-is. +def rst_literal_extra_blanks_at_end(): + """ + Do cool stuff:: + + + cool_stuff( 1 ) + + + + """ + pass + + +# A literal block can contain many empty lines and it should not end the block +# if it continues. +def rst_literal_extra_blanks_in_snippet(): + """ + Do cool stuff:: + + cool_stuff( 1 ) + + + cool_stuff( 2 ) + + Done. + """ + pass + + +# This tests that a unindented line appearing after an indented line (but where +# the indent is still beyond the minimum) gets formatted properly. +def rst_literal_subsequent_line_not_indented(): + """ + Do cool stuff:: + + if True: + cool_stuff( ''' + hiya''' ) + + Done. + """ + pass + + +# This checks that if the first line in a code snippet has been indented with +# tabs, then so long as its "indentation length" is considered bigger than the +# line with `::`, it is reformatted as code. +# +# (If your tabwidth is set to 4, then it looks like the code snippet +# isn't indented at all, which is perhaps counter-intuitive. Indeed, reST +# itself also seems to recognize this as a code block, although it appears +# under-specified.) +def rst_literal_first_line_indent_uses_tabs_4spaces(): + """ + Do cool stuff:: + + cool_stuff( 1 ) + + Done. + """ + pass + + +# Like the test above, but with multiple lines. +def rst_literal_first_line_indent_uses_tabs_4spaces_multiple(): + """ + Do cool stuff:: + + cool_stuff( 1 ) + cool_stuff( 2 ) + + Done. + """ + pass + + +# Another test with tabs, except in this case, if your tabwidth is less than +# 8, than the code snippet actually looks like its indent is *less* than the +# opening line with a `::`. One might presume this means that the code snippet +# is not treated as a literal block and thus not reformatted, but since we +# assume all tabs have tabwidth=8 when computing indentation length, the code +# snippet is actually seen as being more indented than the opening `::` line. +# As with the above example, reST seems to behave the same way here. +def rst_literal_first_line_indent_uses_tabs_8spaces(): + """ + Do cool stuff:: + + cool_stuff( 1 ) + + Done. + """ + pass + + +# Like the test above, but with multiple lines. +def rst_literal_first_line_indent_uses_tabs_8spaces_multiple(): + """ + Do cool stuff:: + + cool_stuff( 1 ) + cool_stuff( 2 ) + + Done. + """ + pass + + +# Tests that if two lines in a literal block are indented to the same level +# but by different means (tabs versus spaces), then we correctly recognize the +# block and format it. +def rst_literal_first_line_tab_second_line_spaces(): + """ + Do cool stuff:: + + cool_stuff( 1 ) + cool_stuff( 2 ) + + Done. + """ + pass + + +# Tests that when two lines in a code snippet have weird and inconsistent +# indentation, the code still gets formatted so long as the indent is greater +# than the indent of the `::` line. +# +# In this case, the minimum indent is 5 spaces (from the second line) where as +# the first line has an indent of 8 spaces via a tab (by assuming tabwidth=8). +# The minimum indent is stripped from each code line. Since tabs aren't +# divisible, the entire tab is stripped, which means the first and second lines +# wind up with the same level of indentation. +# +# An alternative behavior here would be that the tab is replaced with 3 spaces +# instead of being stripped entirely. The code snippet itself would then have +# inconsistent indentation to the point of being invalid Python, and thus code +# formatting would be skipped. +# +# I decided on the former behavior because it seems a bit easier to implement, +# but we might want to switch to the alternative if cases like this show up in +# the real world. ---AG +def rst_literal_odd_indentation(): + """ + Do cool stuff:: + + cool_stuff( 1 ) + cool_stuff( 2 ) + + Done. + """ + pass + + +# Tests that having a line with a lone `::` works as an introduction of a +# literal block. +def rst_literal_lone_colon(): + """ + Do cool stuff. + + :: + + cool_stuff( 1 ) + + Done. + """ + pass + + +def rst_directive_simple(): + """ + .. code-block:: python + + cool_stuff( 1 ) + + Done. + """ + pass + + +def rst_directive_case_insensitive(): + """ + .. cOdE-bLoCk:: python + + cool_stuff( 1 ) + + Done. + """ + pass + + +def rst_directive_sourcecode(): + """ + .. sourcecode:: python + + cool_stuff( 1 ) + + Done. + """ + pass + + +def rst_directive_options(): + """ + .. code-block:: python + :linenos: + :emphasize-lines: 2,3 + :name: blah blah + + cool_stuff( 1 ) + cool_stuff( 2 ) + cool_stuff( 3 ) + cool_stuff( 4 ) + + Done. + """ + pass + + +# In this case, since `pycon` isn't recognized as a Python code snippet, the +# docstring reformatter ignores it. But it then picks up the doctest and +# reformats it. +def rst_directive_doctest(): + """ + .. code-block:: pycon + + >>> cool_stuff( 1 ) + + Done. + """ + pass + + +# This checks that if the first non-empty line after the start of a literal +# block is not indented more than the line containing the `::`, then it is not +# treated as a code snippet. +def rst_literal_skipped_first_line_not_indented(): + """ + Do cool stuff:: + + cool_stuff( 1 ) + + Done. + """ + pass + + +# Like the test above, but inserts an indented line after the un-indented one. +# This should not cause the literal block to be resumed. +def rst_literal_skipped_first_line_not_indented_then_indented(): + """ + Do cool stuff:: + + cool_stuff( 1 ) + cool_stuff( 2 ) + + Done. + """ + pass + + +# This also checks that a code snippet is not reformatted when the indentation +# of the first line is not more than the line with `::`, but this uses tabs to +# make it a little more confounding. It relies on the fact that indentation +# length is computed by assuming a tabwidth equal to 8. reST also rejects this +# and doesn't treat it as a literal block. +def rst_literal_skipped_first_line_not_indented_tab(): + """ + Do cool stuff:: + + cool_stuff( 1 ) + + Done. + """ + pass + + +# Like the previous test, but adds a second line. +def rst_literal_skipped_first_line_not_indented_tab_multiple(): + """ + Do cool stuff:: + + cool_stuff( 1 ) + cool_stuff( 2 ) + + Done. + """ + pass + + +# Tests that a code block with a second line that is not properly indented gets +# skipped. A valid code block needs to have an empty line separating these. +# +# One trick here is that we need to make sure the Python code in the snippet is +# valid, otherwise it would be skipped because of invalid Python. +def rst_literal_skipped_subsequent_line_not_indented(): + """ + Do cool stuff:: + + if True: + cool_stuff( ''' + hiya''' ) + + Done. + """ + pass + + +# In this test, we write what looks like a code-block, but it should be treated +# as invalid due to the missing `language` argument. +# +# It does still look like it could be a literal block according to the literal +# rules, but we currently consider the `.. ` prefix to indicate that it is not +# a literal block. +def rst_literal_skipped_not_directive(): + """ + .. code-block:: + + cool_stuff( 1 ) + + Done. + """ + pass + + +# In this test, we start a line with `.. `, which makes it look like it might +# be a directive. But instead continue it as if it was just some periods from +# the previous line, and then try to end it by starting a literal block. +# +# But because of the `.. ` in the beginning, we wind up not treating this as a +# code snippet. The reST render I was using to test things does actually treat +# this as a code block, so we may be out of conformance here. +def rst_literal_skipped_possible_false_negative(): + """ + This is a test. + .. This is a test:: + + cool_stuff( 1 ) + + Done. + """ + pass + + +# This tests that a doctest inside of a reST literal block doesn't get +# reformatted. It's plausible this isn't the right behavior, but it also seems +# like it might be the right behavior since it is a literal block. (The doctest +# makes the Python code invalid.) +def rst_literal_skipped_doctest(): + """ + Do cool stuff:: + + >>> cool_stuff( 1 ) + + Done. + """ + pass + + +def rst_directive_skipped_not_indented(): + """ + .. code-block:: python + + cool_stuff( 1 ) + + Done. + """ + pass + + +def rst_directive_skipped_wrong_language(): + """ + .. code-block:: rust + + cool_stuff( 1 ) + + Done. + """ + pass + + +# This gets skipped for the same reason that the doctest in a literal block +# gets skipped. +def rst_directive_skipped_doctest(): + """ + .. code-block:: python - >>> x = '\"\"\"' + >>> cool_stuff( 1 ) + + Done. """ pass ``` @@ -1435,14 +2887,498 @@ def doctest_skipped_fstring(): pass -# Test that a doctest containing a triple quoted string at least -# does not result in invalid Python code. Ideally this would format -# correctly, but at time of writing it does not. -def doctest_invalid_skipped_with_triple_double_in_single_quote_string(): +# Test that a doctest containing a triple quoted string at least +# does not result in invalid Python code. Ideally this would format +# correctly, but at time of writing it does not. +def doctest_invalid_skipped_with_triple_double_in_single_quote_string(): + """ + Do cool stuff. + + >>> x = '\"\"\"' + """ + pass + + +############################################################################### +# reStructuredText CODE EXAMPLES +# +# This section shows examples of docstrings that contain code snippets in +# reStructuredText formatted code blocks. +# +# See: https://www.sphinx-doc.org/en/master/usage/restructuredtext/basics.html#literal-blocks +# See: https://www.sphinx-doc.org/en/master/usage/restructuredtext/directives.html#directive-code-block +# See: https://docutils.sourceforge.io/docs/ref/rst/restructuredtext.html#literal-blocks +# See: https://docutils.sourceforge.io/docs/ref/rst/restructuredtext.html#toc-entry-30 +# See: https://docutils.sourceforge.io/docs/ref/rst/restructuredtext.html#toc-entry-38 +############################################################################### + + +def rst_literal_simple(): + """ + Do cool stuff:: + + cool_stuff( 1 ) + + Done. + """ + pass + + +def rst_literal_simple_continued(): + """ + Do cool stuff:: + + def cool_stuff( x ): + print( f"hi {x}" ); + + Done. + """ + pass + + +# Tests that we can end the literal block on the second +# to last line of the docstring. +def rst_literal_second_to_last(): + """ + Do cool stuff:: + + cool_stuff( 1 ) + """ + pass + + +# Tests that we can end the literal block on the actual +# last line of the docstring. +def rst_literal_actually_last(): + """ + Do cool stuff:: + + cool_stuff( 1 )""" + pass + + +def rst_literal_with_blank_lines(): + """ + Do cool stuff:: + + def cool_stuff( x ): + print( f"hi {x}" ); + + def other_stuff( y ): + print( y ) + + Done. + """ + pass + + +# Extra blanks should be preserved. +def rst_literal_extra_blanks(): + """ + Do cool stuff:: + + + + cool_stuff( 1 ) + + + + Done. + """ + pass + + +# If a literal block is never properly ended (via a non-empty unindented line), +# then the end of the block should be the last non-empty line. And subsequent +# empty lines should be preserved as-is. +def rst_literal_extra_blanks_at_end(): + """ + Do cool stuff:: + + + cool_stuff( 1 ) + + + + """ + pass + + +# A literal block can contain many empty lines and it should not end the block +# if it continues. +def rst_literal_extra_blanks_in_snippet(): + """ + Do cool stuff:: + + cool_stuff( 1 ) + + + cool_stuff( 2 ) + + Done. + """ + pass + + +# This tests that a unindented line appearing after an indented line (but where +# the indent is still beyond the minimum) gets formatted properly. +def rst_literal_subsequent_line_not_indented(): + """ + Do cool stuff:: + + if True: + cool_stuff( ''' + hiya''' ) + + Done. + """ + pass + + +# This checks that if the first line in a code snippet has been indented with +# tabs, then so long as its "indentation length" is considered bigger than the +# line with `::`, it is reformatted as code. +# +# (If your tabwidth is set to 4, then it looks like the code snippet +# isn't indented at all, which is perhaps counter-intuitive. Indeed, reST +# itself also seems to recognize this as a code block, although it appears +# under-specified.) +def rst_literal_first_line_indent_uses_tabs_4spaces(): + """ + Do cool stuff:: + + cool_stuff( 1 ) + + Done. + """ + pass + + +# Like the test above, but with multiple lines. +def rst_literal_first_line_indent_uses_tabs_4spaces_multiple(): + """ + Do cool stuff:: + + cool_stuff( 1 ) + cool_stuff( 2 ) + + Done. + """ + pass + + +# Another test with tabs, except in this case, if your tabwidth is less than +# 8, than the code snippet actually looks like its indent is *less* than the +# opening line with a `::`. One might presume this means that the code snippet +# is not treated as a literal block and thus not reformatted, but since we +# assume all tabs have tabwidth=8 when computing indentation length, the code +# snippet is actually seen as being more indented than the opening `::` line. +# As with the above example, reST seems to behave the same way here. +def rst_literal_first_line_indent_uses_tabs_8spaces(): + """ + Do cool stuff:: + + cool_stuff( 1 ) + + Done. + """ + pass + + +# Like the test above, but with multiple lines. +def rst_literal_first_line_indent_uses_tabs_8spaces_multiple(): + """ + Do cool stuff:: + + cool_stuff( 1 ) + cool_stuff( 2 ) + + Done. + """ + pass + + +# Tests that if two lines in a literal block are indented to the same level +# but by different means (tabs versus spaces), then we correctly recognize the +# block and format it. +def rst_literal_first_line_tab_second_line_spaces(): + """ + Do cool stuff:: + + cool_stuff( 1 ) + cool_stuff( 2 ) + + Done. + """ + pass + + +# Tests that when two lines in a code snippet have weird and inconsistent +# indentation, the code still gets formatted so long as the indent is greater +# than the indent of the `::` line. +# +# In this case, the minimum indent is 5 spaces (from the second line) where as +# the first line has an indent of 8 spaces via a tab (by assuming tabwidth=8). +# The minimum indent is stripped from each code line. Since tabs aren't +# divisible, the entire tab is stripped, which means the first and second lines +# wind up with the same level of indentation. +# +# An alternative behavior here would be that the tab is replaced with 3 spaces +# instead of being stripped entirely. The code snippet itself would then have +# inconsistent indentation to the point of being invalid Python, and thus code +# formatting would be skipped. +# +# I decided on the former behavior because it seems a bit easier to implement, +# but we might want to switch to the alternative if cases like this show up in +# the real world. ---AG +def rst_literal_odd_indentation(): + """ + Do cool stuff:: + + cool_stuff( 1 ) + cool_stuff( 2 ) + + Done. + """ + pass + + +# Tests that having a line with a lone `::` works as an introduction of a +# literal block. +def rst_literal_lone_colon(): + """ + Do cool stuff. + + :: + + cool_stuff( 1 ) + + Done. + """ + pass + + +def rst_directive_simple(): + """ + .. code-block:: python + + cool_stuff( 1 ) + + Done. + """ + pass + + +def rst_directive_case_insensitive(): + """ + .. cOdE-bLoCk:: python + + cool_stuff( 1 ) + + Done. + """ + pass + + +def rst_directive_sourcecode(): + """ + .. sourcecode:: python + + cool_stuff( 1 ) + + Done. + """ + pass + + +def rst_directive_options(): + """ + .. code-block:: python + :linenos: + :emphasize-lines: 2,3 + :name: blah blah + + cool_stuff( 1 ) + cool_stuff( 2 ) + cool_stuff( 3 ) + cool_stuff( 4 ) + + Done. + """ + pass + + +# In this case, since `pycon` isn't recognized as a Python code snippet, the +# docstring reformatter ignores it. But it then picks up the doctest and +# reformats it. +def rst_directive_doctest(): + """ + .. code-block:: pycon + + >>> cool_stuff( 1 ) + + Done. + """ + pass + + +# This checks that if the first non-empty line after the start of a literal +# block is not indented more than the line containing the `::`, then it is not +# treated as a code snippet. +def rst_literal_skipped_first_line_not_indented(): + """ + Do cool stuff:: + + cool_stuff( 1 ) + + Done. + """ + pass + + +# Like the test above, but inserts an indented line after the un-indented one. +# This should not cause the literal block to be resumed. +def rst_literal_skipped_first_line_not_indented_then_indented(): + """ + Do cool stuff:: + + cool_stuff( 1 ) + cool_stuff( 2 ) + + Done. + """ + pass + + +# This also checks that a code snippet is not reformatted when the indentation +# of the first line is not more than the line with `::`, but this uses tabs to +# make it a little more confounding. It relies on the fact that indentation +# length is computed by assuming a tabwidth equal to 8. reST also rejects this +# and doesn't treat it as a literal block. +def rst_literal_skipped_first_line_not_indented_tab(): + """ + Do cool stuff:: + + cool_stuff( 1 ) + + Done. + """ + pass + + +# Like the previous test, but adds a second line. +def rst_literal_skipped_first_line_not_indented_tab_multiple(): + """ + Do cool stuff:: + + cool_stuff( 1 ) + cool_stuff( 2 ) + + Done. + """ + pass + + +# Tests that a code block with a second line that is not properly indented gets +# skipped. A valid code block needs to have an empty line separating these. +# +# One trick here is that we need to make sure the Python code in the snippet is +# valid, otherwise it would be skipped because of invalid Python. +def rst_literal_skipped_subsequent_line_not_indented(): + """ + Do cool stuff:: + + if True: + cool_stuff( ''' + hiya''' ) + + Done. + """ + pass + + +# In this test, we write what looks like a code-block, but it should be treated +# as invalid due to the missing `language` argument. +# +# It does still look like it could be a literal block according to the literal +# rules, but we currently consider the `.. ` prefix to indicate that it is not +# a literal block. +def rst_literal_skipped_not_directive(): + """ + .. code-block:: + + cool_stuff( 1 ) + + Done. + """ + pass + + +# In this test, we start a line with `.. `, which makes it look like it might +# be a directive. But instead continue it as if it was just some periods from +# the previous line, and then try to end it by starting a literal block. +# +# But because of the `.. ` in the beginning, we wind up not treating this as a +# code snippet. The reST render I was using to test things does actually treat +# this as a code block, so we may be out of conformance here. +def rst_literal_skipped_possible_false_negative(): + """ + This is a test. + .. This is a test:: + + cool_stuff( 1 ) + + Done. + """ + pass + + +# This tests that a doctest inside of a reST literal block doesn't get +# reformatted. It's plausible this isn't the right behavior, but it also seems +# like it might be the right behavior since it is a literal block. (The doctest +# makes the Python code invalid.) +def rst_literal_skipped_doctest(): + """ + Do cool stuff:: + + >>> cool_stuff( 1 ) + + Done. + """ + pass + + +def rst_directive_skipped_not_indented(): + """ + .. code-block:: python + + cool_stuff( 1 ) + + Done. + """ + pass + + +def rst_directive_skipped_wrong_language(): + """ + .. code-block:: rust + + cool_stuff( 1 ) + + Done. + """ + pass + + +# This gets skipped for the same reason that the doctest in a literal block +# gets skipped. +def rst_directive_skipped_doctest(): """ - Do cool stuff. + .. code-block:: python - >>> x = '\"\"\"' + >>> cool_stuff( 1 ) + + Done. """ pass ``` @@ -1805,9 +3741,493 @@ def doctest_skipped_fstring(): # correctly, but at time of writing it does not. def doctest_invalid_skipped_with_triple_double_in_single_quote_string(): """ - Do cool stuff. + Do cool stuff. + + >>> x = '\"\"\"' + """ + pass + + +############################################################################### +# reStructuredText CODE EXAMPLES +# +# This section shows examples of docstrings that contain code snippets in +# reStructuredText formatted code blocks. +# +# See: https://www.sphinx-doc.org/en/master/usage/restructuredtext/basics.html#literal-blocks +# See: https://www.sphinx-doc.org/en/master/usage/restructuredtext/directives.html#directive-code-block +# See: https://docutils.sourceforge.io/docs/ref/rst/restructuredtext.html#literal-blocks +# See: https://docutils.sourceforge.io/docs/ref/rst/restructuredtext.html#toc-entry-30 +# See: https://docutils.sourceforge.io/docs/ref/rst/restructuredtext.html#toc-entry-38 +############################################################################### + + +def rst_literal_simple(): + """ + Do cool stuff:: + + cool_stuff( 1 ) + + Done. + """ + pass + + +def rst_literal_simple_continued(): + """ + Do cool stuff:: + + def cool_stuff( x ): + print( f"hi {x}" ); + + Done. + """ + pass + + +# Tests that we can end the literal block on the second +# to last line of the docstring. +def rst_literal_second_to_last(): + """ + Do cool stuff:: + + cool_stuff( 1 ) + """ + pass + + +# Tests that we can end the literal block on the actual +# last line of the docstring. +def rst_literal_actually_last(): + """ + Do cool stuff:: + + cool_stuff( 1 )""" + pass + + +def rst_literal_with_blank_lines(): + """ + Do cool stuff:: + + def cool_stuff( x ): + print( f"hi {x}" ); + + def other_stuff( y ): + print( y ) + + Done. + """ + pass + + +# Extra blanks should be preserved. +def rst_literal_extra_blanks(): + """ + Do cool stuff:: + + + + cool_stuff( 1 ) + + + + Done. + """ + pass + + +# If a literal block is never properly ended (via a non-empty unindented line), +# then the end of the block should be the last non-empty line. And subsequent +# empty lines should be preserved as-is. +def rst_literal_extra_blanks_at_end(): + """ + Do cool stuff:: + + + cool_stuff( 1 ) + + + + """ + pass + + +# A literal block can contain many empty lines and it should not end the block +# if it continues. +def rst_literal_extra_blanks_in_snippet(): + """ + Do cool stuff:: + + cool_stuff( 1 ) + + + cool_stuff( 2 ) + + Done. + """ + pass + + +# This tests that a unindented line appearing after an indented line (but where +# the indent is still beyond the minimum) gets formatted properly. +def rst_literal_subsequent_line_not_indented(): + """ + Do cool stuff:: + + if True: + cool_stuff( ''' + hiya''' ) + + Done. + """ + pass + + +# This checks that if the first line in a code snippet has been indented with +# tabs, then so long as its "indentation length" is considered bigger than the +# line with `::`, it is reformatted as code. +# +# (If your tabwidth is set to 4, then it looks like the code snippet +# isn't indented at all, which is perhaps counter-intuitive. Indeed, reST +# itself also seems to recognize this as a code block, although it appears +# under-specified.) +def rst_literal_first_line_indent_uses_tabs_4spaces(): + """ + Do cool stuff:: + + cool_stuff( 1 ) + + Done. + """ + pass + + +# Like the test above, but with multiple lines. +def rst_literal_first_line_indent_uses_tabs_4spaces_multiple(): + """ + Do cool stuff:: + + cool_stuff( 1 ) + cool_stuff( 2 ) + + Done. + """ + pass + + +# Another test with tabs, except in this case, if your tabwidth is less than +# 8, than the code snippet actually looks like its indent is *less* than the +# opening line with a `::`. One might presume this means that the code snippet +# is not treated as a literal block and thus not reformatted, but since we +# assume all tabs have tabwidth=8 when computing indentation length, the code +# snippet is actually seen as being more indented than the opening `::` line. +# As with the above example, reST seems to behave the same way here. +def rst_literal_first_line_indent_uses_tabs_8spaces(): + """ + Do cool stuff:: + + cool_stuff( 1 ) + + Done. + """ + pass + + +# Like the test above, but with multiple lines. +def rst_literal_first_line_indent_uses_tabs_8spaces_multiple(): + """ + Do cool stuff:: + + cool_stuff( 1 ) + cool_stuff( 2 ) + + Done. + """ + pass + + +# Tests that if two lines in a literal block are indented to the same level +# but by different means (tabs versus spaces), then we correctly recognize the +# block and format it. +def rst_literal_first_line_tab_second_line_spaces(): + """ + Do cool stuff:: + + cool_stuff( 1 ) + cool_stuff( 2 ) + + Done. + """ + pass + + +# Tests that when two lines in a code snippet have weird and inconsistent +# indentation, the code still gets formatted so long as the indent is greater +# than the indent of the `::` line. +# +# In this case, the minimum indent is 5 spaces (from the second line) where as +# the first line has an indent of 8 spaces via a tab (by assuming tabwidth=8). +# The minimum indent is stripped from each code line. Since tabs aren't +# divisible, the entire tab is stripped, which means the first and second lines +# wind up with the same level of indentation. +# +# An alternative behavior here would be that the tab is replaced with 3 spaces +# instead of being stripped entirely. The code snippet itself would then have +# inconsistent indentation to the point of being invalid Python, and thus code +# formatting would be skipped. +# +# I decided on the former behavior because it seems a bit easier to implement, +# but we might want to switch to the alternative if cases like this show up in +# the real world. ---AG +def rst_literal_odd_indentation(): + """ + Do cool stuff:: + + cool_stuff( 1 ) + cool_stuff( 2 ) + + Done. + """ + pass + + +# Tests that having a line with a lone `::` works as an introduction of a +# literal block. +def rst_literal_lone_colon(): + """ + Do cool stuff. + + :: + + cool_stuff( 1 ) + + Done. + """ + pass + + +def rst_directive_simple(): + """ + .. code-block:: python + + cool_stuff( 1 ) + + Done. + """ + pass + + +def rst_directive_case_insensitive(): + """ + .. cOdE-bLoCk:: python + + cool_stuff( 1 ) + + Done. + """ + pass + + +def rst_directive_sourcecode(): + """ + .. sourcecode:: python + + cool_stuff( 1 ) + + Done. + """ + pass + + +def rst_directive_options(): + """ + .. code-block:: python + :linenos: + :emphasize-lines: 2,3 + :name: blah blah + + cool_stuff( 1 ) + cool_stuff( 2 ) + cool_stuff( 3 ) + cool_stuff( 4 ) + + Done. + """ + pass + + +# In this case, since `pycon` isn't recognized as a Python code snippet, the +# docstring reformatter ignores it. But it then picks up the doctest and +# reformats it. +def rst_directive_doctest(): + """ + .. code-block:: pycon + + >>> cool_stuff( 1 ) + + Done. + """ + pass + + +# This checks that if the first non-empty line after the start of a literal +# block is not indented more than the line containing the `::`, then it is not +# treated as a code snippet. +def rst_literal_skipped_first_line_not_indented(): + """ + Do cool stuff:: + + cool_stuff( 1 ) + + Done. + """ + pass + + +# Like the test above, but inserts an indented line after the un-indented one. +# This should not cause the literal block to be resumed. +def rst_literal_skipped_first_line_not_indented_then_indented(): + """ + Do cool stuff:: + + cool_stuff( 1 ) + cool_stuff( 2 ) + + Done. + """ + pass + + +# This also checks that a code snippet is not reformatted when the indentation +# of the first line is not more than the line with `::`, but this uses tabs to +# make it a little more confounding. It relies on the fact that indentation +# length is computed by assuming a tabwidth equal to 8. reST also rejects this +# and doesn't treat it as a literal block. +def rst_literal_skipped_first_line_not_indented_tab(): + """ + Do cool stuff:: + + cool_stuff( 1 ) + + Done. + """ + pass + + +# Like the previous test, but adds a second line. +def rst_literal_skipped_first_line_not_indented_tab_multiple(): + """ + Do cool stuff:: + + cool_stuff( 1 ) + cool_stuff( 2 ) + + Done. + """ + pass + + +# Tests that a code block with a second line that is not properly indented gets +# skipped. A valid code block needs to have an empty line separating these. +# +# One trick here is that we need to make sure the Python code in the snippet is +# valid, otherwise it would be skipped because of invalid Python. +def rst_literal_skipped_subsequent_line_not_indented(): + """ + Do cool stuff:: + + if True: + cool_stuff( ''' + hiya''' ) + + Done. + """ + pass + + +# In this test, we write what looks like a code-block, but it should be treated +# as invalid due to the missing `language` argument. +# +# It does still look like it could be a literal block according to the literal +# rules, but we currently consider the `.. ` prefix to indicate that it is not +# a literal block. +def rst_literal_skipped_not_directive(): + """ + .. code-block:: + + cool_stuff( 1 ) + + Done. + """ + pass + + +# In this test, we start a line with `.. `, which makes it look like it might +# be a directive. But instead continue it as if it was just some periods from +# the previous line, and then try to end it by starting a literal block. +# +# But because of the `.. ` in the beginning, we wind up not treating this as a +# code snippet. The reST render I was using to test things does actually treat +# this as a code block, so we may be out of conformance here. +def rst_literal_skipped_possible_false_negative(): + """ + This is a test. + .. This is a test:: + + cool_stuff( 1 ) + + Done. + """ + pass + + +# This tests that a doctest inside of a reST literal block doesn't get +# reformatted. It's plausible this isn't the right behavior, but it also seems +# like it might be the right behavior since it is a literal block. (The doctest +# makes the Python code invalid.) +def rst_literal_skipped_doctest(): + """ + Do cool stuff:: + + >>> cool_stuff( 1 ) + + Done. + """ + pass + + +def rst_directive_skipped_not_indented(): + """ + .. code-block:: python + + cool_stuff( 1 ) + + Done. + """ + pass + + +def rst_directive_skipped_wrong_language(): + """ + .. code-block:: rust + + cool_stuff( 1 ) + + Done. + """ + pass + + +# This gets skipped for the same reason that the doctest in a literal block +# gets skipped. +def rst_directive_skipped_doctest(): + """ + .. code-block:: python - >>> x = '\"\"\"' + >>> cool_stuff( 1 ) + + Done. """ pass ``` @@ -2165,14 +4585,501 @@ def doctest_skipped_fstring(): pass -# Test that a doctest containing a triple quoted string at least -# does not result in invalid Python code. Ideally this would format -# correctly, but at time of writing it does not. -def doctest_invalid_skipped_with_triple_double_in_single_quote_string(): +# Test that a doctest containing a triple quoted string at least +# does not result in invalid Python code. Ideally this would format +# correctly, but at time of writing it does not. +def doctest_invalid_skipped_with_triple_double_in_single_quote_string(): + """ + Do cool stuff. + + >>> x = '\"\"\"' + """ + pass + + +############################################################################### +# reStructuredText CODE EXAMPLES +# +# This section shows examples of docstrings that contain code snippets in +# reStructuredText formatted code blocks. +# +# See: https://www.sphinx-doc.org/en/master/usage/restructuredtext/basics.html#literal-blocks +# See: https://www.sphinx-doc.org/en/master/usage/restructuredtext/directives.html#directive-code-block +# See: https://docutils.sourceforge.io/docs/ref/rst/restructuredtext.html#literal-blocks +# See: https://docutils.sourceforge.io/docs/ref/rst/restructuredtext.html#toc-entry-30 +# See: https://docutils.sourceforge.io/docs/ref/rst/restructuredtext.html#toc-entry-38 +############################################################################### + + +def rst_literal_simple(): + """ + Do cool stuff:: + + cool_stuff(1) + + Done. + """ + pass + + +def rst_literal_simple_continued(): + """ + Do cool stuff:: + + def cool_stuff(x): + print(f"hi {x}") + + Done. + """ + pass + + +# Tests that we can end the literal block on the second +# to last line of the docstring. +def rst_literal_second_to_last(): + """ + Do cool stuff:: + + cool_stuff(1) + """ + pass + + +# Tests that we can end the literal block on the actual +# last line of the docstring. +def rst_literal_actually_last(): + """ + Do cool stuff:: + + cool_stuff(1)""" + pass + + +def rst_literal_with_blank_lines(): + """ + Do cool stuff:: + + def cool_stuff(x): + print(f"hi {x}") + + + def other_stuff(y): + print(y) + + Done. + """ + pass + + +# Extra blanks should be preserved. +def rst_literal_extra_blanks(): + """ + Do cool stuff:: + + + + cool_stuff(1) + + + + Done. + """ + pass + + +# If a literal block is never properly ended (via a non-empty unindented line), +# then the end of the block should be the last non-empty line. And subsequent +# empty lines should be preserved as-is. +def rst_literal_extra_blanks_at_end(): + """ + Do cool stuff:: + + + cool_stuff(1) + + + + """ + pass + + +# A literal block can contain many empty lines and it should not end the block +# if it continues. +def rst_literal_extra_blanks_in_snippet(): + """ + Do cool stuff:: + + cool_stuff(1) + + + cool_stuff(2) + + Done. + """ + pass + + +# This tests that a unindented line appearing after an indented line (but where +# the indent is still beyond the minimum) gets formatted properly. +def rst_literal_subsequent_line_not_indented(): + """ + Do cool stuff:: + + if True: + cool_stuff( + ''' + hiya''' + ) + + Done. + """ + pass + + +# This checks that if the first line in a code snippet has been indented with +# tabs, then so long as its "indentation length" is considered bigger than the +# line with `::`, it is reformatted as code. +# +# (If your tabwidth is set to 4, then it looks like the code snippet +# isn't indented at all, which is perhaps counter-intuitive. Indeed, reST +# itself also seems to recognize this as a code block, although it appears +# under-specified.) +def rst_literal_first_line_indent_uses_tabs_4spaces(): + """ + Do cool stuff:: + + cool_stuff(1) + + Done. + """ + pass + + +# Like the test above, but with multiple lines. +def rst_literal_first_line_indent_uses_tabs_4spaces_multiple(): + """ + Do cool stuff:: + + cool_stuff(1) + cool_stuff(2) + + Done. + """ + pass + + +# Another test with tabs, except in this case, if your tabwidth is less than +# 8, than the code snippet actually looks like its indent is *less* than the +# opening line with a `::`. One might presume this means that the code snippet +# is not treated as a literal block and thus not reformatted, but since we +# assume all tabs have tabwidth=8 when computing indentation length, the code +# snippet is actually seen as being more indented than the opening `::` line. +# As with the above example, reST seems to behave the same way here. +def rst_literal_first_line_indent_uses_tabs_8spaces(): + """ + Do cool stuff:: + + cool_stuff(1) + + Done. + """ + pass + + +# Like the test above, but with multiple lines. +def rst_literal_first_line_indent_uses_tabs_8spaces_multiple(): + """ + Do cool stuff:: + + cool_stuff(1) + cool_stuff(2) + + Done. + """ + pass + + +# Tests that if two lines in a literal block are indented to the same level +# but by different means (tabs versus spaces), then we correctly recognize the +# block and format it. +def rst_literal_first_line_tab_second_line_spaces(): + """ + Do cool stuff:: + + cool_stuff(1) + cool_stuff(2) + + Done. + """ + pass + + +# Tests that when two lines in a code snippet have weird and inconsistent +# indentation, the code still gets formatted so long as the indent is greater +# than the indent of the `::` line. +# +# In this case, the minimum indent is 5 spaces (from the second line) where as +# the first line has an indent of 8 spaces via a tab (by assuming tabwidth=8). +# The minimum indent is stripped from each code line. Since tabs aren't +# divisible, the entire tab is stripped, which means the first and second lines +# wind up with the same level of indentation. +# +# An alternative behavior here would be that the tab is replaced with 3 spaces +# instead of being stripped entirely. The code snippet itself would then have +# inconsistent indentation to the point of being invalid Python, and thus code +# formatting would be skipped. +# +# I decided on the former behavior because it seems a bit easier to implement, +# but we might want to switch to the alternative if cases like this show up in +# the real world. ---AG +def rst_literal_odd_indentation(): + """ + Do cool stuff:: + + cool_stuff(1) + cool_stuff(2) + + Done. + """ + pass + + +# Tests that having a line with a lone `::` works as an introduction of a +# literal block. +def rst_literal_lone_colon(): + """ + Do cool stuff. + + :: + + cool_stuff(1) + + Done. + """ + pass + + +def rst_directive_simple(): + """ + .. code-block:: python + + cool_stuff(1) + + Done. + """ + pass + + +def rst_directive_case_insensitive(): + """ + .. cOdE-bLoCk:: python + + cool_stuff(1) + + Done. + """ + pass + + +def rst_directive_sourcecode(): + """ + .. sourcecode:: python + + cool_stuff(1) + + Done. + """ + pass + + +def rst_directive_options(): + """ + .. code-block:: python + :linenos: + :emphasize-lines: 2,3 + :name: blah blah + + cool_stuff(1) + cool_stuff(2) + cool_stuff(3) + cool_stuff(4) + + Done. + """ + pass + + +# In this case, since `pycon` isn't recognized as a Python code snippet, the +# docstring reformatter ignores it. But it then picks up the doctest and +# reformats it. +def rst_directive_doctest(): + """ + .. code-block:: pycon + + >>> cool_stuff(1) + + Done. + """ + pass + + +# This checks that if the first non-empty line after the start of a literal +# block is not indented more than the line containing the `::`, then it is not +# treated as a code snippet. +def rst_literal_skipped_first_line_not_indented(): + """ + Do cool stuff:: + + cool_stuff( 1 ) + + Done. + """ + pass + + +# Like the test above, but inserts an indented line after the un-indented one. +# This should not cause the literal block to be resumed. +def rst_literal_skipped_first_line_not_indented_then_indented(): + """ + Do cool stuff:: + + cool_stuff( 1 ) + cool_stuff( 2 ) + + Done. + """ + pass + + +# This also checks that a code snippet is not reformatted when the indentation +# of the first line is not more than the line with `::`, but this uses tabs to +# make it a little more confounding. It relies on the fact that indentation +# length is computed by assuming a tabwidth equal to 8. reST also rejects this +# and doesn't treat it as a literal block. +def rst_literal_skipped_first_line_not_indented_tab(): + """ + Do cool stuff:: + + cool_stuff( 1 ) + + Done. + """ + pass + + +# Like the previous test, but adds a second line. +def rst_literal_skipped_first_line_not_indented_tab_multiple(): + """ + Do cool stuff:: + + cool_stuff( 1 ) + cool_stuff( 2 ) + + Done. + """ + pass + + +# Tests that a code block with a second line that is not properly indented gets +# skipped. A valid code block needs to have an empty line separating these. +# +# One trick here is that we need to make sure the Python code in the snippet is +# valid, otherwise it would be skipped because of invalid Python. +def rst_literal_skipped_subsequent_line_not_indented(): + """ + Do cool stuff:: + + if True: + cool_stuff( ''' + hiya''' ) + + Done. + """ + pass + + +# In this test, we write what looks like a code-block, but it should be treated +# as invalid due to the missing `language` argument. +# +# It does still look like it could be a literal block according to the literal +# rules, but we currently consider the `.. ` prefix to indicate that it is not +# a literal block. +def rst_literal_skipped_not_directive(): + """ + .. code-block:: + + cool_stuff( 1 ) + + Done. + """ + pass + + +# In this test, we start a line with `.. `, which makes it look like it might +# be a directive. But instead continue it as if it was just some periods from +# the previous line, and then try to end it by starting a literal block. +# +# But because of the `.. ` in the beginning, we wind up not treating this as a +# code snippet. The reST render I was using to test things does actually treat +# this as a code block, so we may be out of conformance here. +def rst_literal_skipped_possible_false_negative(): + """ + This is a test. + .. This is a test:: + + cool_stuff( 1 ) + + Done. + """ + pass + + +# This tests that a doctest inside of a reST literal block doesn't get +# reformatted. It's plausible this isn't the right behavior, but it also seems +# like it might be the right behavior since it is a literal block. (The doctest +# makes the Python code invalid.) +def rst_literal_skipped_doctest(): + """ + Do cool stuff:: + + >>> cool_stuff( 1 ) + + Done. + """ + pass + + +def rst_directive_skipped_not_indented(): + """ + .. code-block:: python + + cool_stuff( 1 ) + + Done. + """ + pass + + +def rst_directive_skipped_wrong_language(): + """ + .. code-block:: rust + + cool_stuff( 1 ) + + Done. + """ + pass + + +# This gets skipped for the same reason that the doctest in a literal block +# gets skipped. +def rst_directive_skipped_doctest(): """ - Do cool stuff. + .. code-block:: python - >>> x = '\"\"\"' + >>> cool_stuff( 1 ) + + Done. """ pass ``` @@ -2535,9 +5442,496 @@ def doctest_skipped_fstring(): # correctly, but at time of writing it does not. def doctest_invalid_skipped_with_triple_double_in_single_quote_string(): """ - Do cool stuff. + Do cool stuff. + + >>> x = '\"\"\"' + """ + pass + + +############################################################################### +# reStructuredText CODE EXAMPLES +# +# This section shows examples of docstrings that contain code snippets in +# reStructuredText formatted code blocks. +# +# See: https://www.sphinx-doc.org/en/master/usage/restructuredtext/basics.html#literal-blocks +# See: https://www.sphinx-doc.org/en/master/usage/restructuredtext/directives.html#directive-code-block +# See: https://docutils.sourceforge.io/docs/ref/rst/restructuredtext.html#literal-blocks +# See: https://docutils.sourceforge.io/docs/ref/rst/restructuredtext.html#toc-entry-30 +# See: https://docutils.sourceforge.io/docs/ref/rst/restructuredtext.html#toc-entry-38 +############################################################################### + + +def rst_literal_simple(): + """ + Do cool stuff:: + + cool_stuff(1) + + Done. + """ + pass + + +def rst_literal_simple_continued(): + """ + Do cool stuff:: + + def cool_stuff(x): + print(f"hi {x}") + + Done. + """ + pass + + +# Tests that we can end the literal block on the second +# to last line of the docstring. +def rst_literal_second_to_last(): + """ + Do cool stuff:: + + cool_stuff(1) + """ + pass + + +# Tests that we can end the literal block on the actual +# last line of the docstring. +def rst_literal_actually_last(): + """ + Do cool stuff:: + + cool_stuff(1)""" + pass + + +def rst_literal_with_blank_lines(): + """ + Do cool stuff:: + + def cool_stuff(x): + print(f"hi {x}") + + + def other_stuff(y): + print(y) + + Done. + """ + pass + + +# Extra blanks should be preserved. +def rst_literal_extra_blanks(): + """ + Do cool stuff:: + + + + cool_stuff(1) + + + + Done. + """ + pass + + +# If a literal block is never properly ended (via a non-empty unindented line), +# then the end of the block should be the last non-empty line. And subsequent +# empty lines should be preserved as-is. +def rst_literal_extra_blanks_at_end(): + """ + Do cool stuff:: + + + cool_stuff(1) + + + + """ + pass + + +# A literal block can contain many empty lines and it should not end the block +# if it continues. +def rst_literal_extra_blanks_in_snippet(): + """ + Do cool stuff:: + + cool_stuff(1) + + + cool_stuff(2) + + Done. + """ + pass + + +# This tests that a unindented line appearing after an indented line (but where +# the indent is still beyond the minimum) gets formatted properly. +def rst_literal_subsequent_line_not_indented(): + """ + Do cool stuff:: + + if True: + cool_stuff( + ''' + hiya''' + ) + + Done. + """ + pass + + +# This checks that if the first line in a code snippet has been indented with +# tabs, then so long as its "indentation length" is considered bigger than the +# line with `::`, it is reformatted as code. +# +# (If your tabwidth is set to 4, then it looks like the code snippet +# isn't indented at all, which is perhaps counter-intuitive. Indeed, reST +# itself also seems to recognize this as a code block, although it appears +# under-specified.) +def rst_literal_first_line_indent_uses_tabs_4spaces(): + """ + Do cool stuff:: + + cool_stuff(1) + + Done. + """ + pass + + +# Like the test above, but with multiple lines. +def rst_literal_first_line_indent_uses_tabs_4spaces_multiple(): + """ + Do cool stuff:: + + cool_stuff(1) + cool_stuff(2) + + Done. + """ + pass + + +# Another test with tabs, except in this case, if your tabwidth is less than +# 8, than the code snippet actually looks like its indent is *less* than the +# opening line with a `::`. One might presume this means that the code snippet +# is not treated as a literal block and thus not reformatted, but since we +# assume all tabs have tabwidth=8 when computing indentation length, the code +# snippet is actually seen as being more indented than the opening `::` line. +# As with the above example, reST seems to behave the same way here. +def rst_literal_first_line_indent_uses_tabs_8spaces(): + """ + Do cool stuff:: + + cool_stuff(1) + + Done. + """ + pass + + +# Like the test above, but with multiple lines. +def rst_literal_first_line_indent_uses_tabs_8spaces_multiple(): + """ + Do cool stuff:: + + cool_stuff(1) + cool_stuff(2) + + Done. + """ + pass + + +# Tests that if two lines in a literal block are indented to the same level +# but by different means (tabs versus spaces), then we correctly recognize the +# block and format it. +def rst_literal_first_line_tab_second_line_spaces(): + """ + Do cool stuff:: + + cool_stuff(1) + cool_stuff(2) + + Done. + """ + pass + + +# Tests that when two lines in a code snippet have weird and inconsistent +# indentation, the code still gets formatted so long as the indent is greater +# than the indent of the `::` line. +# +# In this case, the minimum indent is 5 spaces (from the second line) where as +# the first line has an indent of 8 spaces via a tab (by assuming tabwidth=8). +# The minimum indent is stripped from each code line. Since tabs aren't +# divisible, the entire tab is stripped, which means the first and second lines +# wind up with the same level of indentation. +# +# An alternative behavior here would be that the tab is replaced with 3 spaces +# instead of being stripped entirely. The code snippet itself would then have +# inconsistent indentation to the point of being invalid Python, and thus code +# formatting would be skipped. +# +# I decided on the former behavior because it seems a bit easier to implement, +# but we might want to switch to the alternative if cases like this show up in +# the real world. ---AG +def rst_literal_odd_indentation(): + """ + Do cool stuff:: + + cool_stuff(1) + cool_stuff(2) + + Done. + """ + pass + + +# Tests that having a line with a lone `::` works as an introduction of a +# literal block. +def rst_literal_lone_colon(): + """ + Do cool stuff. + + :: + + cool_stuff(1) + + Done. + """ + pass + + +def rst_directive_simple(): + """ + .. code-block:: python + + cool_stuff(1) + + Done. + """ + pass + + +def rst_directive_case_insensitive(): + """ + .. cOdE-bLoCk:: python + + cool_stuff(1) + + Done. + """ + pass + + +def rst_directive_sourcecode(): + """ + .. sourcecode:: python + + cool_stuff(1) + + Done. + """ + pass + + +def rst_directive_options(): + """ + .. code-block:: python + :linenos: + :emphasize-lines: 2,3 + :name: blah blah + + cool_stuff(1) + cool_stuff(2) + cool_stuff(3) + cool_stuff(4) + + Done. + """ + pass + + +# In this case, since `pycon` isn't recognized as a Python code snippet, the +# docstring reformatter ignores it. But it then picks up the doctest and +# reformats it. +def rst_directive_doctest(): + """ + .. code-block:: pycon + + >>> cool_stuff(1) + + Done. + """ + pass + + +# This checks that if the first non-empty line after the start of a literal +# block is not indented more than the line containing the `::`, then it is not +# treated as a code snippet. +def rst_literal_skipped_first_line_not_indented(): + """ + Do cool stuff:: + + cool_stuff( 1 ) + + Done. + """ + pass + + +# Like the test above, but inserts an indented line after the un-indented one. +# This should not cause the literal block to be resumed. +def rst_literal_skipped_first_line_not_indented_then_indented(): + """ + Do cool stuff:: + + cool_stuff( 1 ) + cool_stuff( 2 ) + + Done. + """ + pass + + +# This also checks that a code snippet is not reformatted when the indentation +# of the first line is not more than the line with `::`, but this uses tabs to +# make it a little more confounding. It relies on the fact that indentation +# length is computed by assuming a tabwidth equal to 8. reST also rejects this +# and doesn't treat it as a literal block. +def rst_literal_skipped_first_line_not_indented_tab(): + """ + Do cool stuff:: + + cool_stuff( 1 ) + + Done. + """ + pass + + +# Like the previous test, but adds a second line. +def rst_literal_skipped_first_line_not_indented_tab_multiple(): + """ + Do cool stuff:: + + cool_stuff( 1 ) + cool_stuff( 2 ) + + Done. + """ + pass + + +# Tests that a code block with a second line that is not properly indented gets +# skipped. A valid code block needs to have an empty line separating these. +# +# One trick here is that we need to make sure the Python code in the snippet is +# valid, otherwise it would be skipped because of invalid Python. +def rst_literal_skipped_subsequent_line_not_indented(): + """ + Do cool stuff:: + + if True: + cool_stuff( ''' + hiya''' ) + + Done. + """ + pass + + +# In this test, we write what looks like a code-block, but it should be treated +# as invalid due to the missing `language` argument. +# +# It does still look like it could be a literal block according to the literal +# rules, but we currently consider the `.. ` prefix to indicate that it is not +# a literal block. +def rst_literal_skipped_not_directive(): + """ + .. code-block:: + + cool_stuff( 1 ) + + Done. + """ + pass + + +# In this test, we start a line with `.. `, which makes it look like it might +# be a directive. But instead continue it as if it was just some periods from +# the previous line, and then try to end it by starting a literal block. +# +# But because of the `.. ` in the beginning, we wind up not treating this as a +# code snippet. The reST render I was using to test things does actually treat +# this as a code block, so we may be out of conformance here. +def rst_literal_skipped_possible_false_negative(): + """ + This is a test. + .. This is a test:: + + cool_stuff( 1 ) + + Done. + """ + pass + + +# This tests that a doctest inside of a reST literal block doesn't get +# reformatted. It's plausible this isn't the right behavior, but it also seems +# like it might be the right behavior since it is a literal block. (The doctest +# makes the Python code invalid.) +def rst_literal_skipped_doctest(): + """ + Do cool stuff:: + + >>> cool_stuff( 1 ) + + Done. + """ + pass + + +def rst_directive_skipped_not_indented(): + """ + .. code-block:: python + + cool_stuff( 1 ) + + Done. + """ + pass + + +def rst_directive_skipped_wrong_language(): + """ + .. code-block:: rust + + cool_stuff( 1 ) + + Done. + """ + pass + + +# This gets skipped for the same reason that the doctest in a literal block +# gets skipped. +def rst_directive_skipped_doctest(): + """ + .. code-block:: python - >>> x = '\"\"\"' + >>> cool_stuff( 1 ) + + Done. """ pass ``` @@ -2895,14 +6289,501 @@ def doctest_skipped_fstring(): pass -# Test that a doctest containing a triple quoted string at least -# does not result in invalid Python code. Ideally this would format -# correctly, but at time of writing it does not. -def doctest_invalid_skipped_with_triple_double_in_single_quote_string(): +# Test that a doctest containing a triple quoted string at least +# does not result in invalid Python code. Ideally this would format +# correctly, but at time of writing it does not. +def doctest_invalid_skipped_with_triple_double_in_single_quote_string(): + """ + Do cool stuff. + + >>> x = '\"\"\"' + """ + pass + + +############################################################################### +# reStructuredText CODE EXAMPLES +# +# This section shows examples of docstrings that contain code snippets in +# reStructuredText formatted code blocks. +# +# See: https://www.sphinx-doc.org/en/master/usage/restructuredtext/basics.html#literal-blocks +# See: https://www.sphinx-doc.org/en/master/usage/restructuredtext/directives.html#directive-code-block +# See: https://docutils.sourceforge.io/docs/ref/rst/restructuredtext.html#literal-blocks +# See: https://docutils.sourceforge.io/docs/ref/rst/restructuredtext.html#toc-entry-30 +# See: https://docutils.sourceforge.io/docs/ref/rst/restructuredtext.html#toc-entry-38 +############################################################################### + + +def rst_literal_simple(): + """ + Do cool stuff:: + + cool_stuff(1) + + Done. + """ + pass + + +def rst_literal_simple_continued(): + """ + Do cool stuff:: + + def cool_stuff(x): + print(f"hi {x}") + + Done. + """ + pass + + +# Tests that we can end the literal block on the second +# to last line of the docstring. +def rst_literal_second_to_last(): + """ + Do cool stuff:: + + cool_stuff(1) + """ + pass + + +# Tests that we can end the literal block on the actual +# last line of the docstring. +def rst_literal_actually_last(): + """ + Do cool stuff:: + + cool_stuff(1)""" + pass + + +def rst_literal_with_blank_lines(): + """ + Do cool stuff:: + + def cool_stuff(x): + print(f"hi {x}") + + + def other_stuff(y): + print(y) + + Done. + """ + pass + + +# Extra blanks should be preserved. +def rst_literal_extra_blanks(): + """ + Do cool stuff:: + + + + cool_stuff(1) + + + + Done. + """ + pass + + +# If a literal block is never properly ended (via a non-empty unindented line), +# then the end of the block should be the last non-empty line. And subsequent +# empty lines should be preserved as-is. +def rst_literal_extra_blanks_at_end(): + """ + Do cool stuff:: + + + cool_stuff(1) + + + + """ + pass + + +# A literal block can contain many empty lines and it should not end the block +# if it continues. +def rst_literal_extra_blanks_in_snippet(): + """ + Do cool stuff:: + + cool_stuff(1) + + + cool_stuff(2) + + Done. + """ + pass + + +# This tests that a unindented line appearing after an indented line (but where +# the indent is still beyond the minimum) gets formatted properly. +def rst_literal_subsequent_line_not_indented(): + """ + Do cool stuff:: + + if True: + cool_stuff( + ''' + hiya''' + ) + + Done. + """ + pass + + +# This checks that if the first line in a code snippet has been indented with +# tabs, then so long as its "indentation length" is considered bigger than the +# line with `::`, it is reformatted as code. +# +# (If your tabwidth is set to 4, then it looks like the code snippet +# isn't indented at all, which is perhaps counter-intuitive. Indeed, reST +# itself also seems to recognize this as a code block, although it appears +# under-specified.) +def rst_literal_first_line_indent_uses_tabs_4spaces(): + """ + Do cool stuff:: + + cool_stuff(1) + + Done. + """ + pass + + +# Like the test above, but with multiple lines. +def rst_literal_first_line_indent_uses_tabs_4spaces_multiple(): + """ + Do cool stuff:: + + cool_stuff(1) + cool_stuff(2) + + Done. + """ + pass + + +# Another test with tabs, except in this case, if your tabwidth is less than +# 8, than the code snippet actually looks like its indent is *less* than the +# opening line with a `::`. One might presume this means that the code snippet +# is not treated as a literal block and thus not reformatted, but since we +# assume all tabs have tabwidth=8 when computing indentation length, the code +# snippet is actually seen as being more indented than the opening `::` line. +# As with the above example, reST seems to behave the same way here. +def rst_literal_first_line_indent_uses_tabs_8spaces(): + """ + Do cool stuff:: + + cool_stuff(1) + + Done. + """ + pass + + +# Like the test above, but with multiple lines. +def rst_literal_first_line_indent_uses_tabs_8spaces_multiple(): + """ + Do cool stuff:: + + cool_stuff(1) + cool_stuff(2) + + Done. + """ + pass + + +# Tests that if two lines in a literal block are indented to the same level +# but by different means (tabs versus spaces), then we correctly recognize the +# block and format it. +def rst_literal_first_line_tab_second_line_spaces(): + """ + Do cool stuff:: + + cool_stuff(1) + cool_stuff(2) + + Done. + """ + pass + + +# Tests that when two lines in a code snippet have weird and inconsistent +# indentation, the code still gets formatted so long as the indent is greater +# than the indent of the `::` line. +# +# In this case, the minimum indent is 5 spaces (from the second line) where as +# the first line has an indent of 8 spaces via a tab (by assuming tabwidth=8). +# The minimum indent is stripped from each code line. Since tabs aren't +# divisible, the entire tab is stripped, which means the first and second lines +# wind up with the same level of indentation. +# +# An alternative behavior here would be that the tab is replaced with 3 spaces +# instead of being stripped entirely. The code snippet itself would then have +# inconsistent indentation to the point of being invalid Python, and thus code +# formatting would be skipped. +# +# I decided on the former behavior because it seems a bit easier to implement, +# but we might want to switch to the alternative if cases like this show up in +# the real world. ---AG +def rst_literal_odd_indentation(): + """ + Do cool stuff:: + + cool_stuff(1) + cool_stuff(2) + + Done. + """ + pass + + +# Tests that having a line with a lone `::` works as an introduction of a +# literal block. +def rst_literal_lone_colon(): + """ + Do cool stuff. + + :: + + cool_stuff(1) + + Done. + """ + pass + + +def rst_directive_simple(): + """ + .. code-block:: python + + cool_stuff(1) + + Done. + """ + pass + + +def rst_directive_case_insensitive(): + """ + .. cOdE-bLoCk:: python + + cool_stuff(1) + + Done. + """ + pass + + +def rst_directive_sourcecode(): + """ + .. sourcecode:: python + + cool_stuff(1) + + Done. + """ + pass + + +def rst_directive_options(): + """ + .. code-block:: python + :linenos: + :emphasize-lines: 2,3 + :name: blah blah + + cool_stuff(1) + cool_stuff(2) + cool_stuff(3) + cool_stuff(4) + + Done. + """ + pass + + +# In this case, since `pycon` isn't recognized as a Python code snippet, the +# docstring reformatter ignores it. But it then picks up the doctest and +# reformats it. +def rst_directive_doctest(): + """ + .. code-block:: pycon + + >>> cool_stuff(1) + + Done. + """ + pass + + +# This checks that if the first non-empty line after the start of a literal +# block is not indented more than the line containing the `::`, then it is not +# treated as a code snippet. +def rst_literal_skipped_first_line_not_indented(): + """ + Do cool stuff:: + + cool_stuff( 1 ) + + Done. + """ + pass + + +# Like the test above, but inserts an indented line after the un-indented one. +# This should not cause the literal block to be resumed. +def rst_literal_skipped_first_line_not_indented_then_indented(): + """ + Do cool stuff:: + + cool_stuff( 1 ) + cool_stuff( 2 ) + + Done. + """ + pass + + +# This also checks that a code snippet is not reformatted when the indentation +# of the first line is not more than the line with `::`, but this uses tabs to +# make it a little more confounding. It relies on the fact that indentation +# length is computed by assuming a tabwidth equal to 8. reST also rejects this +# and doesn't treat it as a literal block. +def rst_literal_skipped_first_line_not_indented_tab(): + """ + Do cool stuff:: + + cool_stuff( 1 ) + + Done. + """ + pass + + +# Like the previous test, but adds a second line. +def rst_literal_skipped_first_line_not_indented_tab_multiple(): + """ + Do cool stuff:: + + cool_stuff( 1 ) + cool_stuff( 2 ) + + Done. + """ + pass + + +# Tests that a code block with a second line that is not properly indented gets +# skipped. A valid code block needs to have an empty line separating these. +# +# One trick here is that we need to make sure the Python code in the snippet is +# valid, otherwise it would be skipped because of invalid Python. +def rst_literal_skipped_subsequent_line_not_indented(): + """ + Do cool stuff:: + + if True: + cool_stuff( ''' + hiya''' ) + + Done. + """ + pass + + +# In this test, we write what looks like a code-block, but it should be treated +# as invalid due to the missing `language` argument. +# +# It does still look like it could be a literal block according to the literal +# rules, but we currently consider the `.. ` prefix to indicate that it is not +# a literal block. +def rst_literal_skipped_not_directive(): + """ + .. code-block:: + + cool_stuff( 1 ) + + Done. + """ + pass + + +# In this test, we start a line with `.. `, which makes it look like it might +# be a directive. But instead continue it as if it was just some periods from +# the previous line, and then try to end it by starting a literal block. +# +# But because of the `.. ` in the beginning, we wind up not treating this as a +# code snippet. The reST render I was using to test things does actually treat +# this as a code block, so we may be out of conformance here. +def rst_literal_skipped_possible_false_negative(): + """ + This is a test. + .. This is a test:: + + cool_stuff( 1 ) + + Done. + """ + pass + + +# This tests that a doctest inside of a reST literal block doesn't get +# reformatted. It's plausible this isn't the right behavior, but it also seems +# like it might be the right behavior since it is a literal block. (The doctest +# makes the Python code invalid.) +def rst_literal_skipped_doctest(): + """ + Do cool stuff:: + + >>> cool_stuff( 1 ) + + Done. + """ + pass + + +def rst_directive_skipped_not_indented(): + """ + .. code-block:: python + + cool_stuff( 1 ) + + Done. + """ + pass + + +def rst_directive_skipped_wrong_language(): + """ + .. code-block:: rust + + cool_stuff( 1 ) + + Done. + """ + pass + + +# This gets skipped for the same reason that the doctest in a literal block +# gets skipped. +def rst_directive_skipped_doctest(): """ - Do cool stuff. + .. code-block:: python - >>> x = '\"\"\"' + >>> cool_stuff( 1 ) + + Done. """ pass ``` @@ -3270,6 +7151,493 @@ def doctest_invalid_skipped_with_triple_double_in_single_quote_string(): >>> x = '\"\"\"' """ pass + + +############################################################################### +# reStructuredText CODE EXAMPLES +# +# This section shows examples of docstrings that contain code snippets in +# reStructuredText formatted code blocks. +# +# See: https://www.sphinx-doc.org/en/master/usage/restructuredtext/basics.html#literal-blocks +# See: https://www.sphinx-doc.org/en/master/usage/restructuredtext/directives.html#directive-code-block +# See: https://docutils.sourceforge.io/docs/ref/rst/restructuredtext.html#literal-blocks +# See: https://docutils.sourceforge.io/docs/ref/rst/restructuredtext.html#toc-entry-30 +# See: https://docutils.sourceforge.io/docs/ref/rst/restructuredtext.html#toc-entry-38 +############################################################################### + + +def rst_literal_simple(): + """ + Do cool stuff:: + + cool_stuff(1) + + Done. + """ + pass + + +def rst_literal_simple_continued(): + """ + Do cool stuff:: + + def cool_stuff(x): + print(f"hi {x}") + + Done. + """ + pass + + +# Tests that we can end the literal block on the second +# to last line of the docstring. +def rst_literal_second_to_last(): + """ + Do cool stuff:: + + cool_stuff(1) + """ + pass + + +# Tests that we can end the literal block on the actual +# last line of the docstring. +def rst_literal_actually_last(): + """ + Do cool stuff:: + + cool_stuff(1)""" + pass + + +def rst_literal_with_blank_lines(): + """ + Do cool stuff:: + + def cool_stuff(x): + print(f"hi {x}") + + + def other_stuff(y): + print(y) + + Done. + """ + pass + + +# Extra blanks should be preserved. +def rst_literal_extra_blanks(): + """ + Do cool stuff:: + + + + cool_stuff(1) + + + + Done. + """ + pass + + +# If a literal block is never properly ended (via a non-empty unindented line), +# then the end of the block should be the last non-empty line. And subsequent +# empty lines should be preserved as-is. +def rst_literal_extra_blanks_at_end(): + """ + Do cool stuff:: + + + cool_stuff(1) + + + + """ + pass + + +# A literal block can contain many empty lines and it should not end the block +# if it continues. +def rst_literal_extra_blanks_in_snippet(): + """ + Do cool stuff:: + + cool_stuff(1) + + + cool_stuff(2) + + Done. + """ + pass + + +# This tests that a unindented line appearing after an indented line (but where +# the indent is still beyond the minimum) gets formatted properly. +def rst_literal_subsequent_line_not_indented(): + """ + Do cool stuff:: + + if True: + cool_stuff( + ''' + hiya''' + ) + + Done. + """ + pass + + +# This checks that if the first line in a code snippet has been indented with +# tabs, then so long as its "indentation length" is considered bigger than the +# line with `::`, it is reformatted as code. +# +# (If your tabwidth is set to 4, then it looks like the code snippet +# isn't indented at all, which is perhaps counter-intuitive. Indeed, reST +# itself also seems to recognize this as a code block, although it appears +# under-specified.) +def rst_literal_first_line_indent_uses_tabs_4spaces(): + """ + Do cool stuff:: + + cool_stuff(1) + + Done. + """ + pass + + +# Like the test above, but with multiple lines. +def rst_literal_first_line_indent_uses_tabs_4spaces_multiple(): + """ + Do cool stuff:: + + cool_stuff(1) + cool_stuff(2) + + Done. + """ + pass + + +# Another test with tabs, except in this case, if your tabwidth is less than +# 8, than the code snippet actually looks like its indent is *less* than the +# opening line with a `::`. One might presume this means that the code snippet +# is not treated as a literal block and thus not reformatted, but since we +# assume all tabs have tabwidth=8 when computing indentation length, the code +# snippet is actually seen as being more indented than the opening `::` line. +# As with the above example, reST seems to behave the same way here. +def rst_literal_first_line_indent_uses_tabs_8spaces(): + """ + Do cool stuff:: + + cool_stuff(1) + + Done. + """ + pass + + +# Like the test above, but with multiple lines. +def rst_literal_first_line_indent_uses_tabs_8spaces_multiple(): + """ + Do cool stuff:: + + cool_stuff(1) + cool_stuff(2) + + Done. + """ + pass + + +# Tests that if two lines in a literal block are indented to the same level +# but by different means (tabs versus spaces), then we correctly recognize the +# block and format it. +def rst_literal_first_line_tab_second_line_spaces(): + """ + Do cool stuff:: + + cool_stuff(1) + cool_stuff(2) + + Done. + """ + pass + + +# Tests that when two lines in a code snippet have weird and inconsistent +# indentation, the code still gets formatted so long as the indent is greater +# than the indent of the `::` line. +# +# In this case, the minimum indent is 5 spaces (from the second line) where as +# the first line has an indent of 8 spaces via a tab (by assuming tabwidth=8). +# The minimum indent is stripped from each code line. Since tabs aren't +# divisible, the entire tab is stripped, which means the first and second lines +# wind up with the same level of indentation. +# +# An alternative behavior here would be that the tab is replaced with 3 spaces +# instead of being stripped entirely. The code snippet itself would then have +# inconsistent indentation to the point of being invalid Python, and thus code +# formatting would be skipped. +# +# I decided on the former behavior because it seems a bit easier to implement, +# but we might want to switch to the alternative if cases like this show up in +# the real world. ---AG +def rst_literal_odd_indentation(): + """ + Do cool stuff:: + + cool_stuff(1) + cool_stuff(2) + + Done. + """ + pass + + +# Tests that having a line with a lone `::` works as an introduction of a +# literal block. +def rst_literal_lone_colon(): + """ + Do cool stuff. + + :: + + cool_stuff(1) + + Done. + """ + pass + + +def rst_directive_simple(): + """ + .. code-block:: python + + cool_stuff(1) + + Done. + """ + pass + + +def rst_directive_case_insensitive(): + """ + .. cOdE-bLoCk:: python + + cool_stuff(1) + + Done. + """ + pass + + +def rst_directive_sourcecode(): + """ + .. sourcecode:: python + + cool_stuff(1) + + Done. + """ + pass + + +def rst_directive_options(): + """ + .. code-block:: python + :linenos: + :emphasize-lines: 2,3 + :name: blah blah + + cool_stuff(1) + cool_stuff(2) + cool_stuff(3) + cool_stuff(4) + + Done. + """ + pass + + +# In this case, since `pycon` isn't recognized as a Python code snippet, the +# docstring reformatter ignores it. But it then picks up the doctest and +# reformats it. +def rst_directive_doctest(): + """ + .. code-block:: pycon + + >>> cool_stuff(1) + + Done. + """ + pass + + +# This checks that if the first non-empty line after the start of a literal +# block is not indented more than the line containing the `::`, then it is not +# treated as a code snippet. +def rst_literal_skipped_first_line_not_indented(): + """ + Do cool stuff:: + + cool_stuff( 1 ) + + Done. + """ + pass + + +# Like the test above, but inserts an indented line after the un-indented one. +# This should not cause the literal block to be resumed. +def rst_literal_skipped_first_line_not_indented_then_indented(): + """ + Do cool stuff:: + + cool_stuff( 1 ) + cool_stuff( 2 ) + + Done. + """ + pass + + +# This also checks that a code snippet is not reformatted when the indentation +# of the first line is not more than the line with `::`, but this uses tabs to +# make it a little more confounding. It relies on the fact that indentation +# length is computed by assuming a tabwidth equal to 8. reST also rejects this +# and doesn't treat it as a literal block. +def rst_literal_skipped_first_line_not_indented_tab(): + """ + Do cool stuff:: + + cool_stuff( 1 ) + + Done. + """ + pass + + +# Like the previous test, but adds a second line. +def rst_literal_skipped_first_line_not_indented_tab_multiple(): + """ + Do cool stuff:: + + cool_stuff( 1 ) + cool_stuff( 2 ) + + Done. + """ + pass + + +# Tests that a code block with a second line that is not properly indented gets +# skipped. A valid code block needs to have an empty line separating these. +# +# One trick here is that we need to make sure the Python code in the snippet is +# valid, otherwise it would be skipped because of invalid Python. +def rst_literal_skipped_subsequent_line_not_indented(): + """ + Do cool stuff:: + + if True: + cool_stuff( ''' + hiya''' ) + + Done. + """ + pass + + +# In this test, we write what looks like a code-block, but it should be treated +# as invalid due to the missing `language` argument. +# +# It does still look like it could be a literal block according to the literal +# rules, but we currently consider the `.. ` prefix to indicate that it is not +# a literal block. +def rst_literal_skipped_not_directive(): + """ + .. code-block:: + + cool_stuff( 1 ) + + Done. + """ + pass + + +# In this test, we start a line with `.. `, which makes it look like it might +# be a directive. But instead continue it as if it was just some periods from +# the previous line, and then try to end it by starting a literal block. +# +# But because of the `.. ` in the beginning, we wind up not treating this as a +# code snippet. The reST render I was using to test things does actually treat +# this as a code block, so we may be out of conformance here. +def rst_literal_skipped_possible_false_negative(): + """ + This is a test. + .. This is a test:: + + cool_stuff( 1 ) + + Done. + """ + pass + + +# This tests that a doctest inside of a reST literal block doesn't get +# reformatted. It's plausible this isn't the right behavior, but it also seems +# like it might be the right behavior since it is a literal block. (The doctest +# makes the Python code invalid.) +def rst_literal_skipped_doctest(): + """ + Do cool stuff:: + + >>> cool_stuff( 1 ) + + Done. + """ + pass + + +def rst_directive_skipped_not_indented(): + """ + .. code-block:: python + + cool_stuff( 1 ) + + Done. + """ + pass + + +def rst_directive_skipped_wrong_language(): + """ + .. code-block:: rust + + cool_stuff( 1 ) + + Done. + """ + pass + + +# This gets skipped for the same reason that the doctest in a literal block +# gets skipped. +def rst_directive_skipped_doctest(): + """ + .. code-block:: python + + >>> cool_stuff( 1 ) + + Done. + """ + pass ``` From 618e48d5c7f132fddd48f9d654bf212eea5385f2 Mon Sep 17 00:00:00 2001 From: Andrew Gallant Date: Tue, 5 Dec 2023 09:46:29 -0500 Subject: [PATCH 6/6] ruff_python_formatter: add line breaks between fields This is what seems consistent with the prevailing code. --- .../src/expression/string/docstring.rs | 25 ++++++++++++++++--- 1 file changed, 21 insertions(+), 4 deletions(-) diff --git a/crates/ruff_python_formatter/src/expression/string/docstring.rs b/crates/ruff_python_formatter/src/expression/string/docstring.rs index dbd01a9f8c6f2..41f350bd4deb2 100644 --- a/crates/ruff_python_formatter/src/expression/string/docstring.rs +++ b/crates/ruff_python_formatter/src/expression/string/docstring.rs @@ -225,6 +225,7 @@ fn contains_unescaped_newline(haystack: &str) -> bool { /// An abstraction for printing each line of a docstring. struct DocstringLinePrinter<'ast, 'buf, 'fmt, 'src> { f: &'fmt mut PyFormatter<'ast, 'buf>, + /// A queue of actions to perform. /// /// Whenever we process a line, it is possible for it to generate multiple @@ -236,17 +237,22 @@ struct DocstringLinePrinter<'ast, 'buf, 'fmt, 'src> { /// Actions are pushed on to the end of the queue and popped from the /// beginning. action_queue: VecDeque>, + /// The source offset of the beginning of the line that is currently being /// printed. offset: TextSize, + /// Indentation alignment based on the least indented line in the /// docstring. stripped_indentation_length: TextSize, + /// Whether the docstring is overall already considered normalized. When it /// is, the formatter can take a fast path. already_normalized: bool, + /// The quote style used by the docstring being printed. quote_style: QuoteStyle, + /// The current code example detected in the docstring. code_example: CodeExample<'src>, } @@ -525,8 +531,10 @@ struct InputDocstringLine<'src> { /// unformatted line in a docstring, and owned when it corresponds to a /// reformatted line (e.g., from a code snippet) in a docstring. line: &'src str, + /// The offset into the source document which this line corresponds to. offset: TextSize, + /// For any input line that isn't the last line, this contains a reference /// to the line immediately following this one. /// @@ -565,9 +573,11 @@ struct OutputDocstringLine<'src> { /// a line from a reformatted code snippet. In other cases, it is borrowed /// from the input docstring line as-is. line: Cow<'src, str>, + /// The offset into the source document which this line corresponds to. /// Currently, this is an estimate. offset: TextSize, + /// Whether this is the last line in a docstring or not. This is determined /// by whether the last line in the code snippet was also the last line in /// the docstring. If it was, then it follows that the last line in the @@ -730,6 +740,7 @@ impl<'src> CodeExampleKind<'src> { struct CodeExampleDoctest<'src> { /// The lines that have been seen so far that make up the doctest. lines: Vec>, + /// The indent observed in the first doctest line. /// /// More precisely, this corresponds to the whitespace observed before @@ -832,15 +843,19 @@ impl<'src> CodeExampleDoctest<'src> { struct CodeExampleRst<'src> { /// The lines that have been seen so far that make up the block. lines: Vec>, - /// The indent of the line "opening" this block. It can either be the - /// indent of a line ending with `::` (for a literal block) or the indent - /// of a line starting with `.. ` (a directive). + + /// The indent of the line "opening" this block measured via + /// `indentation_length`. + /// + /// It can either be the indent of a line ending with `::` (for a literal + /// block) or the indent of a line starting with `.. ` (a directive). /// /// The content body of a block needs to be indented more than the line /// opening the block, so we use this indentation to look for indentation /// that is "more than" it. opening_indent: TextSize, - /// The minimum indent of the block. + + /// The minimum indent of the block measured via `indentation_length`. /// /// This is `None` until the first such line is seen. If no such line is /// found, then we consider it an invalid block and bail out of trying to @@ -858,6 +873,7 @@ struct CodeExampleRst<'src> { /// reformatted. The minimum indent is stripped from each line when it is /// re-built. min_indent: Option, + /// Whether this is a directive block or not. When not a directive, this is /// a literal block. The main difference between them is that they start /// differently. A literal block is started merely by trailing a line with @@ -1159,6 +1175,7 @@ struct CodeExampleLine<'src> { /// example, contain a `>>> ` or `... ` prefix if this code example is a /// doctest. original: InputDocstringLine<'src>, + /// The code extracted from the line. code: &'src str, }