From 42b39352151fa7e3ba8809cc879c790c0abd7dff Mon Sep 17 00:00:00 2001 From: schneems Date: Tue, 4 Jan 2022 15:01:50 -0600 Subject: [PATCH] Fix incorrect keyword lexing For some weird reason this line is detected as a keyword line: ``` type: :module, ``` Though it clearly isn't. Lexing it with Ripper: ``` require 'ripper' pp Ripper.lex(<<~'EOM') { type: :module, } EOM ``` Produces: ``` [[[1, 0], :on_lbrace, "{", BEG|LABEL], [[1, 1], :on_ignored_nl, "\n", BEG|LABEL], [[2, 0], :on_sp, " ", BEG|LABEL], [[2, 2], :on_label, "type:", ARG|LABELED], [[2, 7], :on_sp, " ", ARG|LABELED], [[2, 8], :on_symbeg, ":", FNAME], [[2, 9], :on_kw, "module", ENDFN], [[2, 15], :on_comma, ",", BEG|LABEL], [[2, 16], :on_ignored_nl, "\n", BEG|LABEL], [[3, 0], :on_rbrace, "}", END], [[3, 1], :on_nl, "\n", BEG], [[4, 0], :on_const, "EOM", CMDARG]] ``` This is the problem line: ``` [[2, 9], :on_kw, "module", ENDFN], ``` The `module` token is reported as `:on_kw` even though the token right before it is `:on_symbeg` in the FNAME state, which means `module` is part of the symbol `:module` and not a keyword. Digging into the IRB source code, I found that they handled this case in https://github.com/ruby/ruby/commit/776759e300e4659bb7468e2b97c8c2d4359a2953. Based on the description of that commit I believe this may be a bug in the lexer, but I'm not sure how to validate it. --- lib/dead_end/lex_all.rb | 5 ++++- lib/dead_end/lex_value.rb | 12 +++++++++--- spec/unit/code_line_spec.rb | 12 ++++++++++++ 3 files changed, 25 insertions(+), 4 deletions(-) diff --git a/lib/dead_end/lex_all.rb b/lib/dead_end/lex_all.rb index 08973ce..ec62461 100644 --- a/lib/dead_end/lex_all.rb +++ b/lib/dead_end/lex_all.rb @@ -25,7 +25,10 @@ def initialize(source:, source_lines: nil) lineno = @lex.last.pos.first + 1 end - @lex.map! { |elem| LexValue.new(elem.pos.first, elem.event, elem.tok, elem.state) } + last_lex = nil + @lex.map! 
{ |elem| + last_lex = LexValue.new(elem.pos.first, elem.event, elem.tok, elem.state, last_lex) + } end def to_a diff --git a/lib/dead_end/lex_value.rb b/lib/dead_end/lex_value.rb index 3119953..8bb07b8 100644 --- a/lib/dead_end/lex_value.rb +++ b/lib/dead_end/lex_value.rb @@ -15,19 +15,21 @@ module DeadEnd class LexValue attr_reader :line, :type, :token, :state - def initialize(line, type, token, state) + def initialize(line, type, token, state, last_lex = nil) @line = line @type = type @token = token @state = state - set_kw_end + set_kw_end(last_lex) end - private def set_kw_end + private def set_kw_end(last_lex) @is_end = false @is_kw = false return if type != :on_kw + # + return if last_lex && last_lex.fname? # https://github.com/ruby/ruby/commit/776759e300e4659bb7468e2b97c8c2d4359a2953 case token when "if", "unless", "while", "until" @@ -41,6 +43,10 @@ def initialize(line, type, token, state) end end + def fname? + state.allbits?(Ripper::EXPR_FNAME) + end + def ignore_newline? type == :on_ignored_nl end diff --git a/spec/unit/code_line_spec.rb b/spec/unit/code_line_spec.rb index 1eb1a86..cf3e75d 100644 --- a/spec/unit/code_line_spec.rb +++ b/spec/unit/code_line_spec.rb @@ -4,6 +4,18 @@ module DeadEnd RSpec.describe CodeLine do + it "bug in keyword detection" do + lines = CodeLine.from_source(<<~'EOM') + def to_json(*opts) + { + type: :module, + }.to_json(*opts) + end + EOM + expect(lines.count(&:is_kw?)).to eq(1) + expect(lines.count(&:is_end?)).to eq(1) + end + it "supports endless method definitions" do skip("Unsupported ruby version") unless Gem::Version.new(RUBY_VERSION) >= Gem::Version.new("3")