[Changed] updated Simplified BSD license

made license detection from templates more flexible [#171961625] Signed-off-by: Debbie Chen <dechen@pivotal.io>
pivotal · May 11, 2020 · acf5705 · acf5705
1 parent 204798c
commit acf5705
Show file tree

Hide file tree

Showing 3 changed files with 134 additions and 8 deletions.
diff --git a/lib/license_finder/license/templates/SimplifiedBSD.txt b/lib/license_finder/license/templates/SimplifiedBSD.txt
@@ -17,7 +17,3 @@ LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
 ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
-The views and conclusions contained in the software and documentation are those
-of the authors and should not be interpreted as representing official policies,
-either expressed or implied, of the FreeBSD Project.
diff --git a/lib/license_finder/license/text.rb b/lib/license_finder/license/text.rb
@@ -6,15 +6,28 @@ module Text
       SPACES = /\s+/.freeze
       QUOTES = /['`"]{1,2}/.freeze
       PLACEHOLDERS = /<[^<>]+>/.freeze
+      SPECIAL_SINGLE_QUOTES = /[‘’]/.freeze
+      SPECIAL_DOUBLE_QUOTES = /[“”„«»]/.freeze
+      ALPHABET_ORDERED_LIST = /\\\([a-z]\\\)\\\s/.freeze
+      ALPHABET_ORDERED_LIST_OPTIONAL = '(\([a-z]\)\s)?'
+      LIST_BULLETS = /(\d{1,2}\\\.|\\\*)\\\s/.freeze
+      LIST_BULLETS_OPTIONAL = '(\d{1,2}.|\*)\s*'
 
       def self.normalize_punctuation(text)
-        text.gsub(SPACES, ' ')
-            .gsub(QUOTES, '"')
-            .strip
+        text.dup.force_encoding('UTF-8')
+                 .gsub(SPECIAL_DOUBLE_QUOTES, '"')
+                 .gsub(SPECIAL_SINGLE_QUOTES, "'")
+                 .gsub(SPACES, ' ')
+                 .gsub(QUOTES, '"')
+                 .strip
       end
 
       def self.compile_to_regex(text)
-        Regexp.new(Regexp.escape(text).gsub(PLACEHOLDERS, '(.*)'))
+        Regexp.new(Regexp.escape(normalize_punctuation(text))
+                       .gsub(PLACEHOLDERS, '(.*)')
+                       .gsub(',', '(,)?')
+                       .gsub(ALPHABET_ORDERED_LIST, ALPHABET_ORDERED_LIST_OPTIONAL)
+                       .gsub(LIST_BULLETS, LIST_BULLETS_OPTIONAL))
       end
     end
   end

diff --git a/spec/lib/license_finder/license/text_spec.rb b/spec/lib/license_finder/license/text_spec.rb
@@ -0,0 +1,117 @@
+# frozen_string_literal: true
+
+require 'spec_helper'
+
+describe LicenseFinder::License::Text do
+  describe '.normalize_punctuation' do
+    context 'when text contains special singe/double quotes' do
+      it 'normalizes specials quotes to generic double quotes' do
+        text = <<~TEXT
+          ‘surrounded with special single quotes’
+          “surrounded with special double quotes”
+          “surrounded with special double quotes„
+          «surrounded with special double quotes»
+        TEXT
+
+        expected_text = '"surrounded with special single quotes" "surrounded with special double quotes" "surrounded with special double quotes" "surrounded with special double quotes"'
+
+        expect(described_class.normalize_punctuation(text)).to eq(expected_text)
+      end
+    end
+
+    context 'when text contains whitespace tags' do
+      it 'normalizes whitespace tag to a single space' do
+        text = <<~TEXT
+          far                              away
+          far far                                  away
+        TEXT
+
+        expected_text = 'far away far far away'
+
+        expect(described_class.normalize_punctuation(text)).to eq(expected_text)
+      end
+    end
+
+    context 'when text contains multiple types of quotes' do
+      it 'normalizes multiple types of quotes to generic double quotes' do
+        text = <<~TEXT
+          'surrounded with single quotes'
+          "surrounded with double quotes"
+          `surrounded with backtick`
+        TEXT
+
+        expected_text = '"surrounded with single quotes" "surrounded with double quotes" "surrounded with backtick"'
+
+        expect(described_class.normalize_punctuation(text)).to eq(expected_text)
+      end
+    end
+  end
+
+  describe '.compile_to_regex' do
+    context 'when the text contains placeholders' do
+      it 'returns regex with wildcards' do
+        text = <<~TEXT
+          I am <thing>
+          You are <thing2>
+        TEXT
+
+        expected_regex = Regexp.new('I\ am\ (.*)\ You\ are\ (.*)')
+
+        expect(described_class.compile_to_regex(text)).to eq(expected_regex)
+      end
+    end
+
+    context 'when the text contains commas' do
+      it 'returns regex with comma optionals' do
+        text = <<~TEXT
+          This is a comma,
+          This is also a comma,
+        TEXT
+
+        expected_regex = Regexp.new('This\ is\ a\ comma(,)?\ This\ is\ also\ a\ comma(,)?')
+
+        expect(described_class.compile_to_regex(text)).to eq(expected_regex)
+      end
+    end
+
+    context 'when the text contains alphabetically ordered list' do
+      it 'returns regex with optional alphabetically order list' do
+        text = <<~TEXT
+          (a) for an apple
+          (b) for a loaf of bread
+        TEXT
+
+        expected_regex = Regexp.new('(\([a-z]\)\s)?for\ an\ apple\ (\([a-z]\)\s)?for\ a\ loaf\ of\ bread')
+
+        expect(described_class.compile_to_regex(text)).to eq(expected_regex)
+      end
+    end
+
+    context 'when the text contains numerically ordered/unordered list' do
+      it 'returns regex with optional alphabetically order list' do
+        text = <<~TEXT
+          1. for an apple
+          * for a loaf of bread
+        TEXT
+
+        expected_regex = Regexp.new('(\d{1,2}.|\*)\s*for\ an\ apple\ (\d{1,2}.|\*)\s*for\ a\ loaf\ of\ bread')
+
+        expect(described_class.compile_to_regex(text)).to eq(expected_regex)
+      end
+
+      context 'when the text contains brackets near the unordered bullets' do
+        it 'returns properly formatted regex' do
+          text = <<~TEXT
+          **
+          * (banana bread)
+          **
+          TEXT
+
+          expected_regex = Regexp.new('\*(\d{1,2}.|\*)\s*(\d{1,2}.|\*)\s*\(banana\ bread\)\ \*\*')
+
+          expect(described_class.compile_to_regex(text)).to eq(expected_regex)
+        end
+      end
+    end
+  end
+end