Merge pull request #1543 from 1c-syntax/feature/optimizeTypo

Оптимизация TypoDiagnostic
1c-syntax · Apr 4, 2021 · eefcde8
2 parents 65a0f80 + 90fa160
commit eefcde8
Show file tree

Hide file tree

Showing 6 changed files with 101 additions and 118 deletions.
diff --git a/src/main/java/com/github/_1c_syntax/bsl/languageserver/diagnostics/TypoDiagnostic.java b/src/main/java/com/github/_1c_syntax/bsl/languageserver/diagnostics/TypoDiagnostic.java
@@ -28,7 +28,6 @@
 import com.github._1c_syntax.bsl.languageserver.diagnostics.metadata.DiagnosticTag;
 import com.github._1c_syntax.bsl.languageserver.diagnostics.metadata.DiagnosticType;
 import com.github._1c_syntax.bsl.languageserver.diagnostics.typo.JLanguageToolPool;
-import com.github._1c_syntax.bsl.languageserver.diagnostics.typo.JLanguageToolPoolEntry;
 import com.github._1c_syntax.bsl.languageserver.utils.Trees;
 import com.github._1c_syntax.bsl.parser.BSLParser;
 import com.github._1c_syntax.bsl.parser.BSLParserRuleContext;
@@ -46,11 +45,13 @@
 import java.io.IOException;
 import java.util.ArrayList;
 import java.util.Arrays;
+import java.util.Collections;
 import java.util.HashMap;
-import java.util.HashSet;
 import java.util.List;
 import java.util.Map;
 import java.util.Set;
+import java.util.concurrent.ConcurrentHashMap;
+import java.util.function.Predicate;
 import java.util.regex.Pattern;
 import java.util.stream.Collectors;
 
@@ -62,7 +63,6 @@
     DiagnosticTag.BADPRACTICE
   }
 )
-
 @Slf4j
 public class TypoDiagnostic extends AbstractDiagnostic {
 
@@ -72,6 +72,14 @@ public class TypoDiagnostic extends AbstractDiagnostic {
     "ru", new JLanguageToolPool(new Russian())
   );
 
+  /**
+   * Карта, хранящая результат проверки слова (ошибка/нет ошибки) в разрезе языков.
+   */
+  private static final Map<String, Map<String, Boolean>> checkedWords = Map.of(
+    "en", new ConcurrentHashMap<>(),
+    "ru", new ConcurrentHashMap<>()
+  );
+
   private static final Pattern SPACES_PATTERN = Pattern.compile("\\s+");
   private static final Pattern QUOTE_PATTERN = Pattern.compile("\"");
   private static final String FORMAT_STRING_RU = "Л=|ЧЦ=|ЧДЦ=|ЧС=|ЧРД=|ЧРГ=|ЧН=|ЧВН=|ЧГ=|ЧО=|ДФ=|ДЛФ=|ДП=|БЛ=|БИ=";
@@ -110,88 +118,112 @@ public void configure(Map<String, Object> configuration) {
     minWordLength = Math.max(minWordLength, DEFAULT_MIN_WORD_LENGTH);
   }
 
-  private String getWordsToIgnore() {
+  private Set<String> getWordsToIgnore() {
+    String delimiter = ",";
     String exceptions = SPACES_PATTERN.matcher(info.getResourceString("diagnosticExceptions")).replaceAll("");
     if (!userWordsToIgnore.isEmpty()) {
-      exceptions = exceptions + "," + SPACES_PATTERN.matcher(userWordsToIgnore).replaceAll("");
+      exceptions = exceptions + delimiter + SPACES_PATTERN.matcher(userWordsToIgnore).replaceAll("");
     }
 
-    return exceptions.intern();
+    return Arrays.stream(exceptions.split(delimiter))
+      .collect(Collectors.toSet());
   }
 
-  JLanguageToolPoolEntry acquireLanguageTool(String lang) {
+  private static JLanguageTool acquireLanguageTool(String lang) {
     return getLanguageToolPoolMap().get(lang).checkOut();
   }
 
-  private static void releaseLanguageTool(String lang, JLanguageToolPoolEntry languageToolPoolEntry) {
-    getLanguageToolPoolMap().get(lang).checkIn(languageToolPoolEntry);
+  private static void releaseLanguageTool(String lang, JLanguageTool languageTool) {
+    getLanguageToolPoolMap().get(lang).checkIn(languageTool);
   }
 
-  private String getTokenizedStringFromTokens(DocumentContext documentContext, Map<String, List<Token>> tokensMap) {
-    StringBuilder text = new StringBuilder();
+  private Map<String, List<Token>> getTokensMap(
+    DocumentContext documentContext
+  ) {
+    Set<String> wordsToIgnore = getWordsToIgnore();
+    Map<String, List<Token>> tokensMap = new HashMap<>();
 
     Trees.findAllRuleNodes(documentContext.getAst(), rulesToFind).stream()
       .map(BSLParserRuleContext.class::cast)
       .flatMap(ruleContext -> ruleContext.getTokens().stream())
       .filter(token -> tokenTypes.contains(token.getType()))
       .filter(token -> !FORMAT_STRING_PATTERN.matcher(token.getText()).find())
       .forEach((Token token) -> {
-          String curText = QUOTE_PATTERN.matcher(token.getText()).replaceAll("");
-          var splitList = Arrays.asList(StringUtils.splitByCharacterTypeCamelCase(curText));
-          splitList.stream()
+          String curText = QUOTE_PATTERN.matcher(token.getText()).replaceAll("").trim();
+          String[] camelCaseSplitedWords = StringUtils.splitByCharacterTypeCamelCase(curText);
+
+          Arrays.stream(camelCaseSplitedWords)
+            .filter(Predicate.not(String::isBlank))
             .filter(element -> element.length() >= minWordLength)
+            .filter(Predicate.not(wordsToIgnore::contains))
             .forEach(element -> tokensMap.computeIfAbsent(element, newElement -> new ArrayList<>()).add(token));
-
-          text.append(" ");
-          text.append(String.join(" ", splitList));
-
         }
       );
 
-    return Arrays.stream(SPACES_PATTERN.split(text.toString().trim()))
-      .distinct()
-      .collect(Collectors.joining(" "));
+    return tokensMap;
   }
 
   @Override
   protected void check() {
 
     String lang = info.getResourceString("diagnosticLanguage");
-    Map<String, List<Token>> tokensMap = new HashMap<>();
+    Map<String, Boolean> checkedWordsForLang = checkedWords.get(lang);
+    Map<String, List<Token>> tokensMap = getTokensMap(documentContext);
 
-    JLanguageToolPoolEntry languageToolPoolEntry = acquireLanguageTool(lang);
-    JLanguageTool languageTool = languageToolPoolEntry.getLanguageTool(getWordsToIgnore());
+    // collect the words that have no cached check result yet
+    Set<String> uncheckedWords = tokensMap.keySet().stream()
+      .filter(word -> !checkedWordsForLang.containsKey(word))
+      .collect(Collectors.toSet());
+
+    if (uncheckedWords.isEmpty()) {
+      fireDiagnosticOnCheckedWordsWithErrors(tokensMap);
+      return;
+    }
 
-    String result = getTokenizedStringFromTokens(documentContext, tokensMap);
+    // Join with a double \n to force LT to start a new paragraph after each word.
+    // Otherwise results may be flaky because they depend on the order of words in the file.
+    String uncheckedWordsString = String.join("\n\n", uncheckedWords);
 
+    JLanguageTool languageTool = acquireLanguageTool(lang);
+
+    List<RuleMatch> matches = Collections.emptyList();
     try {
-      List<RuleMatch> matches = languageTool.check(
-        result,
+      matches = languageTool.check(
+        uncheckedWordsString,
         true,
         JLanguageTool.ParagraphHandling.ONLYNONPARA
       );
-
-      if (!matches.isEmpty()) {
-
-        Set<Token> uniqueValues = new HashSet<>();
-        matches
-          .stream()
-          .filter(ruleMatch -> !ruleMatch.getSuggestedReplacements().isEmpty())
-          .map(ruleMatch -> result.substring(ruleMatch.getFromPos(), ruleMatch.getToPos()))
-          .forEach((String substring) -> {
-            List<Token> tokens = tokensMap.get(substring);
-            if (tokens != null) {
-              tokens.stream()
-                .filter(uniqueValues::add)
-                .forEach(token -> diagnosticStorage.addDiagnostic(token, info.getMessage(substring)));
-            }
-          });
-      }
     } catch (IOException e) {
       LOGGER.error(e.getMessage(), e);
+    } finally {
+      releaseLanguageTool(lang, languageTool);
     }
 
-    releaseLanguageTool(lang, languageToolPoolEntry);
+    // cache every word reported by LT as checked and misspelled
+    matches.stream()
+      .map(ruleMatch -> ruleMatch.getSentence().getTokens()[1].getToken())
+      .forEach(word -> checkedWordsForLang.put(word, true));
+
+    // mark unmatched words without errors as checked
+    uncheckedWords.forEach(word -> checkedWordsForLang.putIfAbsent(word, false));
+
+    fireDiagnosticOnCheckedWordsWithErrors(tokensMap);
+  }
+
+  private void fireDiagnosticOnCheckedWordsWithErrors(
+    Map<String, List<Token>> tokensMap
+  ) {
+    String lang = info.getResourceString("diagnosticLanguage");
+    Map<String, Boolean> checkedWordsForLang = checkedWords.get(lang);
+
+    tokensMap.entrySet().stream()
+      .filter(entry -> checkedWordsForLang.getOrDefault(entry.getKey(), false))
+      .forEach((Map.Entry<String, List<Token>> entry) -> {
+        String word = entry.getKey();
+        List<Token> tokens = entry.getValue();
+
+        tokens.forEach(token -> diagnosticStorage.addDiagnostic(token, info.getMessage(word)));
+      });
   }
 
 }
diff --git a/...ain/java/com/github/_1c_syntax/bsl/languageserver/diagnostics/typo/JLanguageToolPool.java b/...ain/java/com/github/_1c_syntax/bsl/languageserver/diagnostics/typo/JLanguageToolPool.java
@@ -23,15 +23,26 @@
 
 import com.github._1c_syntax.bsl.languageserver.utils.AbstractObjectPool;
 import lombok.AllArgsConstructor;
+import org.languagetool.JLanguageTool;
 import org.languagetool.Language;
+import org.languagetool.rules.Rule;
+
+import java.util.function.Predicate;
 
 @AllArgsConstructor
-public class JLanguageToolPool extends AbstractObjectPool<JLanguageToolPoolEntry> {
+public class JLanguageToolPool extends AbstractObjectPool<JLanguageTool> {
 
   private final Language language;
 
   @Override
-  protected JLanguageToolPoolEntry create() {
-    return new JLanguageToolPoolEntry(language);
+  protected JLanguageTool create() {
+    JLanguageTool languageTool = new JLanguageTool(language);
+
+    languageTool.getAllRules().stream()
+      .filter(Predicate.not(Rule::isDictionaryBasedSpellingRule))
+      .map(Rule::getId)
+      .forEach(languageTool::disableRule);
+
+    return languageTool;
   }
 }
diff --git a/...ava/com/github/_1c_syntax/bsl/languageserver/diagnostics/typo/JLanguageToolPoolEntry.java b/...ava/com/github/_1c_syntax/bsl/languageserver/diagnostics/typo/JLanguageToolPoolEntry.java
diff --git a/...ub/_1c_syntax/bsl/languageserver/codeactions/DisableDiagnosticTriggeringSupplierTest.java b/...ub/_1c_syntax/bsl/languageserver/codeactions/DisableDiagnosticTriggeringSupplierTest.java
@@ -77,13 +77,14 @@ void testGetCodeActions() {
     List<CodeAction> codeActions = codeActionSupplier.getCodeActions(params, documentContext);
 
     assertThat(codeActions)
-      .hasSize(10)
+      .hasSize(11)
       .anyMatch(codeAction -> codeAction.getTitle().equals("Disable all diagnostic in file"))
       .anyMatch(codeAction -> codeAction.getTitle().equals("Disable NumberOfValuesInStructureConstructor in file"))
       .anyMatch(codeAction -> codeAction.getTitle().equals("Disable ExportVariables in file"))
       .anyMatch(codeAction -> codeAction.getTitle().equals("Disable IfElseDuplicatedCondition in file"))
       .anyMatch(codeAction -> codeAction.getTitle().equals("Disable CanonicalSpellingKeywords in file"))
       .anyMatch(codeAction -> codeAction.getTitle().equals("Disable FunctionShouldHaveReturn in file"))
+      .anyMatch(codeAction -> codeAction.getTitle().equals("Disable Typo in file"))
       .anyMatch(codeAction -> codeAction.getTitle().equals("Disable IfElseIfEndsWithElse in file"))
       .anyMatch(codeAction -> codeAction.getTitle().equals("Disable MagicNumber in file"))
       .anyMatch(codeAction -> codeAction.getTitle().equals("Disable MissingSpace in file"))
@@ -121,11 +122,13 @@ void testGetCodeActionsOneLine() {
     List<CodeAction> codeActions = codeActionSupplier.getCodeActions(params, documentContext);
 
     assertThat(codeActions)
-      .hasSize(6)
+      .hasSize(8)
       .anyMatch(codeAction -> codeAction.getTitle().equals("Disable MagicNumber in line"))
       .anyMatch(codeAction -> codeAction.getTitle().equals("Disable MissingSpace in line"))
       .anyMatch(codeAction -> codeAction.getTitle().equals("Disable MagicNumber in file"))
       .anyMatch(codeAction -> codeAction.getTitle().equals("Disable MissingSpace in file"))
+      .anyMatch(codeAction -> codeAction.getTitle().equals("Disable Typo in file"))
+      .anyMatch(codeAction -> codeAction.getTitle().equals("Disable Typo in line"))
       .anyMatch(codeAction -> codeAction.getTitle().equals("Disable all diagnostic in line"))
       .anyMatch(codeAction -> codeAction.getTitle().equals("Disable all diagnostic in file"));
   }
@@ -158,11 +161,13 @@ void testGetCodeActionsRegion() {
     List<CodeAction> codeActions = codeActionSupplier.getCodeActions(params, documentContext);
 
     assertThat(codeActions)
-      .hasSize(8)
+      .hasSize(10)
       .anyMatch(codeAction -> codeAction.getTitle().equals("Disable MagicNumber in range"))
       .anyMatch(codeAction -> codeAction.getTitle().equals("Disable MissingSpace in range"))
       .anyMatch(codeAction -> codeAction.getTitle().equals("Disable MagicNumber in file"))
       .anyMatch(codeAction -> codeAction.getTitle().equals("Disable MissingSpace in file"))
+      .anyMatch(codeAction -> codeAction.getTitle().equals("Disable Typo in file"))
+      .anyMatch(codeAction -> codeAction.getTitle().equals("Disable Typo in range"))
       .anyMatch(codeAction -> codeAction.getTitle().equals("Disable all diagnostic in range"))
       .anyMatch(codeAction -> codeAction.getTitle().equals("Disable all diagnostic in file"))
       .anyMatch(codeAction -> codeAction.getTitle().equals("Disable CanonicalSpellingKeywords in range"))

diff --git a/src/test/java/com/github/_1c_syntax/bsl/languageserver/diagnostics/TypoDiagnosticTest.java b/src/test/java/com/github/_1c_syntax/bsl/languageserver/diagnostics/TypoDiagnosticTest.java
@@ -23,7 +23,6 @@
 
 import com.github._1c_syntax.bsl.languageserver.util.CleanupContextBeforeClassAndAfterEachTestMethod;
 import org.eclipse.lsp4j.Diagnostic;
-import org.junit.jupiter.api.BeforeEach;
 import org.junit.jupiter.api.Test;
 import org.springframework.boot.test.context.SpringBootTest;
 
@@ -40,12 +39,6 @@ class TypoDiagnosticTest extends AbstractDiagnosticTest<TypoDiagnostic> {
     super(TypoDiagnostic.class);
   }
 
-  @BeforeEach
-  void resetJLanguageToolPool() {
-    var lang = diagnosticInstance.getInfo().getResourceString("diagnosticLanguage");
-    diagnosticInstance.acquireLanguageTool(lang).getLanguageTool("Ы");
-  }
-
   @Test
   void test() {
     Map<String, Object> configuration = diagnosticInstance.getInfo().getDefaultConfiguration();

diff --git a/...est/java/com/github/_1c_syntax/bsl/languageserver/providers/DocumentLinkProviderTest.java b/...est/java/com/github/_1c_syntax/bsl/languageserver/providers/DocumentLinkProviderTest.java
@@ -82,11 +82,11 @@ void testGetDocumentLinksEn() {
     // then
     assertThat(documentLinks)
       .isNotEmpty()
-      .hasSize(6)
+      .hasSize(8)
       .allMatch(documentLink -> documentLink.getTarget()
         .startsWith(SITE_EN_URL))
       .filteredOn(documentLink -> !documentLink.getTarget().endsWith(DIAGNOSTIC_CODE))
-      .hasSize(2);
+      .hasSize(4);
   }
 
   @Test