Skip to content

Commit

Permalink
Merge pull request #1543 from 1c-syntax/feature/optimizeTypo
Browse files Browse the repository at this point in the history
Оптимизация TypoDiagnostic
  • Loading branch information
nixel2007 authored Apr 4, 2021
2 parents 65a0f80 + 90fa160 commit eefcde8
Show file tree
Hide file tree
Showing 6 changed files with 101 additions and 118 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,6 @@
import com.github._1c_syntax.bsl.languageserver.diagnostics.metadata.DiagnosticTag;
import com.github._1c_syntax.bsl.languageserver.diagnostics.metadata.DiagnosticType;
import com.github._1c_syntax.bsl.languageserver.diagnostics.typo.JLanguageToolPool;
import com.github._1c_syntax.bsl.languageserver.diagnostics.typo.JLanguageToolPoolEntry;
import com.github._1c_syntax.bsl.languageserver.utils.Trees;
import com.github._1c_syntax.bsl.parser.BSLParser;
import com.github._1c_syntax.bsl.parser.BSLParserRuleContext;
Expand All @@ -46,11 +45,13 @@
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.concurrent.ConcurrentHashMap;
import java.util.function.Predicate;
import java.util.regex.Pattern;
import java.util.stream.Collectors;

Expand All @@ -62,7 +63,6 @@
DiagnosticTag.BADPRACTICE
}
)

@Slf4j
public class TypoDiagnostic extends AbstractDiagnostic {

Expand All @@ -72,6 +72,14 @@ public class TypoDiagnostic extends AbstractDiagnostic {
"ru", new JLanguageToolPool(new Russian())
);

/**
* Cache of spell-check results per word, grouped by language.
* The outer key is the language code ("en" or "ru"); the inner map goes from a word
* to {@code true} when the check reported an error for it and {@code false} otherwise.
*/
private static final Map<String, Map<String, Boolean>> checkedWords = Map.of(
"en", new ConcurrentHashMap<>(),
"ru", new ConcurrentHashMap<>()
);

private static final Pattern SPACES_PATTERN = Pattern.compile("\\s+");
private static final Pattern QUOTE_PATTERN = Pattern.compile("\"");
private static final String FORMAT_STRING_RU = "Л=|ЧЦ=|ЧДЦ=|ЧС=|ЧРД=|ЧРГ=|ЧН=|ЧВН=|ЧГ=|ЧО=|ДФ=|ДЛФ=|ДП=|БЛ=|БИ=";
Expand Down Expand Up @@ -110,88 +118,112 @@ public void configure(Map<String, Object> configuration) {
minWordLength = Math.max(minWordLength, DEFAULT_MIN_WORD_LENGTH);
}

private String getWordsToIgnore() {
private Set<String> getWordsToIgnore() {
String delimiter = ",";
String exceptions = SPACES_PATTERN.matcher(info.getResourceString("diagnosticExceptions")).replaceAll("");
if (!userWordsToIgnore.isEmpty()) {
exceptions = exceptions + "," + SPACES_PATTERN.matcher(userWordsToIgnore).replaceAll("");
exceptions = exceptions + delimiter + SPACES_PATTERN.matcher(userWordsToIgnore).replaceAll("");
}

return exceptions.intern();
return Arrays.stream(exceptions.split(delimiter))
.collect(Collectors.toSet());
}

JLanguageToolPoolEntry acquireLanguageTool(String lang) {
private static JLanguageTool acquireLanguageTool(String lang) {
return getLanguageToolPoolMap().get(lang).checkOut();
}

private static void releaseLanguageTool(String lang, JLanguageToolPoolEntry languageToolPoolEntry) {
getLanguageToolPoolMap().get(lang).checkIn(languageToolPoolEntry);
private static void releaseLanguageTool(String lang, JLanguageTool languageTool) {
getLanguageToolPoolMap().get(lang).checkIn(languageTool);
}

private String getTokenizedStringFromTokens(DocumentContext documentContext, Map<String, List<Token>> tokensMap) {
StringBuilder text = new StringBuilder();
private Map<String, List<Token>> getTokensMap(
DocumentContext documentContext
) {
Set<String> wordsToIgnore = getWordsToIgnore();
Map<String, List<Token>> tokensMap = new HashMap<>();

Trees.findAllRuleNodes(documentContext.getAst(), rulesToFind).stream()
.map(BSLParserRuleContext.class::cast)
.flatMap(ruleContext -> ruleContext.getTokens().stream())
.filter(token -> tokenTypes.contains(token.getType()))
.filter(token -> !FORMAT_STRING_PATTERN.matcher(token.getText()).find())
.forEach((Token token) -> {
String curText = QUOTE_PATTERN.matcher(token.getText()).replaceAll("");
var splitList = Arrays.asList(StringUtils.splitByCharacterTypeCamelCase(curText));
splitList.stream()
String curText = QUOTE_PATTERN.matcher(token.getText()).replaceAll("").trim();
String[] camelCaseSplitedWords = StringUtils.splitByCharacterTypeCamelCase(curText);

Arrays.stream(camelCaseSplitedWords)
.filter(Predicate.not(String::isBlank))
.filter(element -> element.length() >= minWordLength)
.filter(Predicate.not(wordsToIgnore::contains))
.forEach(element -> tokensMap.computeIfAbsent(element, newElement -> new ArrayList<>()).add(token));

text.append(" ");
text.append(String.join(" ", splitList));

}
);

return Arrays.stream(SPACES_PATTERN.split(text.toString().trim()))
.distinct()
.collect(Collectors.joining(" "));
return tokensMap;
}

@Override
protected void check() {

String lang = info.getResourceString("diagnosticLanguage");
Map<String, List<Token>> tokensMap = new HashMap<>();
Map<String, Boolean> checkedWordsForLang = checkedWords.get(lang);
Map<String, List<Token>> tokensMap = getTokensMap(documentContext);

JLanguageToolPoolEntry languageToolPoolEntry = acquireLanguageTool(lang);
JLanguageTool languageTool = languageToolPoolEntry.getLanguageTool(getWordsToIgnore());
// build string of unchecked words
Set<String> uncheckedWords = tokensMap.keySet().stream()
.filter(word -> !checkedWordsForLang.containsKey(word))
.collect(Collectors.toSet());

if (uncheckedWords.isEmpty()) {
fireDiagnosticOnCheckedWordsWithErrors(tokensMap);
return;
}

String result = getTokenizedStringFromTokens(documentContext, tokensMap);
// Join the words with a double \n so that LanguageTool treats each word as its own paragraph.
// Otherwise the results may be flaky because they depend on the order of the words in the file.
String uncheckedWordsString = String.join("\n\n", uncheckedWords);

JLanguageTool languageTool = acquireLanguageTool(lang);

List<RuleMatch> matches = Collections.emptyList();
try {
List<RuleMatch> matches = languageTool.check(
result,
matches = languageTool.check(
uncheckedWordsString,
true,
JLanguageTool.ParagraphHandling.ONLYNONPARA
);

if (!matches.isEmpty()) {

Set<Token> uniqueValues = new HashSet<>();
matches
.stream()
.filter(ruleMatch -> !ruleMatch.getSuggestedReplacements().isEmpty())
.map(ruleMatch -> result.substring(ruleMatch.getFromPos(), ruleMatch.getToPos()))
.forEach((String substring) -> {
List<Token> tokens = tokensMap.get(substring);
if (tokens != null) {
tokens.stream()
.filter(uniqueValues::add)
.forEach(token -> diagnosticStorage.addDiagnostic(token, info.getMessage(substring)));
}
});
}
} catch (IOException e) {
LOGGER.error(e.getMessage(), e);
} finally {
releaseLanguageTool(lang, languageTool);
}

releaseLanguageTool(lang, languageToolPoolEntry);
// check words and mark matched as checked
matches.stream()
.map(ruleMatch -> ruleMatch.getSentence().getTokens()[1].getToken())
.forEach(word -> checkedWordsForLang.put(word, true));

// mark unmatched words without errors as checked
uncheckedWords.forEach(word -> checkedWordsForLang.putIfAbsent(word, false));

fireDiagnosticOnCheckedWordsWithErrors(tokensMap);
}

/**
 * Adds a diagnostic to every token whose word is recorded with an error
 * in the cache of checked words for the diagnostic language.
 *
 * @param tokensMap words found in the document, each mapped to the tokens it came from
 */
private void fireDiagnosticOnCheckedWordsWithErrors(
  Map<String, List<Token>> tokensMap
) {
  Map<String, Boolean> knownResults = checkedWords.get(info.getResourceString("diagnosticLanguage"));

  for (Map.Entry<String, List<Token>> candidate : tokensMap.entrySet()) {
    String flaggedWord = candidate.getKey();

    // A word missing from the cache is treated as having no error.
    if (!knownResults.getOrDefault(flaggedWord, false)) {
      continue;
    }

    for (Token occurrence : candidate.getValue()) {
      diagnosticStorage.addDiagnostic(occurrence, info.getMessage(flaggedWord));
    }
  }
}

}
Original file line number Diff line number Diff line change
Expand Up @@ -23,15 +23,26 @@

import com.github._1c_syntax.bsl.languageserver.utils.AbstractObjectPool;
import lombok.AllArgsConstructor;
import org.languagetool.JLanguageTool;
import org.languagetool.Language;
import org.languagetool.rules.Rule;

import java.util.function.Predicate;

@AllArgsConstructor
public class JLanguageToolPool extends AbstractObjectPool<JLanguageToolPoolEntry> {
public class JLanguageToolPool extends AbstractObjectPool<JLanguageTool> {

private final Language language;

@Override
protected JLanguageToolPoolEntry create() {
return new JLanguageToolPoolEntry(language);
protected JLanguageTool create() {
JLanguageTool languageTool = new JLanguageTool(language);

languageTool.getAllRules().stream()
.filter(Predicate.not(Rule::isDictionaryBasedSpellingRule))
.map(Rule::getId)
.forEach(languageTool::disableRule);

return languageTool;
}
}

This file was deleted.

Original file line number Diff line number Diff line change
Expand Up @@ -77,13 +77,14 @@ void testGetCodeActions() {
List<CodeAction> codeActions = codeActionSupplier.getCodeActions(params, documentContext);

assertThat(codeActions)
.hasSize(10)
.hasSize(11)
.anyMatch(codeAction -> codeAction.getTitle().equals("Disable all diagnostic in file"))
.anyMatch(codeAction -> codeAction.getTitle().equals("Disable NumberOfValuesInStructureConstructor in file"))
.anyMatch(codeAction -> codeAction.getTitle().equals("Disable ExportVariables in file"))
.anyMatch(codeAction -> codeAction.getTitle().equals("Disable IfElseDuplicatedCondition in file"))
.anyMatch(codeAction -> codeAction.getTitle().equals("Disable CanonicalSpellingKeywords in file"))
.anyMatch(codeAction -> codeAction.getTitle().equals("Disable FunctionShouldHaveReturn in file"))
.anyMatch(codeAction -> codeAction.getTitle().equals("Disable Typo in file"))
.anyMatch(codeAction -> codeAction.getTitle().equals("Disable IfElseIfEndsWithElse in file"))
.anyMatch(codeAction -> codeAction.getTitle().equals("Disable MagicNumber in file"))
.anyMatch(codeAction -> codeAction.getTitle().equals("Disable MissingSpace in file"))
Expand Down Expand Up @@ -121,11 +122,13 @@ void testGetCodeActionsOneLine() {
List<CodeAction> codeActions = codeActionSupplier.getCodeActions(params, documentContext);

assertThat(codeActions)
.hasSize(6)
.hasSize(8)
.anyMatch(codeAction -> codeAction.getTitle().equals("Disable MagicNumber in line"))
.anyMatch(codeAction -> codeAction.getTitle().equals("Disable MissingSpace in line"))
.anyMatch(codeAction -> codeAction.getTitle().equals("Disable MagicNumber in file"))
.anyMatch(codeAction -> codeAction.getTitle().equals("Disable MissingSpace in file"))
.anyMatch(codeAction -> codeAction.getTitle().equals("Disable Typo in file"))
.anyMatch(codeAction -> codeAction.getTitle().equals("Disable Typo in line"))
.anyMatch(codeAction -> codeAction.getTitle().equals("Disable all diagnostic in line"))
.anyMatch(codeAction -> codeAction.getTitle().equals("Disable all diagnostic in file"));
}
Expand Down Expand Up @@ -158,11 +161,13 @@ void testGetCodeActionsRegion() {
List<CodeAction> codeActions = codeActionSupplier.getCodeActions(params, documentContext);

assertThat(codeActions)
.hasSize(8)
.hasSize(10)
.anyMatch(codeAction -> codeAction.getTitle().equals("Disable MagicNumber in range"))
.anyMatch(codeAction -> codeAction.getTitle().equals("Disable MissingSpace in range"))
.anyMatch(codeAction -> codeAction.getTitle().equals("Disable MagicNumber in file"))
.anyMatch(codeAction -> codeAction.getTitle().equals("Disable MissingSpace in file"))
.anyMatch(codeAction -> codeAction.getTitle().equals("Disable Typo in file"))
.anyMatch(codeAction -> codeAction.getTitle().equals("Disable Typo in range"))
.anyMatch(codeAction -> codeAction.getTitle().equals("Disable all diagnostic in range"))
.anyMatch(codeAction -> codeAction.getTitle().equals("Disable all diagnostic in file"))
.anyMatch(codeAction -> codeAction.getTitle().equals("Disable CanonicalSpellingKeywords in range"))
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,6 @@

import com.github._1c_syntax.bsl.languageserver.util.CleanupContextBeforeClassAndAfterEachTestMethod;
import org.eclipse.lsp4j.Diagnostic;
import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Test;
import org.springframework.boot.test.context.SpringBootTest;

Expand All @@ -40,12 +39,6 @@ class TypoDiagnosticTest extends AbstractDiagnosticTest<TypoDiagnostic> {
super(TypoDiagnostic.class);
}

@BeforeEach
void resetJLanguageToolPool() {
var lang = diagnosticInstance.getInfo().getResourceString("diagnosticLanguage");
diagnosticInstance.acquireLanguageTool(lang).getLanguageTool("Ы");
}

@Test
void test() {
Map<String, Object> configuration = diagnosticInstance.getInfo().getDefaultConfiguration();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -82,11 +82,11 @@ void testGetDocumentLinksEn() {
// then
assertThat(documentLinks)
.isNotEmpty()
.hasSize(6)
.hasSize(8)
.allMatch(documentLink -> documentLink.getTarget()
.startsWith(SITE_EN_URL))
.filteredOn(documentLink -> !documentLink.getTarget().endsWith(DIAGNOSTIC_CODE))
.hasSize(2);
.hasSize(4);
}

@Test
Expand Down

0 comments on commit eefcde8

Please sign in to comment.