From ca20318692c03093cabd4dd0a10f1f5bf4cdec0f Mon Sep 17 00:00:00 2001 From: overflowerror Date: Fri, 25 Feb 2022 18:13:31 +0100 Subject: [PATCH] rewrite of minimal path difference analysis old version was too slow to recognize nested identical paths because the complete context needed to be analyzed. now the token analysis saves meta information about whether a given token is from the context or the target element. using this we can detect nested identical paths by checking if the context starts at the same position if two token lists are identical. this can be done when ever the index list is full (no gaps), which is a lot sooner than the end of the tokenCombinations call. --- .../antlr/hoisting/HoistingProcessor.java | 11 +- .../hoisting/pathAnalysis/TokenAnalysis.java | 119 +++++++++++++----- .../pathAnalysis/TokenAnalysisPath.java | 4 +- .../pathAnalysis/TokenAnalysisPaths.java | 4 +- .../parser/antlr/hoisting/token/EofToken.java | 12 +- .../antlr/hoisting/token/KeywordToken.java | 11 +- .../hoisting/token/TerminalRuleToken.java | 11 +- .../parser/antlr/hoisting/token/Token.java | 41 ++++-- 8 files changed, 135 insertions(+), 78 deletions(-) diff --git a/org.eclipse.xtext.xtext.generator/src/org/eclipse/xtext/xtext/generator/parser/antlr/hoisting/HoistingProcessor.java b/org.eclipse.xtext.xtext.generator/src/org/eclipse/xtext/xtext/generator/parser/antlr/hoisting/HoistingProcessor.java index 05b56031d..977e5cb3d 100644 --- a/org.eclipse.xtext.xtext.generator/src/org/eclipse/xtext/xtext/generator/parser/antlr/hoisting/HoistingProcessor.java +++ b/org.eclipse.xtext.xtext.generator/src/org/eclipse/xtext/xtext/generator/parser/antlr/hoisting/HoistingProcessor.java @@ -324,15 +324,14 @@ public class HoistingProcessor { (TokenGuard tokenGuard, MergedPathGuard pathGuard) -> Tuples.pair(tokenGuard, pathGuard) ).map(p -> new PathGuard(p.getFirst(), p.getSecond())) .collect(AlternativesGuard.collector()); - } catch(NestedIdenticalPathException e) { + } catch (EndlessPrefixException e) { if (hasEndlessPrefix(paths)) { - // endless paths in combination with a NestedIndenticalPathException means - // there are probably endless prefixes in this element - // TODO: maybe only use predicates (analog to ANTLR) instead of error-ing out throw new EndlessPrefixException("endless prefix in more than two paths", currentRule); + } else { + throw new TokenAnalysisAbortedException("difference analysis failed: probably insufficient context", e, currentRule); } - + } catch(NestedIdenticalPathException e) { // nested identical paths // -> flatten paths to alternative and try again // this is very inefficient @@ -346,7 +345,7 @@ public class HoistingProcessor { if (config.isDebug()) log.info(abstractElementToString(flattened)); - + log.info(flattened.getElements().size()); // TODO: value configurable? if (flattened.getElements().size() > 100) { diff --git a/org.eclipse.xtext.xtext.generator/src/org/eclipse/xtext/xtext/generator/parser/antlr/hoisting/pathAnalysis/TokenAnalysis.java b/org.eclipse.xtext.xtext.generator/src/org/eclipse/xtext/xtext/generator/parser/antlr/hoisting/pathAnalysis/TokenAnalysis.java index ad184b015..aa973bb45 100644 --- a/org.eclipse.xtext.xtext.generator/src/org/eclipse/xtext/xtext/generator/parser/antlr/hoisting/pathAnalysis/TokenAnalysis.java +++ b/org.eclipse.xtext.xtext.generator/src/org/eclipse/xtext/xtext/generator/parser/antlr/hoisting/pathAnalysis/TokenAnalysis.java @@ -38,6 +38,7 @@ import org.eclipse.xtext.RuleCall; import org.eclipse.xtext.UnorderedGroup; import org.eclipse.xtext.util.XtextSwitch; import org.eclipse.xtext.xtext.generator.parser.antlr.hoisting.HoistingConfiguration; +import org.eclipse.xtext.xtext.generator.parser.antlr.hoisting.exceptions.EndlessPrefixException; import org.eclipse.xtext.xtext.generator.parser.antlr.hoisting.exceptions.NestedIdenticalPathException; import org.eclipse.xtext.xtext.generator.parser.antlr.hoisting.exceptions.TokenAnalysisAbortedException; import org.eclipse.xtext.xtext.generator.parser.antlr.hoisting.exceptions.UnsupportedConstructException; @@ -208,7 +209,7 @@ public class TokenAnalysis { TokenAnalysisPaths path = new TokenAnalysisPaths(prefix); path.resetProgress(); - path = getTokenPaths(element, path, false, false); + path = getTokenPaths(element, path, false, false, true); if (!path.isDone() && element != null) { boolean _considerCardinalities = true; @@ -252,11 +253,11 @@ public class TokenAnalysis { return result; } - private TokenAnalysisPaths getTokenPathsTrivial(Group path, TokenAnalysisPaths prefix) { + private TokenAnalysisPaths getTokenPathsTrivial(Group path, TokenAnalysisPaths prefix, boolean inContext) { TokenAnalysisPaths result = new TokenAnalysisPaths(prefix); for(AbstractElement element : path.getElements()) { - result = getTokenPaths(element, result, false, false); + result = getTokenPaths(element, result, false, false, inContext); if (result.isDone()) { break; } @@ -264,16 +265,16 @@ public class TokenAnalysis { return result; } - private TokenAnalysisPaths getTokenPathsTrivial(Alternatives path, TokenAnalysisPaths prefix) { + private TokenAnalysisPaths getTokenPathsTrivial(Alternatives path, TokenAnalysisPaths prefix, boolean inContext) { TokenAnalysisPaths result = TokenAnalysisPaths.empty(prefix); for(AbstractElement element : path.getElements()) { - result = result.merge(getTokenPaths(element, prefix, false, false)); + result = result.merge(getTokenPaths(element, prefix, false, false, inContext)); } return result; } - private TokenAnalysisPaths getTokenPathsTrivial(UnorderedGroup path, TokenAnalysisPaths prefix) { + private TokenAnalysisPaths getTokenPathsTrivial(UnorderedGroup path, TokenAnalysisPaths prefix, boolean inContext) { TokenAnalysisPaths result; TokenAnalysisPaths current; @@ -288,7 +289,7 @@ public class TokenAnalysis { current.resetProgress(); for (AbstractElement element : path.getElements()) { - current = current.merge(getTokenPaths(element, result, false, false)); + current = current.merge(getTokenPaths(element, result, false, false, inContext)); } result.resetProgress(); @@ -299,23 +300,23 @@ public class TokenAnalysis { return result; } - private TokenAnalysisPaths getTokenPathsTrivial(AbstractElement path, TokenAnalysisPaths prefix) { + private TokenAnalysisPaths getTokenPathsTrivial(AbstractElement path, TokenAnalysisPaths prefix, boolean inContext) { return new XtextSwitch() { @Override public TokenAnalysisPaths caseGroup(Group group) { - return getTokenPathsTrivial(group, prefix); + return getTokenPathsTrivial(group, prefix, inContext); }; @Override public TokenAnalysisPaths caseAlternatives(Alternatives alternatives) { - return getTokenPathsTrivial(alternatives, prefix); + return getTokenPathsTrivial(alternatives, prefix, inContext); }; @Override public TokenAnalysisPaths caseUnorderedGroup(UnorderedGroup unorderedGroup) { - return getTokenPathsTrivial(unorderedGroup, prefix); + return getTokenPathsTrivial(unorderedGroup, prefix, inContext); }; @Override public TokenAnalysisPaths caseAssignment(Assignment assignment) { - return getTokenPaths(assignment.getTerminal(), prefix, false, false); + return getTokenPaths(assignment.getTerminal(), prefix, false, false, inContext); }; @Override public TokenAnalysisPaths caseRuleCall(RuleCall call) { @@ -323,7 +324,7 @@ public class TokenAnalysis { isParserRuleCall(call) || isEnumRuleCall(call) ) { - return getTokenPaths(call.getRule().getAlternatives(), prefix, false, false); + return getTokenPaths(call.getRule().getAlternatives(), prefix, false, false, inContext); } else { // probably terminal rule call -> go to default case return null; @@ -335,7 +336,7 @@ public class TokenAnalysis { if (Token.isToken(element)) { TokenAnalysisPaths result = new TokenAnalysisPaths(prefix); - result.add(element); + result.add(element, inContext); return result; } else { // Actions, Predicates, JavaActions, ... @@ -356,6 +357,12 @@ public class TokenAnalysis { private TokenAnalysisPaths getTokenPaths( AbstractElement path, TokenAnalysisPaths prefix, boolean analyseContext, boolean needsLength + ) { + return getTokenPaths(path, prefix, analyseContext, needsLength, false); + } + + private TokenAnalysisPaths getTokenPaths( + AbstractElement path, TokenAnalysisPaths prefix, boolean analyseContext, boolean needsLength, boolean inContext ) { if (prefix.isDone()) { return prefix; @@ -365,16 +372,22 @@ public class TokenAnalysis { // empty path means EOF TokenAnalysisPaths result; result = new TokenAnalysisPaths(prefix); - result.add(path); + result.add(path, inContext); return result; } // use actual cardinality from path - return getTokenPaths(path, path.getCardinality(), prefix, analyseContext, needsLength); + return getTokenPaths(path, path.getCardinality(), prefix, analyseContext, needsLength, inContext); + } + + private TokenAnalysisPaths getTokenPaths( + AbstractElement path, String cardinality, TokenAnalysisPaths prefix, boolean analyseContext, boolean needsLength + ) { + return getTokenPaths(path, cardinality, prefix, analyseContext, needsLength, false); } private TokenAnalysisPaths getTokenPaths( - AbstractElement path, String cardinality, TokenAnalysisPaths prefix, boolean analyseContext, boolean needsLength + AbstractElement path, String cardinality, TokenAnalysisPaths prefix, boolean analyseContext, boolean needsLength, boolean inContext ) { // analyseContext implies needsLength @@ -387,7 +400,7 @@ public class TokenAnalysis { if (path == null) { // empty path means EOF result = new TokenAnalysisPaths(prefix); - result.add(path); + result.add(path, inContext); return result; } @@ -408,7 +421,7 @@ public class TokenAnalysis { do { result.resetProgress(); - TokenAnalysisPaths tokenPaths = getTokenPathsTrivial(path, result); + TokenAnalysisPaths tokenPaths = getTokenPathsTrivial(path, result, inContext); if (tokenPaths.isDone()) { result = result.merge(tokenPaths); @@ -501,7 +514,7 @@ public class TokenAnalysis { } else { // path length exhausted while searching for minimal differences // this indicates the presence of nested identical paths - throw new NestedIdenticalPathException(); + throw new EndlessPrefixException(); } } private boolean tokenCombinations(long prefix, int prefixLength, int ones, Function, Boolean> callback, MutableWrapper limit) { @@ -525,6 +538,8 @@ public class TokenAnalysis { if (tokenCombinations(current, i + 1, ones - 1, callback, limit)) { return true; } + } catch (NestedIdenticalPathException e) { + throw e; } catch (TokenAnalysisAbortedException e) { if (config.isDebug()) log.info("tokens exhausted: " + e.getMessage()); @@ -590,6 +605,9 @@ public class TokenAnalysis { } tokenCombinations(indexList -> { + int numberOfIndices = indexList.size(); + boolean isFullIndexList = indexList.get(numberOfIndices - 1) == numberOfIndices - 1; + if (config.isDebug()) log.info(indexList); @@ -605,19 +623,62 @@ public class TokenAnalysis { int size = result.size(); for (int i = 0; i < size; i++) { if (result.get(i) != null) { + // we already have a result for this path continue; } - List> tokenListOfCurrentPath = tokenListsForPaths.get(i); - if (!tokenListOfCurrentPath.stream() - .anyMatch(tokenList -> tokenListsForPaths.stream() - .filter(s -> s != tokenListOfCurrentPath) - // does any other path contain a similar token list (= alternative) ? - .anyMatch(s -> s.contains(tokenList)) - ) - ) { + List> tokenListsOfCurrentPath = tokenListsForPaths.get(i); + + boolean setResult = true; + + for (List tokenListOfCurrentPath : tokenListsOfCurrentPath) { + boolean doContextCheck = + isFullIndexList && // index list is full (no jumps) + numberOfIndices >= 2 && // check that number of tokens is enough + tokenListOfCurrentPath.get(numberOfIndices - 1).isInContext() && // context is used + !tokenListOfCurrentPath.get(numberOfIndices - 2).isInContext(); // we are only interested in the last token not in context + boolean doContextCheckWithoutToken = // for empty paths + isFullIndexList && // index list is full (no jumps) + numberOfIndices == 1 && // only one token given + tokenListOfCurrentPath.get(0).isInContext(); // first token is in context + + + for (int j = 0; j < size; j++) { + if (i == j) { + // don't compare to current path + continue; + } + + for (List tokenListOfOther : tokenListsForPaths.get(j)) { + if (tokenListOfCurrentPath.equals(tokenListOfOther)) { + setResult = false; + + if (doContextCheck) { + if (tokenListOfOther.get(numberOfIndices - 1).isInContext()) { + // last token of other is in context + if (!tokenListOfOther.get(numberOfIndices - 2).isInContext()) { + // context start is at the same position + // -> paths are identical + + throw new NestedIdenticalPathException("identical context start in identical token path"); + } + } + } + if (doContextCheckWithoutToken) { + if (tokenListOfOther.get(0).isInContext()) { + // this path also is empty + + throw new NestedIdenticalPathException("empty nested identical path"); + } + } + } + } + } + } + + if (setResult) { // token list set is unique for path i - result.set(i, tokenListOfCurrentPath); + result.set(i, tokenListsOfCurrentPath); } } diff --git a/org.eclipse.xtext.xtext.generator/src/org/eclipse/xtext/xtext/generator/parser/antlr/hoisting/pathAnalysis/TokenAnalysisPath.java b/org.eclipse.xtext.xtext.generator/src/org/eclipse/xtext/xtext/generator/parser/antlr/hoisting/pathAnalysis/TokenAnalysisPath.java index 883383943..f786bdfb1 100644 --- a/org.eclipse.xtext.xtext.generator/src/org/eclipse/xtext/xtext/generator/parser/antlr/hoisting/pathAnalysis/TokenAnalysisPath.java +++ b/org.eclipse.xtext.xtext.generator/src/org/eclipse/xtext/xtext/generator/parser/antlr/hoisting/pathAnalysis/TokenAnalysisPath.java @@ -49,12 +49,12 @@ public class TokenAnalysisPath { } } - boolean add(AbstractElement element) { + boolean add(AbstractElement element, boolean inContext) { if (isDone()) return false; if (remainingIndexes.get(0) <= 0) { - path.add(Token.fromElement(element, position)); + path.add(Token.fromElement(element, position, inContext)); remainingIndexes.remove(0); shift(); return true; diff --git a/org.eclipse.xtext.xtext.generator/src/org/eclipse/xtext/xtext/generator/parser/antlr/hoisting/pathAnalysis/TokenAnalysisPaths.java b/org.eclipse.xtext.xtext.generator/src/org/eclipse/xtext/xtext/generator/parser/antlr/hoisting/pathAnalysis/TokenAnalysisPaths.java index 35171007a..4d6be2431 100644 --- a/org.eclipse.xtext.xtext.generator/src/org/eclipse/xtext/xtext/generator/parser/antlr/hoisting/pathAnalysis/TokenAnalysisPaths.java +++ b/org.eclipse.xtext.xtext.generator/src/org/eclipse/xtext/xtext/generator/parser/antlr/hoisting/pathAnalysis/TokenAnalysisPaths.java @@ -59,9 +59,9 @@ public class TokenAnalysisPaths { hasCandidates = false; } - public void add(AbstractElement element) { + public void add(AbstractElement element, boolean inContext) { hasCandidates = true; - tokenPaths.forEach(p -> hasProgress = p.add(element) || hasProgress); + tokenPaths.forEach(p -> hasProgress = p.add(element, inContext) || hasProgress); } private boolean addAllDistinct(TokenAnalysisPaths other) { diff --git a/org.eclipse.xtext.xtext.generator/src/org/eclipse/xtext/xtext/generator/parser/antlr/hoisting/token/EofToken.java b/org.eclipse.xtext.xtext.generator/src/org/eclipse/xtext/xtext/generator/parser/antlr/hoisting/token/EofToken.java index 7d6c1fb52..188335871 100644 --- a/org.eclipse.xtext.xtext.generator/src/org/eclipse/xtext/xtext/generator/parser/antlr/hoisting/token/EofToken.java +++ b/org.eclipse.xtext.xtext.generator/src/org/eclipse/xtext/xtext/generator/parser/antlr/hoisting/token/EofToken.java @@ -13,12 +13,9 @@ import org.eclipse.xtext.AbstractElement; /** * @author overflow - Initial contribution and API */ -public class EofToken implements Token { - - private int position; +public class EofToken extends Token { - public EofToken(int position) { - this.position = position; + EofToken() { } @Override @@ -57,9 +54,4 @@ public class EofToken implements Token { public AbstractElement getElement() { return null; } - - @Override - public int getPosition() { - return position; - } } diff --git a/org.eclipse.xtext.xtext.generator/src/org/eclipse/xtext/xtext/generator/parser/antlr/hoisting/token/KeywordToken.java b/org.eclipse.xtext.xtext.generator/src/org/eclipse/xtext/xtext/generator/parser/antlr/hoisting/token/KeywordToken.java index 403844d62..aa47b3e81 100644 --- a/org.eclipse.xtext.xtext.generator/src/org/eclipse/xtext/xtext/generator/parser/antlr/hoisting/token/KeywordToken.java +++ b/org.eclipse.xtext.xtext.generator/src/org/eclipse/xtext/xtext/generator/parser/antlr/hoisting/token/KeywordToken.java @@ -14,13 +14,11 @@ import org.eclipse.xtext.Keyword; /** * @author overflow - Initial contribution and API */ -public class KeywordToken implements Token { +public class KeywordToken extends Token { private Keyword keyword; - private int position; - public KeywordToken(Keyword keyword, int position) { + KeywordToken(Keyword keyword) { this.keyword = keyword; - this.position = position; } @Override @@ -65,9 +63,4 @@ public class KeywordToken implements Token { public AbstractElement getElement() { return keyword; } - - @Override - public int getPosition() { - return position; - } } diff --git a/org.eclipse.xtext.xtext.generator/src/org/eclipse/xtext/xtext/generator/parser/antlr/hoisting/token/TerminalRuleToken.java b/org.eclipse.xtext.xtext.generator/src/org/eclipse/xtext/xtext/generator/parser/antlr/hoisting/token/TerminalRuleToken.java index 577094672..02d6ea8a8 100644 --- a/org.eclipse.xtext.xtext.generator/src/org/eclipse/xtext/xtext/generator/parser/antlr/hoisting/token/TerminalRuleToken.java +++ b/org.eclipse.xtext.xtext.generator/src/org/eclipse/xtext/xtext/generator/parser/antlr/hoisting/token/TerminalRuleToken.java @@ -15,15 +15,13 @@ import org.eclipse.xtext.TerminalRule; /** * @author overflow - Initial contribution and API */ -public class TerminalRuleToken implements Token { +public class TerminalRuleToken extends Token { private RuleCall call; private TerminalRule rule; - private int position; - TerminalRuleToken(RuleCall call, TerminalRule rule, int position) { + TerminalRuleToken(RuleCall call, TerminalRule rule) { this.call = call; this.rule = rule; - this.position = position; } @Override @@ -68,9 +66,4 @@ public class TerminalRuleToken implements Token { public AbstractElement getElement() { return call; } - - @Override - public int getPosition() { - return position; - } } diff --git a/org.eclipse.xtext.xtext.generator/src/org/eclipse/xtext/xtext/generator/parser/antlr/hoisting/token/Token.java b/org.eclipse.xtext.xtext.generator/src/org/eclipse/xtext/xtext/generator/parser/antlr/hoisting/token/Token.java index db0b73afb..a96e17215 100644 --- a/org.eclipse.xtext.xtext.generator/src/org/eclipse/xtext/xtext/generator/parser/antlr/hoisting/token/Token.java +++ b/org.eclipse.xtext.xtext.generator/src/org/eclipse/xtext/xtext/generator/parser/antlr/hoisting/token/Token.java @@ -19,14 +19,24 @@ import org.eclipse.xtext.xtext.generator.parser.antlr.hoisting.exceptions.NotATo /** * @author overflow - Initial contribution and API */ -public interface Token { - String negatedCondition(); +public abstract class Token { + public abstract String negatedCondition(); - AbstractElement getElement(); + public abstract AbstractElement getElement(); - int getPosition(); + protected int position; + private boolean inContext; - static boolean isToken(AbstractElement element) { + public int getPosition() { + return position; + } + + public boolean isInContext() { + return inContext; + } + + + public static boolean isToken(AbstractElement element) { if (element == null) { return true; } else if (element instanceof Keyword) { @@ -40,20 +50,29 @@ public interface Token { } } - static Token fromElement(AbstractElement element, int position) { + public static Token fromElement(AbstractElement element, int position, boolean inContext) { + + Token token = null; + if (element == null) { - return new EofToken(position); + token = new EofToken(); } else if (element instanceof Keyword) { - return new KeywordToken((Keyword) element, position); + token = new KeywordToken((Keyword) element); } else if (element instanceof RuleCall) { AbstractRule rule = ((RuleCall) element).getRule(); if (rule instanceof TerminalRule) { - return new TerminalRuleToken((RuleCall) element, (TerminalRule) rule, position); + token = new TerminalRuleToken((RuleCall) element, (TerminalRule) rule); } } else if (element instanceof EnumLiteralDeclaration) { - return new KeywordToken(((EnumLiteralDeclaration) element).getLiteral(), position); + token = new KeywordToken(((EnumLiteralDeclaration) element).getLiteral()); } - throw new NotATokenException(element.eClass().getName()); + if (token == null) { + throw new NotATokenException(element.eClass().getName()); + } else { + token.position = position; + token.inContext = inContext; + return token; + } } }