rewrite of minimal path difference analysis

old version was too slow to recognize nested identical paths because the
complete context needed to be analyzed.

now the token analysis saves meta information about whether a given
token is from the context or the target element. using this we can
detect nested identical paths by checking if the context starts at the
same position if two token lists are identical.
this can be done when ever the index list is full (no gaps), which is a
lot sooner than the end of the tokenCombinations call.
This commit is contained in:
overflowerror 2022-02-25 18:13:31 +01:00
parent e6d0b4a23b
commit ca20318692
8 changed files with 135 additions and 78 deletions
org.eclipse.xtext.xtext.generator/src/org/eclipse/xtext/xtext/generator/parser/antlr/hoisting

View file

@ -324,15 +324,14 @@ public class HoistingProcessor {
(TokenGuard tokenGuard, MergedPathGuard pathGuard) -> Tuples.pair(tokenGuard, pathGuard)
).map(p -> new PathGuard(p.getFirst(), p.getSecond()))
.collect(AlternativesGuard.collector());
} catch(NestedIdenticalPathException e) {
} catch (EndlessPrefixException e) {
if (hasEndlessPrefix(paths)) {
// endless paths in combination with a NestedIndenticalPathException means
// there are probably endless prefixes in this element
// TODO: maybe only use predicates (analog to ANTLR) instead of error-ing out
throw new EndlessPrefixException("endless prefix in more than two paths", currentRule);
} else {
throw new TokenAnalysisAbortedException("difference analysis failed: probably insufficient context", e, currentRule);
}
} catch(NestedIdenticalPathException e) {
// nested identical paths
// -> flatten paths to alternative and try again
// this is very inefficient
@ -346,7 +345,7 @@ public class HoistingProcessor {
if (config.isDebug())
log.info(abstractElementToString(flattened));
log.info(flattened.getElements().size());
// TODO: value configurable?
if (flattened.getElements().size() > 100) {

View file

@ -38,6 +38,7 @@ import org.eclipse.xtext.RuleCall;
import org.eclipse.xtext.UnorderedGroup;
import org.eclipse.xtext.util.XtextSwitch;
import org.eclipse.xtext.xtext.generator.parser.antlr.hoisting.HoistingConfiguration;
import org.eclipse.xtext.xtext.generator.parser.antlr.hoisting.exceptions.EndlessPrefixException;
import org.eclipse.xtext.xtext.generator.parser.antlr.hoisting.exceptions.NestedIdenticalPathException;
import org.eclipse.xtext.xtext.generator.parser.antlr.hoisting.exceptions.TokenAnalysisAbortedException;
import org.eclipse.xtext.xtext.generator.parser.antlr.hoisting.exceptions.UnsupportedConstructException;
@ -208,7 +209,7 @@ public class TokenAnalysis {
TokenAnalysisPaths path = new TokenAnalysisPaths(prefix);
path.resetProgress();
path = getTokenPaths(element, path, false, false);
path = getTokenPaths(element, path, false, false, true);
if (!path.isDone() && element != null) {
boolean _considerCardinalities = true;
@ -252,11 +253,11 @@ public class TokenAnalysis {
return result;
}
private TokenAnalysisPaths getTokenPathsTrivial(Group path, TokenAnalysisPaths prefix) {
private TokenAnalysisPaths getTokenPathsTrivial(Group path, TokenAnalysisPaths prefix, boolean inContext) {
TokenAnalysisPaths result = new TokenAnalysisPaths(prefix);
for(AbstractElement element : path.getElements()) {
result = getTokenPaths(element, result, false, false);
result = getTokenPaths(element, result, false, false, inContext);
if (result.isDone()) {
break;
}
@ -264,16 +265,16 @@ public class TokenAnalysis {
return result;
}
private TokenAnalysisPaths getTokenPathsTrivial(Alternatives path, TokenAnalysisPaths prefix) {
private TokenAnalysisPaths getTokenPathsTrivial(Alternatives path, TokenAnalysisPaths prefix, boolean inContext) {
TokenAnalysisPaths result = TokenAnalysisPaths.empty(prefix);
for(AbstractElement element : path.getElements()) {
result = result.merge(getTokenPaths(element, prefix, false, false));
result = result.merge(getTokenPaths(element, prefix, false, false, inContext));
}
return result;
}
private TokenAnalysisPaths getTokenPathsTrivial(UnorderedGroup path, TokenAnalysisPaths prefix) {
private TokenAnalysisPaths getTokenPathsTrivial(UnorderedGroup path, TokenAnalysisPaths prefix, boolean inContext) {
TokenAnalysisPaths result;
TokenAnalysisPaths current;
@ -288,7 +289,7 @@ public class TokenAnalysis {
current.resetProgress();
for (AbstractElement element : path.getElements()) {
current = current.merge(getTokenPaths(element, result, false, false));
current = current.merge(getTokenPaths(element, result, false, false, inContext));
}
result.resetProgress();
@ -299,23 +300,23 @@ public class TokenAnalysis {
return result;
}
private TokenAnalysisPaths getTokenPathsTrivial(AbstractElement path, TokenAnalysisPaths prefix) {
private TokenAnalysisPaths getTokenPathsTrivial(AbstractElement path, TokenAnalysisPaths prefix, boolean inContext) {
return new XtextSwitch<TokenAnalysisPaths>() {
@Override
public TokenAnalysisPaths caseGroup(Group group) {
return getTokenPathsTrivial(group, prefix);
return getTokenPathsTrivial(group, prefix, inContext);
};
@Override
public TokenAnalysisPaths caseAlternatives(Alternatives alternatives) {
return getTokenPathsTrivial(alternatives, prefix);
return getTokenPathsTrivial(alternatives, prefix, inContext);
};
@Override
public TokenAnalysisPaths caseUnorderedGroup(UnorderedGroup unorderedGroup) {
return getTokenPathsTrivial(unorderedGroup, prefix);
return getTokenPathsTrivial(unorderedGroup, prefix, inContext);
};
@Override
public TokenAnalysisPaths caseAssignment(Assignment assignment) {
return getTokenPaths(assignment.getTerminal(), prefix, false, false);
return getTokenPaths(assignment.getTerminal(), prefix, false, false, inContext);
};
@Override
public TokenAnalysisPaths caseRuleCall(RuleCall call) {
@ -323,7 +324,7 @@ public class TokenAnalysis {
isParserRuleCall(call) ||
isEnumRuleCall(call)
) {
return getTokenPaths(call.getRule().getAlternatives(), prefix, false, false);
return getTokenPaths(call.getRule().getAlternatives(), prefix, false, false, inContext);
} else {
// probably terminal rule call -> go to default case
return null;
@ -335,7 +336,7 @@ public class TokenAnalysis {
if (Token.isToken(element)) {
TokenAnalysisPaths result = new TokenAnalysisPaths(prefix);
result.add(element);
result.add(element, inContext);
return result;
} else {
// Actions, Predicates, JavaActions, ...
@ -356,6 +357,12 @@ public class TokenAnalysis {
private TokenAnalysisPaths getTokenPaths(
AbstractElement path, TokenAnalysisPaths prefix, boolean analyseContext, boolean needsLength
) {
return getTokenPaths(path, prefix, analyseContext, needsLength, false);
}
private TokenAnalysisPaths getTokenPaths(
AbstractElement path, TokenAnalysisPaths prefix, boolean analyseContext, boolean needsLength, boolean inContext
) {
if (prefix.isDone()) {
return prefix;
@ -365,16 +372,22 @@ public class TokenAnalysis {
// empty path means EOF
TokenAnalysisPaths result;
result = new TokenAnalysisPaths(prefix);
result.add(path);
result.add(path, inContext);
return result;
}
// use actual cardinality from path
return getTokenPaths(path, path.getCardinality(), prefix, analyseContext, needsLength);
return getTokenPaths(path, path.getCardinality(), prefix, analyseContext, needsLength, inContext);
}
private TokenAnalysisPaths getTokenPaths(
AbstractElement path, String cardinality, TokenAnalysisPaths prefix, boolean analyseContext, boolean needsLength
) {
return getTokenPaths(path, cardinality, prefix, analyseContext, needsLength, false);
}
private TokenAnalysisPaths getTokenPaths(
AbstractElement path, String cardinality, TokenAnalysisPaths prefix, boolean analyseContext, boolean needsLength
AbstractElement path, String cardinality, TokenAnalysisPaths prefix, boolean analyseContext, boolean needsLength, boolean inContext
) {
// analyseContext implies needsLength
@ -387,7 +400,7 @@ public class TokenAnalysis {
if (path == null) {
// empty path means EOF
result = new TokenAnalysisPaths(prefix);
result.add(path);
result.add(path, inContext);
return result;
}
@ -408,7 +421,7 @@ public class TokenAnalysis {
do {
result.resetProgress();
TokenAnalysisPaths tokenPaths = getTokenPathsTrivial(path, result);
TokenAnalysisPaths tokenPaths = getTokenPathsTrivial(path, result, inContext);
if (tokenPaths.isDone()) {
result = result.merge(tokenPaths);
@ -501,7 +514,7 @@ public class TokenAnalysis {
} else {
// path length exhausted while searching for minimal differences
// this indicates the presence of nested identical paths
throw new NestedIdenticalPathException();
throw new EndlessPrefixException();
}
}
private boolean tokenCombinations(long prefix, int prefixLength, int ones, Function<List<Integer>, Boolean> callback, MutableWrapper<Integer> limit) {
@ -525,6 +538,8 @@ public class TokenAnalysis {
if (tokenCombinations(current, i + 1, ones - 1, callback, limit)) {
return true;
}
} catch (NestedIdenticalPathException e) {
throw e;
} catch (TokenAnalysisAbortedException e) {
if (config.isDebug())
log.info("tokens exhausted: " + e.getMessage());
@ -590,6 +605,9 @@ public class TokenAnalysis {
}
tokenCombinations(indexList -> {
int numberOfIndices = indexList.size();
boolean isFullIndexList = indexList.get(numberOfIndices - 1) == numberOfIndices - 1;
if (config.isDebug())
log.info(indexList);
@ -605,19 +623,62 @@ public class TokenAnalysis {
int size = result.size();
for (int i = 0; i < size; i++) {
if (result.get(i) != null) {
// we already have a result for this path
continue;
}
List<List<Token>> tokenListOfCurrentPath = tokenListsForPaths.get(i);
if (!tokenListOfCurrentPath.stream()
.anyMatch(tokenList -> tokenListsForPaths.stream()
.filter(s -> s != tokenListOfCurrentPath)
// does any other path contain a similar token list (= alternative) ?
.anyMatch(s -> s.contains(tokenList))
)
) {
List<List<Token>> tokenListsOfCurrentPath = tokenListsForPaths.get(i);
boolean setResult = true;
for (List<Token> tokenListOfCurrentPath : tokenListsOfCurrentPath) {
boolean doContextCheck =
isFullIndexList && // index list is full (no jumps)
numberOfIndices >= 2 && // check that number of tokens is enough
tokenListOfCurrentPath.get(numberOfIndices - 1).isInContext() && // context is used
!tokenListOfCurrentPath.get(numberOfIndices - 2).isInContext(); // we are only interested in the last token not in context
boolean doContextCheckWithoutToken = // for empty paths
isFullIndexList && // index list is full (no jumps)
numberOfIndices == 1 && // only one token given
tokenListOfCurrentPath.get(0).isInContext(); // first token is in context
for (int j = 0; j < size; j++) {
if (i == j) {
// don't compare to current path
continue;
}
for (List<Token> tokenListOfOther : tokenListsForPaths.get(j)) {
if (tokenListOfCurrentPath.equals(tokenListOfOther)) {
setResult = false;
if (doContextCheck) {
if (tokenListOfOther.get(numberOfIndices - 1).isInContext()) {
// last token of other is in context
if (!tokenListOfOther.get(numberOfIndices - 2).isInContext()) {
// context start is at the same position
// -> paths are identical
throw new NestedIdenticalPathException("identical context start in identical token path");
}
}
}
if (doContextCheckWithoutToken) {
if (tokenListOfOther.get(0).isInContext()) {
// this path also is empty
throw new NestedIdenticalPathException("empty nested identical path");
}
}
}
}
}
}
if (setResult) {
// token list set is unique for path i
result.set(i, tokenListOfCurrentPath);
result.set(i, tokenListsOfCurrentPath);
}
}

View file

@ -49,12 +49,12 @@ public class TokenAnalysisPath {
}
}
boolean add(AbstractElement element) {
boolean add(AbstractElement element, boolean inContext) {
if (isDone())
return false;
if (remainingIndexes.get(0) <= 0) {
path.add(Token.fromElement(element, position));
path.add(Token.fromElement(element, position, inContext));
remainingIndexes.remove(0);
shift();
return true;

View file

@ -59,9 +59,9 @@ public class TokenAnalysisPaths {
hasCandidates = false;
}
public void add(AbstractElement element) {
public void add(AbstractElement element, boolean inContext) {
hasCandidates = true;
tokenPaths.forEach(p -> hasProgress = p.add(element) || hasProgress);
tokenPaths.forEach(p -> hasProgress = p.add(element, inContext) || hasProgress);
}
private boolean addAllDistinct(TokenAnalysisPaths other) {

View file

@ -13,12 +13,9 @@ import org.eclipse.xtext.AbstractElement;
/**
* @author overflow - Initial contribution and API
*/
public class EofToken implements Token {
private int position;
public class EofToken extends Token {
public EofToken(int position) {
this.position = position;
EofToken() {
}
@Override
@ -57,9 +54,4 @@ public class EofToken implements Token {
public AbstractElement getElement() {
return null;
}
@Override
public int getPosition() {
return position;
}
}

View file

@ -14,13 +14,11 @@ import org.eclipse.xtext.Keyword;
/**
* @author overflow - Initial contribution and API
*/
public class KeywordToken implements Token {
public class KeywordToken extends Token {
private Keyword keyword;
private int position;
public KeywordToken(Keyword keyword, int position) {
KeywordToken(Keyword keyword) {
this.keyword = keyword;
this.position = position;
}
@Override
@ -65,9 +63,4 @@ public class KeywordToken implements Token {
public AbstractElement getElement() {
return keyword;
}
@Override
public int getPosition() {
return position;
}
}

View file

@ -15,15 +15,13 @@ import org.eclipse.xtext.TerminalRule;
/**
* @author overflow - Initial contribution and API
*/
public class TerminalRuleToken implements Token {
public class TerminalRuleToken extends Token {
private RuleCall call;
private TerminalRule rule;
private int position;
TerminalRuleToken(RuleCall call, TerminalRule rule, int position) {
TerminalRuleToken(RuleCall call, TerminalRule rule) {
this.call = call;
this.rule = rule;
this.position = position;
}
@Override
@ -68,9 +66,4 @@ public class TerminalRuleToken implements Token {
public AbstractElement getElement() {
return call;
}
@Override
public int getPosition() {
return position;
}
}

View file

@ -19,14 +19,24 @@ import org.eclipse.xtext.xtext.generator.parser.antlr.hoisting.exceptions.NotATo
/**
* @author overflow - Initial contribution and API
*/
public interface Token {
String negatedCondition();
public abstract class Token {
public abstract String negatedCondition();
AbstractElement getElement();
public abstract AbstractElement getElement();
int getPosition();
protected int position;
private boolean inContext;
static boolean isToken(AbstractElement element) {
public int getPosition() {
return position;
}
public boolean isInContext() {
return inContext;
}
public static boolean isToken(AbstractElement element) {
if (element == null) {
return true;
} else if (element instanceof Keyword) {
@ -40,20 +50,29 @@ public interface Token {
}
}
static Token fromElement(AbstractElement element, int position) {
public static Token fromElement(AbstractElement element, int position, boolean inContext) {
Token token = null;
if (element == null) {
return new EofToken(position);
token = new EofToken();
} else if (element instanceof Keyword) {
return new KeywordToken((Keyword) element, position);
token = new KeywordToken((Keyword) element);
} else if (element instanceof RuleCall) {
AbstractRule rule = ((RuleCall) element).getRule();
if (rule instanceof TerminalRule) {
return new TerminalRuleToken((RuleCall) element, (TerminalRule) rule, position);
token = new TerminalRuleToken((RuleCall) element, (TerminalRule) rule);
}
} else if (element instanceof EnumLiteralDeclaration) {
return new KeywordToken(((EnumLiteralDeclaration) element).getLiteral(), position);
token = new KeywordToken(((EnumLiteralDeclaration) element).getLiteral());
}
throw new NotATokenException(element.eClass().getName());
if (token == null) {
throw new NotATokenException(element.eClass().getName());
} else {
token.position = position;
token.inContext = inContext;
return token;
}
}
}