From dcf7c578c706d0221fcd5f80fb73b477aa6c3f20 Mon Sep 17 00:00:00 2001 From: jkohnlein Date: Fri, 4 Jul 2008 11:35:53 +0000 Subject: [PATCH] Refactored parser: - Don't hand in currentNode in createLeafNode - Hand lexer errors through --- .../parser/antlr/AbstractAntlrParser.java | 57 +++++------- .../org/eclipse/xtext/parser/antlr/Lexer.java | 81 +++++++++++++++++ .../xtext/parser/antlr/XtextTokenStream.java | 91 ++++++++----------- .../xtext/parser/impl/PartialParsingUtil.java | 10 +- .../xtext/parsetree/InvalidTokenTest.java | 29 ++++++ 5 files changed, 174 insertions(+), 94 deletions(-) create mode 100644 plugins/org.eclipse.xtext/src/org/eclipse/xtext/parser/antlr/Lexer.java create mode 100644 tests/org.eclipse.xtext.generator.tests/src/org/eclipse/xtext/parsetree/InvalidTokenTest.java diff --git a/plugins/org.eclipse.xtext/src/org/eclipse/xtext/parser/antlr/AbstractAntlrParser.java b/plugins/org.eclipse.xtext/src/org/eclipse/xtext/parser/antlr/AbstractAntlrParser.java index 2856f0954..2b8260c49 100644 --- a/plugins/org.eclipse.xtext/src/org/eclipse/xtext/parser/antlr/AbstractAntlrParser.java +++ b/plugins/org.eclipse.xtext/src/org/eclipse/xtext/parser/antlr/AbstractAntlrParser.java @@ -81,25 +81,20 @@ public abstract class AbstractAntlrParser extends Parser { } } - protected Object createLeafNode(String grammarElementID, CompositeNode parentNode, String feature) { + protected Object createLeafNode(String grammarElementID, String feature) { Token token = input.LT(-1); if (token.getTokenIndex() > lastConsumedIndex) { int indexOfTokenBefore = lastConsumedIndex; if (indexOfTokenBefore + 1 < token.getTokenIndex()) { for (int x = indexOfTokenBefore + 1; x < token.getTokenIndex(); x++) { Token hidden = input.get(x); - LeafNode leafNode = createLeafNode(isSemanticChannel(hidden)); - leafNode.setText(hidden.getText()); - leafNode.setHidden(true); + LeafNode leafNode = createLeafNode(hidden, true); setLexerRule(leafNode, hidden); - parentNode.getChildren().add(leafNode); } } - LeafNode leafNode = createLeafNode(isSemanticChannel(token)); - leafNode.setText(token.getText()); + LeafNode leafNode = createLeafNode(token, false); leafNode.setGrammarElement(getGrammarElement(grammarElementID)); leafNode.setFeature(feature); - parentNode.getChildren().add(leafNode); lastConsumedIndex = token.getTokenIndex(); tokenConsumed(token, leafNode); return leafNode; @@ -168,21 +163,27 @@ public abstract class AbstractAntlrParser extends Parser { } } - private LeafNode createLeafNode(boolean appendError) { - LeafNode ln = ParsetreeFactory.eINSTANCE.createLeafNode(); - if (appendError) - appendError(ln); - return ln; + private LeafNode createLeafNode(Token token, boolean isHidden) { + LeafNode leafNode = ParsetreeFactory.eINSTANCE.createLeafNode(); + leafNode.setText(token.getText()); + leafNode.setHidden(isHidden); + if (isSemanticChannel(token)) + appendError(leafNode); + if(token.getType() == Token.INVALID_TOKEN_TYPE) { + SyntaxError error = ParsetreeFactory.eINSTANCE.createSyntaxError(); + String lexerErrorMessage = ((XtextTokenStream)input).getLexerErrorMessage(token); + error.setMessage(lexerErrorMessage); + leafNode.setSyntaxError(error); + } + currentNode.getChildren().add(leafNode); + return leafNode; } protected void appendAllTokens() { for (int x = lastConsumedIndex + 1; input.size() > x; input.consume(), x++) { Token hidden = input.get(x); - LeafNode leafNode = createLeafNode(isSemanticChannel(hidden)); - leafNode.setText(hidden.getText()); - leafNode.setHidden(true); + LeafNode leafNode = createLeafNode(hidden, true); setLexerRule(leafNode, hidden); - currentNode.getChildren().add(leafNode); } if (currentError != null) { EList leafNodes = currentNode.getLeafNodes(); @@ -208,37 +209,28 @@ public abstract class AbstractAntlrParser extends Parser { if (indexOfTokenBefore + 1 < currentTokenIndex) { for (int x = indexOfTokenBefore + 1; x < currentTokenIndex; x++) { Token hidden = input.get(x); - LeafNode leafNode = createLeafNode(isSemanticChannel(hidden)); - leafNode.setText(hidden.getText()); - leafNode.setHidden(true); + LeafNode leafNode = createLeafNode(hidden, true); setLexerRule(leafNode, hidden); - currentNode.getChildren().add(leafNode); skipped.add(leafNode); } } if (lastConsumedIndex < currentTokenIndex) { - LeafNode leafNode = createLeafNode(isSemanticChannel(currentToken)); - leafNode.setText(currentToken.getText()); - leafNode.setHidden(true); + LeafNode leafNode = createLeafNode(currentToken, true); setLexerRule(leafNode, currentToken); - currentNode.getChildren().add(leafNode); skipped.add(leafNode); lastConsumedIndex = currentToken.getTokenIndex(); } return skipped; } - protected void appendTrailingHiddenTokens(CompositeNode parentNode) { + protected void appendTrailingHiddenTokens() { Token tokenBefore = input.LT(-1); int size = input.size(); if (tokenBefore != null && tokenBefore.getTokenIndex() < size) { for (int x = tokenBefore.getTokenIndex() + 1; x < size; x++) { Token hidden = input.get(x); - LeafNode leafNode = createLeafNode(isSemanticChannel(hidden)); - leafNode.setText(hidden.getText()); - leafNode.setHidden(true); + LeafNode leafNode = createLeafNode(hidden, true); setLexerRule(leafNode, hidden); - parentNode.getChildren().add(leafNode); lastConsumedIndex = hidden.getTokenIndex(); } } @@ -287,7 +279,7 @@ public abstract class AbstractAntlrParser extends Parser { catch (Exception e) { throw new WrappedException(e); } - appendTrailingHiddenTokens(currentNode); + appendTrailingHiddenTokens(); } finally { try { @@ -323,8 +315,6 @@ public abstract class AbstractAntlrParser extends Parser { } deferredLookaheadMap.remove(token); token2NodeMap.put(token, leafNode); - ((XtextTokenStream) input).consumeLookahead(); - // ((XtextTokenStream) input).decrementLookahead(); } /** @@ -382,7 +372,6 @@ public abstract class AbstractAntlrParser extends Parser { public void match(IntStream input, int ttype, BitSet follow) throws RecognitionException { super.match(input, ttype, follow); ((XtextTokenStream) input).removeLastLookaheadToken(); - ((XtextTokenStream) input).decrementLookahead(); } protected InputStream getTokenFile() { diff --git a/plugins/org.eclipse.xtext/src/org/eclipse/xtext/parser/antlr/Lexer.java b/plugins/org.eclipse.xtext/src/org/eclipse/xtext/parser/antlr/Lexer.java new file mode 100644 index 000000000..4a488a09c --- /dev/null +++ b/plugins/org.eclipse.xtext/src/org/eclipse/xtext/parser/antlr/Lexer.java @@ -0,0 +1,81 @@ +/******************************************************************************* + * Copyright (c) 2008 itemis AG (http://www.itemis.eu) and others. + * All rights reserved. This program and the accompanying materials + * are made available under the terms of the Eclipse Public License v1.0 + * which accompanies this distribution, and is available at + * http://www.eclipse.org/legal/epl-v10.html + * + *******************************************************************************/ +package org.eclipse.xtext.parser.antlr; + +import java.util.HashMap; +import java.util.Map; + +import org.antlr.runtime.CharStream; +import org.antlr.runtime.CommonToken; +import org.antlr.runtime.NoViableAltException; +import org.antlr.runtime.RecognitionException; +import org.antlr.runtime.Token; + +/** + * Hack: As AntLR does not allow to define the superclass of the generated + * lexer, we call this class Lexer and import it to be used instead of the + * original lexer class. + * + * @author Jan Köhnlein - Initial contribution and API + * + */ +public abstract class Lexer extends org.antlr.runtime.Lexer { + + public Lexer() { + super(); + } + + public Lexer(CharStream input) { + super(input); + } + + private Map tokenErrorMap = new HashMap(); + + public Token nextToken() { + while (true) { + this.token = null; + this.channel = Token.DEFAULT_CHANNEL; + this.tokenStartCharIndex = input.index(); + this.tokenStartCharPositionInLine = input.getCharPositionInLine(); + this.tokenStartLine = input.getLine(); + this.text = null; + if (input.LA(1) == CharStream.EOF) { + return Token.EOF_TOKEN; + } + try { + mTokens(); + if (this.token == null) { + emit(); + } + else if (this.token == Token.SKIP_TOKEN) { + continue; + } + return this.token; + } + catch (RecognitionException re) { + reportError(re); + if (re instanceof NoViableAltException) { + recover(re); + } + // create token that holds mismatched char + Token t = new CommonToken(input, Token.INVALID_TOKEN_TYPE, Token.HIDDEN_CHANNEL, + this.tokenStartCharIndex, getCharIndex() - 1); + t.setLine(this.tokenStartLine); + t.setCharPositionInLine(this.tokenStartCharPositionInLine); + tokenErrorMap.put(t, getErrorMessage(re, this.getTokenNames())); + emit(t); + return this.token; + } + } + } + + public String getErrorMessage(Token t) { + return tokenErrorMap.get(t); + } +} diff --git a/plugins/org.eclipse.xtext/src/org/eclipse/xtext/parser/antlr/XtextTokenStream.java b/plugins/org.eclipse.xtext/src/org/eclipse/xtext/parser/antlr/XtextTokenStream.java index cfbb91868..416d084a3 100644 --- a/plugins/org.eclipse.xtext/src/org/eclipse/xtext/parser/antlr/XtextTokenStream.java +++ b/plugins/org.eclipse.xtext/src/org/eclipse/xtext/parser/antlr/XtextTokenStream.java @@ -9,7 +9,9 @@ package org.eclipse.xtext.parser.antlr; import java.util.ArrayList; +import java.util.HashMap; import java.util.List; +import java.util.Map; import org.antlr.runtime.CommonTokenStream; import org.antlr.runtime.Token; @@ -22,46 +24,37 @@ import org.antlr.runtime.TokenSource; */ public class XtextTokenStream extends CommonTokenStream { - private int currentLookahead; - private int lookaheadConsumedByParent; + private List lookaheadTokens = new ArrayList(); - private List lookaheadTokens = new ArrayList(); - - public XtextTokenStream() { - super(); - } + private Map tokenErrorMap = new HashMap(); - public XtextTokenStream(TokenSource tokenSource, int channel) { - super(tokenSource, channel); - } + public XtextTokenStream() { + super(); + } - public XtextTokenStream(TokenSource tokenSource) { - super(tokenSource); - } + public XtextTokenStream(TokenSource tokenSource, int channel) { + super(tokenSource, channel); + } - /* - * (non-Javadoc) - * - * @see org.antlr.runtime.CommonTokenStream#LA(int) - */ - @Override - public int LA(int i) { - currentLookahead = Math.max(currentLookahead, i); - Token lookaheadToken = LT(i); - if(!lookaheadTokens.contains(lookaheadToken)) { - lookaheadTokens.add(lookaheadToken); - } - return super.LA(i); - } + public XtextTokenStream(TokenSource tokenSource) { + super(tokenSource); + } - /** - * @return the currentLookahead - */ - public int getCurrentLookahead() { - return currentLookahead; - } + /* + * (non-Javadoc) + * + * @see org.antlr.runtime.CommonTokenStream#LA(int) + */ + @Override + public int LA(int i) { + Token lookaheadToken = LT(i); + if (!lookaheadTokens.contains(lookaheadToken)) { + lookaheadTokens.add(lookaheadToken); + } + return super.LA(i); + } - /** + /** * @return the lookaheadTokens */ public List getLookaheadTokens() { @@ -69,28 +62,18 @@ public class XtextTokenStream extends CommonTokenStream { } public void removeLastLookaheadToken() { - lookaheadTokens.remove(lookaheadTokens.size()-1); + lookaheadTokens.remove(lookaheadTokens.size() - 1); } - - /** - * @return the lookaheadConsumedByParent - */ - public int getLookaheadConsumedByParent() { - return lookaheadConsumedByParent; - } - public void resetLookahead() { - currentLookahead = 0; - lookaheadConsumedByParent = 0; - lookaheadTokens.clear(); - } + public void resetLookahead() { + lookaheadTokens.clear(); + } - public void decrementLookahead() { - --currentLookahead; - } - - public void consumeLookahead() { - ++lookaheadConsumedByParent; - } + public String getLexerErrorMessage(Token invalidToken) { + if (tokenSource instanceof org.eclipse.xtext.parser.antlr.Lexer) { + return ((org.eclipse.xtext.parser.antlr.Lexer) tokenSource).getErrorMessage(invalidToken); + } + return (invalidToken.getType() == Token.INVALID_TOKEN_TYPE) ? "Invalid token " + invalidToken.getText() : null; + } } diff --git a/plugins/org.eclipse.xtext/src/org/eclipse/xtext/parser/impl/PartialParsingUtil.java b/plugins/org.eclipse.xtext/src/org/eclipse/xtext/parser/impl/PartialParsingUtil.java index f42ee5463..a9c81d828 100644 --- a/plugins/org.eclipse.xtext/src/org/eclipse/xtext/parser/impl/PartialParsingUtil.java +++ b/plugins/org.eclipse.xtext/src/org/eclipse/xtext/parser/impl/PartialParsingUtil.java @@ -134,8 +134,9 @@ public class PartialParsingUtil { List nodesEnclosingRegion) { nodesEnclosingRegion.add(parent); EList children = parent.getChildren(); - // Hack: iterate children backward, so we evaluate the node.length() function less often - for (int i=children.size()-1; i>=0; --i) { + // Hack: iterate children backward, so we evaluate the node.length() + // function less often + for (int i = children.size() - 1; i >= 0; --i) { AbstractNode child = children.get(i); if (child instanceof CompositeNode) { CompositeNode childCompositeNode = (CompositeNode) child; @@ -148,10 +149,7 @@ public class PartialParsingUtil { } private static boolean nodeEnclosesRegion(CompositeNode node, int offset, int length) { - if(node.getOffset() <= offset) { - return node.getOffset() + node.getLength() >= offset + length; - } - return false; + return node.getOffset() <= offset && node.getOffset() + node.getLength() >= offset + length; } /** diff --git a/tests/org.eclipse.xtext.generator.tests/src/org/eclipse/xtext/parsetree/InvalidTokenTest.java b/tests/org.eclipse.xtext.generator.tests/src/org/eclipse/xtext/parsetree/InvalidTokenTest.java new file mode 100644 index 000000000..df56726c7 --- /dev/null +++ b/tests/org.eclipse.xtext.generator.tests/src/org/eclipse/xtext/parsetree/InvalidTokenTest.java @@ -0,0 +1,29 @@ +/******************************************************************************* + * Copyright (c) 2008 itemis AG (http://www.itemis.eu) and others. + * All rights reserved. This program and the accompanying materials + * are made available under the terms of the Eclipse Public License v1.0 + * which accompanies this distribution, and is available at + * http://www.eclipse.org/legal/epl-v10.html + * + *******************************************************************************/ +package org.eclipse.xtext.parsetree; + +import org.eclipse.emf.common.util.EList; +import org.eclipse.xtext.testlanguages.OptionalEmptyLanguageStandaloneSetup; +import org.eclipse.xtext.tests.AbstractGeneratorTest; + +/** + * @author Jan Köhnlein - Initial contribution and API + * + */ +public class InvalidTokenTest extends AbstractGeneratorTest { + + public void testInvalidTokenError() throws Exception { + with(OptionalEmptyLanguageStandaloneSetup.class); + CompositeNode rootNode = getRootNode("/*"); + EList allSyntaxErrors = rootNode.allSyntaxErrors(); + assertFalse(allSyntaxErrors.isEmpty()); + SyntaxError syntaxError = allSyntaxErrors.get(0); + assertTrue(syntaxError.getMessage().contains("mismatched character")); + } +}