From dcf7c578c706d0221fcd5f80fb73b477aa6c3f20 Mon Sep 17 00:00:00 2001
From: jkohnlein <jkohnlein>
Date: Fri, 4 Jul 2008 11:35:53 +0000
Subject: [PATCH] Refactored parser: - Don't hand in currentNode in
 createLeafNode - Hand lexer errors through

---
 .../parser/antlr/AbstractAntlrParser.java     | 57 +++++-------
 .../org/eclipse/xtext/parser/antlr/Lexer.java | 81 +++++++++++++++++
 .../xtext/parser/antlr/XtextTokenStream.java  | 91 ++++++++-----------
 .../xtext/parser/impl/PartialParsingUtil.java | 10 +-
 .../xtext/parsetree/InvalidTokenTest.java     | 29 ++++++
 5 files changed, 174 insertions(+), 94 deletions(-)
 create mode 100644 plugins/org.eclipse.xtext/src/org/eclipse/xtext/parser/antlr/Lexer.java
 create mode 100644 tests/org.eclipse.xtext.generator.tests/src/org/eclipse/xtext/parsetree/InvalidTokenTest.java
diff --git a/plugins/org.eclipse.xtext/src/org/eclipse/xtext/parser/antlr/AbstractAntlrParser.java b/plugins/org.eclipse.xtext/src/org/eclipse/xtext/parser/antlr/AbstractAntlrParser.java
index 2856f0954..2b8260c49 100644
--- a/plugins/org.eclipse.xtext/src/org/eclipse/xtext/parser/antlr/AbstractAntlrParser.java
+++ b/plugins/org.eclipse.xtext/src/org/eclipse/xtext/parser/antlr/AbstractAntlrParser.java
@@ -81,25 +81,20 @@ public abstract class AbstractAntlrParser extends Parser {
 		}
 	}
 
-	protected Object createLeafNode(String grammarElementID, CompositeNode parentNode, String feature) {
+	protected Object createLeafNode(String grammarElementID, String feature) {
 		Token token = input.LT(-1);
 		if (token.getTokenIndex() > lastConsumedIndex) {
 			int indexOfTokenBefore = lastConsumedIndex;
 			if (indexOfTokenBefore + 1 < token.getTokenIndex()) {
 				for (int x = indexOfTokenBefore + 1; x < token.getTokenIndex(); x++) {
 					Token hidden = input.get(x);
-					LeafNode leafNode = createLeafNode(isSemanticChannel(hidden));
-					leafNode.setText(hidden.getText());
-					leafNode.setHidden(true);
+					LeafNode leafNode = createLeafNode(hidden, true);
 					setLexerRule(leafNode, hidden);
-					parentNode.getChildren().add(leafNode);
 				}
 			}
-			LeafNode leafNode = createLeafNode(isSemanticChannel(token));
-			leafNode.setText(token.getText());
+			LeafNode leafNode = createLeafNode(token, false);
 			leafNode.setGrammarElement(getGrammarElement(grammarElementID));
 			leafNode.setFeature(feature);
-			parentNode.getChildren().add(leafNode);
 			lastConsumedIndex = token.getTokenIndex();
 			tokenConsumed(token, leafNode);
 			return leafNode;
@@ -168,21 +163,27 @@ public abstract class AbstractAntlrParser extends Parser {
 		}
 	}
 
-	private LeafNode createLeafNode(boolean appendError) {
-		LeafNode ln = ParsetreeFactory.eINSTANCE.createLeafNode();
-		if (appendError)
-			appendError(ln);
-		return ln;
+	private LeafNode createLeafNode(Token token, boolean isHidden) {
+		LeafNode leafNode = ParsetreeFactory.eINSTANCE.createLeafNode();
+		leafNode.setText(token.getText());
+		leafNode.setHidden(isHidden);
+		if (isSemanticChannel(token))
+			appendError(leafNode);
+		if(token.getType() == Token.INVALID_TOKEN_TYPE) {
+			SyntaxError error = ParsetreeFactory.eINSTANCE.createSyntaxError();
+			String lexerErrorMessage = ((XtextTokenStream)input).getLexerErrorMessage(token);
+			error.setMessage(lexerErrorMessage);
+			leafNode.setSyntaxError(error);
+		}
+		currentNode.getChildren().add(leafNode);
+		return leafNode;
 	}
 
 	protected void appendAllTokens() {
 		for (int x = lastConsumedIndex + 1; input.size() > x; input.consume(), x++) {
 			Token hidden = input.get(x);
-			LeafNode leafNode = createLeafNode(isSemanticChannel(hidden));
-			leafNode.setText(hidden.getText());
-			leafNode.setHidden(true);
+			LeafNode leafNode = createLeafNode(hidden, true);
 			setLexerRule(leafNode, hidden);
-			currentNode.getChildren().add(leafNode);
 		}
 		if (currentError != null) {
 			EList<LeafNode> leafNodes = currentNode.getLeafNodes();
@@ -208,37 +209,28 @@ public abstract class AbstractAntlrParser extends Parser {
 		if (indexOfTokenBefore + 1 < currentTokenIndex) {
 			for (int x = indexOfTokenBefore + 1; x < currentTokenIndex; x++) {
 				Token hidden = input.get(x);
-				LeafNode leafNode = createLeafNode(isSemanticChannel(hidden));
-				leafNode.setText(hidden.getText());
-				leafNode.setHidden(true);
+				LeafNode leafNode = createLeafNode(hidden, true);
 				setLexerRule(leafNode, hidden);
-				currentNode.getChildren().add(leafNode);
 				skipped.add(leafNode);
 			}
 		}
 		if (lastConsumedIndex < currentTokenIndex) {
-			LeafNode leafNode = createLeafNode(isSemanticChannel(currentToken));
-			leafNode.setText(currentToken.getText());
-			leafNode.setHidden(true);
+			LeafNode leafNode = createLeafNode(currentToken, true);
 			setLexerRule(leafNode, currentToken);
-			currentNode.getChildren().add(leafNode);
 			skipped.add(leafNode);
 			lastConsumedIndex = currentToken.getTokenIndex();
 		}
 		return skipped;
 	}
 
-	protected void appendTrailingHiddenTokens(CompositeNode parentNode) {
+	protected void appendTrailingHiddenTokens() {
 		Token tokenBefore = input.LT(-1);
 		int size = input.size();
 		if (tokenBefore != null && tokenBefore.getTokenIndex() < size) {
 			for (int x = tokenBefore.getTokenIndex() + 1; x < size; x++) {
 				Token hidden = input.get(x);
-				LeafNode leafNode = createLeafNode(isSemanticChannel(hidden));
-				leafNode.setText(hidden.getText());
-				leafNode.setHidden(true);
+				LeafNode leafNode = createLeafNode(hidden, true);
 				setLexerRule(leafNode, hidden);
-				parentNode.getChildren().add(leafNode);
 				lastConsumedIndex = hidden.getTokenIndex();
 			}
 		}
@@ -287,7 +279,7 @@ public abstract class AbstractAntlrParser extends Parser {
 			catch (Exception e) {
 				throw new WrappedException(e);
 			}
-			appendTrailingHiddenTokens(currentNode);
+			appendTrailingHiddenTokens();
 		}
 		finally {
 			try {
@@ -323,8 +315,6 @@ public abstract class AbstractAntlrParser extends Parser {
 		}
 		deferredLookaheadMap.remove(token);
 		token2NodeMap.put(token, leafNode);
-		((XtextTokenStream) input).consumeLookahead();
-		// ((XtextTokenStream) input).decrementLookahead();
 	}
 
 	/**
@@ -382,7 +372,6 @@ public abstract class AbstractAntlrParser extends Parser {
 	public void match(IntStream input, int ttype, BitSet follow) throws RecognitionException {
 		super.match(input, ttype, follow);
 		((XtextTokenStream) input).removeLastLookaheadToken();
-		((XtextTokenStream) input).decrementLookahead();
 	}
 
 	protected InputStream getTokenFile() {
diff --git a/plugins/org.eclipse.xtext/src/org/eclipse/xtext/parser/antlr/Lexer.java b/plugins/org.eclipse.xtext/src/org/eclipse/xtext/parser/antlr/Lexer.java
new file mode 100644
index 000000000..4a488a09c
--- /dev/null
+++ b/plugins/org.eclipse.xtext/src/org/eclipse/xtext/parser/antlr/Lexer.java
@@ -0,0 +1,81 @@
+/*******************************************************************************
+ * Copyright (c) 2008 itemis AG (http://www.itemis.eu) and others.
+ * All rights reserved. This program and the accompanying materials
+ * are made available under the terms of the Eclipse Public License v1.0
+ * which accompanies this distribution, and is available at
+ * http://www.eclipse.org/legal/epl-v10.html
+ *
+ *******************************************************************************/
+package org.eclipse.xtext.parser.antlr;
+
+import java.util.HashMap;
+import java.util.Map;
+
+import org.antlr.runtime.CharStream;
+import org.antlr.runtime.CommonToken;
+import org.antlr.runtime.NoViableAltException;
+import org.antlr.runtime.RecognitionException;
+import org.antlr.runtime.Token;
+
+/**
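+ * Hack: As ANTLR does not allow defining the superclass of the generated
+ * lexer, this class is named Lexer and imported in place of the original
+ * ANTLR lexer class. When lexing fails, nextToken() emits an invalid token on
+ * the hidden channel and keeps its message for getErrorMessage(Token).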
+ * @author Jan Köhnlein - Initial contribution and API
+ * 
+ */
+public abstract class Lexer extends org.antlr.runtime.Lexer {
+
+	public Lexer() {
+		super();
+	}
+
+	public Lexer(CharStream input) {
+		super(input);
+	}
+
+	private Map<Token, String> tokenErrorMap = new HashMap<Token, String>();
+
+	public Token nextToken() {
+		while (true) {
+			this.token = null;
+			this.channel = Token.DEFAULT_CHANNEL;
+			this.tokenStartCharIndex = input.index();
+			this.tokenStartCharPositionInLine = input.getCharPositionInLine();
+			this.tokenStartLine = input.getLine();
+			this.text = null;
+			if (input.LA(1) == CharStream.EOF) {
+				return Token.EOF_TOKEN;
+			}
+			try {
+				mTokens();
+				if (this.token == null) {
+					emit();
+				}
+				else if (this.token == Token.SKIP_TOKEN) {
+					continue;
+				}
+				return this.token;
+			}
+			catch (RecognitionException re) {
+				reportError(re);
+				if (re instanceof NoViableAltException) {
+					recover(re);
+				}
+				// create token that holds mismatched char
+				Token t = new CommonToken(input, Token.INVALID_TOKEN_TYPE, Token.HIDDEN_CHANNEL,
+						this.tokenStartCharIndex, getCharIndex() - 1);
+				t.setLine(this.tokenStartLine);
+				t.setCharPositionInLine(this.tokenStartCharPositionInLine);
+				tokenErrorMap.put(t, getErrorMessage(re, this.getTokenNames()));
+				emit(t);
+				return this.token;
+			}
+		}
+	}
+
+	public String getErrorMessage(Token t) {
+		return tokenErrorMap.get(t);
+	}
+}
diff --git a/plugins/org.eclipse.xtext/src/org/eclipse/xtext/parser/antlr/XtextTokenStream.java b/plugins/org.eclipse.xtext/src/org/eclipse/xtext/parser/antlr/XtextTokenStream.java
index cfbb91868..416d084a3 100644
--- a/plugins/org.eclipse.xtext/src/org/eclipse/xtext/parser/antlr/XtextTokenStream.java
+++ b/plugins/org.eclipse.xtext/src/org/eclipse/xtext/parser/antlr/XtextTokenStream.java
@@ -9,7 +9,9 @@
 package org.eclipse.xtext.parser.antlr;
 
 import java.util.ArrayList;
+import java.util.HashMap;
 import java.util.List;
+import java.util.Map;
 
 import org.antlr.runtime.CommonTokenStream;
 import org.antlr.runtime.Token;
@@ -22,46 +24,37 @@ import org.antlr.runtime.TokenSource;
  */
 public class XtextTokenStream extends CommonTokenStream {
 
-    private int currentLookahead;
-    private int lookaheadConsumedByParent;
+	private List<Token> lookaheadTokens = new ArrayList<Token>();
 
-    private List<Token> lookaheadTokens = new ArrayList<Token>(); 
-    
-    public XtextTokenStream() {
-        super();
-    }
+	private Map<Token, String> tokenErrorMap = new HashMap<Token, String>();
 
-    public XtextTokenStream(TokenSource tokenSource, int channel) {
-        super(tokenSource, channel);
-    }
+	public XtextTokenStream() {
+		super();
+	}
 
-    public XtextTokenStream(TokenSource tokenSource) {
-        super(tokenSource);
-    }
+	public XtextTokenStream(TokenSource tokenSource, int channel) {
+		super(tokenSource, channel);
+	}
 
-    /*
-     * (non-Javadoc)
-     * 
-     * @see org.antlr.runtime.CommonTokenStream#LA(int)
-     */
-    @Override
-    public int LA(int i) {
-        currentLookahead = Math.max(currentLookahead, i);
-        Token lookaheadToken = LT(i);
-        if(!lookaheadTokens.contains(lookaheadToken)) {
-        	lookaheadTokens.add(lookaheadToken);
-        }
-        return super.LA(i);
-    }
+	public XtextTokenStream(TokenSource tokenSource) {
+		super(tokenSource);
+	}
 
-    /**
-     * @return the currentLookahead
-     */
-    public int getCurrentLookahead() {
-        return currentLookahead;
-    }
+	/*
+	 * (non-Javadoc)
+	 * 
+	 * @see org.antlr.runtime.CommonTokenStream#LA(int)
+	 */
+	@Override
+	public int LA(int i) {
+		Token lookaheadToken = LT(i);
+		if (!lookaheadTokens.contains(lookaheadToken)) {
+			lookaheadTokens.add(lookaheadToken);
+		}
+		return super.LA(i);
+	}
 
-    /**
+	/**
 	 * @return the lookaheadTokens
 	 */
 	public List<Token> getLookaheadTokens() {
@@ -69,28 +62,18 @@ public class XtextTokenStream extends CommonTokenStream {
 	}
 
 	public void removeLastLookaheadToken() {
-		lookaheadTokens.remove(lookaheadTokens.size()-1);
+		lookaheadTokens.remove(lookaheadTokens.size() - 1);
 	}
-	
-    /**
-     * @return the lookaheadConsumedByParent
-     */
-    public int getLookaheadConsumedByParent() {
-        return lookaheadConsumedByParent;
-    }
 
-    public void resetLookahead() {
-        currentLookahead = 0;
-        lookaheadConsumedByParent = 0;
-        lookaheadTokens.clear();
-    }
+	public void resetLookahead() {
+		lookaheadTokens.clear();
+	}
 
-    public void decrementLookahead() {
-        --currentLookahead;
-    }
-
-    public void consumeLookahead() {
-        ++lookaheadConsumedByParent;
-    }
+	public String getLexerErrorMessage(Token invalidToken) {
+		if (tokenSource instanceof org.eclipse.xtext.parser.antlr.Lexer) {
+			return ((org.eclipse.xtext.parser.antlr.Lexer) tokenSource).getErrorMessage(invalidToken);
+		}
+		return (invalidToken.getType() == Token.INVALID_TOKEN_TYPE) ? "Invalid token " + invalidToken.getText() : null;
+	}
 
 }
diff --git a/plugins/org.eclipse.xtext/src/org/eclipse/xtext/parser/impl/PartialParsingUtil.java b/plugins/org.eclipse.xtext/src/org/eclipse/xtext/parser/impl/PartialParsingUtil.java
index f42ee5463..a9c81d828 100644
--- a/plugins/org.eclipse.xtext/src/org/eclipse/xtext/parser/impl/PartialParsingUtil.java
+++ b/plugins/org.eclipse.xtext/src/org/eclipse/xtext/parser/impl/PartialParsingUtil.java
@@ -134,8 +134,9 @@ public class PartialParsingUtil {
 			List<CompositeNode> nodesEnclosingRegion) {
 		nodesEnclosingRegion.add(parent);
 		EList<AbstractNode> children = parent.getChildren();
-		// Hack: iterate children backward, so we evaluate the node.length() function less often  
-		for (int i=children.size()-1; i>=0; --i) {
+		// Hack: iterate children backward, so we evaluate the node.length()
+		// function less often
+		for (int i = children.size() - 1; i >= 0; --i) {
 			AbstractNode child = children.get(i);
 			if (child instanceof CompositeNode) {
 				CompositeNode childCompositeNode = (CompositeNode) child;
@@ -148,10 +149,7 @@ public class PartialParsingUtil {
 	}
 
 	private static boolean nodeEnclosesRegion(CompositeNode node, int offset, int length) {
-		if(node.getOffset() <= offset) {
-			return node.getOffset() + node.getLength() >= offset + length;
-		}
-		return false;
+		return node.getOffset() <= offset && node.getOffset() + node.getLength() >= offset + length;
 	}
 
 	/**
diff --git a/tests/org.eclipse.xtext.generator.tests/src/org/eclipse/xtext/parsetree/InvalidTokenTest.java b/tests/org.eclipse.xtext.generator.tests/src/org/eclipse/xtext/parsetree/InvalidTokenTest.java
new file mode 100644
index 000000000..df56726c7
--- /dev/null
+++ b/tests/org.eclipse.xtext.generator.tests/src/org/eclipse/xtext/parsetree/InvalidTokenTest.java
@@ -0,0 +1,29 @@
+/*******************************************************************************
+ * Copyright (c) 2008 itemis AG (http://www.itemis.eu) and others.
+ * All rights reserved. This program and the accompanying materials
+ * are made available under the terms of the Eclipse Public License v1.0
+ * which accompanies this distribution, and is available at
+ * http://www.eclipse.org/legal/epl-v10.html
+ *
+ *******************************************************************************/
+package org.eclipse.xtext.parsetree;
+
+import org.eclipse.emf.common.util.EList;
+import org.eclipse.xtext.testlanguages.OptionalEmptyLanguageStandaloneSetup;
+import org.eclipse.xtext.tests.AbstractGeneratorTest;
+
+/**
+ * @author Jan Köhnlein - Initial contribution and API
+ *
+ */
+public class InvalidTokenTest extends AbstractGeneratorTest {
+
+	public void testInvalidTokenError() throws Exception {
+		with(OptionalEmptyLanguageStandaloneSetup.class);
+		CompositeNode rootNode = getRootNode("/*");
+		EList<SyntaxError> allSyntaxErrors = rootNode.allSyntaxErrors();
+		assertFalse(allSyntaxErrors.isEmpty());
+		SyntaxError syntaxError = allSyntaxErrors.get(0);
+		assertTrue(syntaxError.getMessage().contains("mismatched character"));
+	}
+}