import java.io.BufferedReader;
import java.io.FileReader;
import java.io.FileNotFoundException;
import java.io.IOException;

/**
 * A single lexical token: a {@link Type} category paired with the text
 * that was read for it. Instances are immutable.
 */
class Token {

    /** The categories a token can belong to. */
    public enum Type {IDENTIFIER, KEYWORD, LITERAL, SEPARATOR, OPERATOR, OTHER, EOF}

    private final Type type;
    private final String value;

    /**
     * Creates a token.
     *
     * @param type  the category of the token
     * @param value the text of the token
     */
    public Token(Type type, String value) {
        this.type = type;
        this.value = value;
    }

    /** @return the category of this token */
    public Type getType() {
        return type;
    }

    /** @return the text of this token */
    public String getValue() {
        return value;
    }

    /** @return the token formatted as {@code "<type>: <value>"} */
    @Override
    public String toString() {
        return type + ": " + value;
    }
}

/**
 * Reads a file character by character and hands out {@link Token}s one at a
 * time through {@link #nextToken()}.
 *
 * <p>Only whitespace skipping, integer literals and the end-of-file marker
 * are currently active; every other character is returned as a
 * single-character {@code OTHER} token. The handling of comments, operators,
 * separators, identifiers and keywords is still commented out below.
 *
 * <p>The stream owns the underlying reader; call {@link #close()} (or use
 * try-with-resources) when done.
 */
public class TokenStream implements AutoCloseable {

    private final BufferedReader input;

    /**
     * Opens the named file for tokenizing.
     *
     * @param fileName path of the file to read
     * @throws FileNotFoundException if the file cannot be opened for reading
     */
    public TokenStream(String fileName) throws FileNotFoundException {
        input = new BufferedReader(new FileReader(fileName));
    }

    /**
     * Reads and returns the next token from the input.
     *
     * @return the next token; a token of type {@code EOF} with an empty value
     *         once the input is exhausted
     * @throws IOException if reading from the underlying file fails, or if
     *                     the stream has already been closed
     */
    public Token nextToken() throws IOException {
        // tokenInitial is the numeric code of the first character of the token
        // or -1 for EOF
        int tokenInitial;

        // first check for whitespace and comments, bypass them
        do {
            tokenInitial = input.read();
            /* START OF COMMENTED OUT SECTION
            if(tokenInitial == '/'){ // possible comment start, peek ahead
                input.mark(1);
                if(input.read() != '/'){ // not a comment
                    input.reset(); // so put the second character back
                    break; // and we've found the token start
                } else { // a comment was found, skip over it
                    ...
                }
            }
            END OF COMMENTED OUT SECTION */
        } while (isWhiteSpace(tokenInitial));

        if (tokenInitial == -1) { // EOF
            return new Token(Token.Type.EOF, "");
        }

        // builds up the token value; a local builder needs no synchronization
        StringBuilder sb = new StringBuilder();
        sb.appendCodePoint(tokenInitial);

        /* START OF COMMENTED OUT SECTION
        { // peek ahead to see if we have a two-character operator
            input.mark(1);
            int char2 = input.read();
            ...
        }

        // try again for an operator, but just one character
        if(isOperator(sb.toString())){
            return new Token(Token.Type.OPERATOR, sb.toString());
        }

        // Then check for a Separator
        if(isSeparator(tokenInitial)) {
            ...
        }

        // Then check for an Identifier, Keyword, or Boolean Literal
        if(isLetter(tokenInitial)){
            ...
        }
        END OF COMMENTED OUT SECTION */

        if (isDigit(tokenInitial)) { // check for integers
            while (true) {
                input.mark(1); // be prepared to put a character back
                int nextChar = input.read();
                if (!isDigit(nextChar)) {
                    input.reset(); // put it back
                    break; // the string of digits is done
                }
                sb.appendCodePoint(nextChar);
            }
            return new Token(Token.Type.LITERAL, sb.toString());
        }

        return new Token(Token.Type.OTHER, sb.toString()); // any other char
    }

    /**
     * Closes the underlying reader and releases the file handle.
     *
     * @throws IOException if closing the reader fails
     */
    @Override
    public void close() throws IOException {
        input.close();
    }

    /* START OF COMMENTED OUT SECTION
    private boolean isKeyword (String s) {
        return ...;
    }

    private boolean isBooleanLiteral (String s) {
        return ...;
    }
    END OF COMMENTED OUT SECTION */

    // white space is a space, tab, return, newline, or form feed
    private boolean isWhiteSpace(int c) {
        return c == ' ' || c == '\t' || c == '\r' || c == '\n' || c == '\f';
    }

    /* START OF COMMENTED OUT SECTION
    private boolean isEndOfLine(int c){ // count EOF as an end of line
        return ...;
    }

    private boolean isSeparator(int c) {
        return (c=='[' || c==']' || c=='(' || c==')' || c==':' ||
                c=='{' || c=='}' || c==';' || c==',' || c=='.');
    }

    private boolean isOperator(String s) {
        return ...;
    }

    private boolean isLetter(int c) {
        return ...;
    }
    END OF COMMENTED OUT SECTION */

    // a digit is one of the ASCII characters '0' through '9'
    private boolean isDigit(int c) {
        return c >= '0' && c <= '9';
    }

    /**
     * Test driver for TokenStream alone: prints every token of the given
     * file, one per line, stopping at EOF (the EOF token is not printed).
     *
     * @param args exactly one element, the path of the input file
     */
    public static void main(String[] args) {
        if (args.length != 1) {
            System.err.println("Usage: java TokenStream inputfile");
            System.exit(1);
        }
        // try-with-resources closes the file before the catch block runs
        try (TokenStream ts = new TokenStream(args[0])) {
            // now get each token from the stream (until EOF),
            // convert it to a string, and print it out
            // with System.out.println
            while (true) {
                Token t = ts.nextToken();
                if (t.getType() == Token.Type.EOF) {
                    break;
                }
                System.out.println(t);
            }
        } catch (IOException e) {
            e.printStackTrace();
            System.exit(1);
        }
    }
}