elisp.js/elisp/parser.js

288 lines
7.6 KiB
JavaScript

////
// Emacs Lisp implementation in JavaScript.
//
// Copyright (c) 2009 Sami Samhuri - sami.samhuri@gmail.com
//
// Released under the terms of the MIT license. See the included file
// LICENSE.
var utils = require('elisp/utils'),
type = require('elisp/types');
// Reader for Emacs Lisp source text.
//
// data - optional source string; any falsy value (including no argument)
//        is stored as ''.
//
// The read cursor `pos` starts at 0 so that peek(), consumeChar(), rest()
// and moreInput() are usable on a fresh instance, without first having to
// call parse() or rewind().
var Parser = function(data) {
    this.data = data || '';
    this.pos = 0;
};
// Error type thrown by the parser.
//
// name    - short identifier of the failure, e.g. 'eof'
// message - human-readable description
//
// Every instance carries parserError === true, which is what handlers in
// this file test for before inspecting `name`.
Parser.Error = function(name, message) {
    this.parserError = true;
    this.name = name;
    this.message = message;
    // Record where the error was created. Engines that do not provide
    // `stack` simply store undefined here.
    this.stack = (new Error(message)).stack;
};
// Inherit from Error so instances satisfy `instanceof Error` and stringify
// as "name: message" instead of "[object Object]".
Parser.Error.prototype = new Error();
Parser.Error.prototype.constructor = Parser.Error;
// Messages keyed by error name.
Parser.Error.messages = {
    'eof': "no more input"
};
// Throw a Parser.Error for the failure called `name`.
//
// The message is looked up in Parser.Error.messages; a name with no entry
// there produces an error whose message is undefined.
Parser.prototype.error = function(name) {
    var message = Parser.Error.messages[name],
        failure = new Parser.Error(name, message);
    throw failure;
};
// Look at the character under the cursor without moving the cursor.
// Yields undefined when `pos` does not index a character of `data`,
// which is the case once the whole input has been consumed.
Parser.prototype.peek = function() {
    var source = this.data,
        cursor = this.pos;
    return source[cursor];
};
// Return the character under the cursor and advance the cursor by one.
// When the cursor is already at or past the end of the input, this.error
// is invoked with 'eof' first.
Parser.prototype.consumeChar = function() {
    var current;
    if (this.pos >= this.data.length) {
        this.error('eof');
    }
    current = this.data[this.pos];
    this.pos++;
    return current;
};
// Advance the cursor past any run of whitespace characters. Stops at the
// first non-whitespace character or at the end of the input, whichever
// comes first; does nothing if the cursor is not on whitespace.
Parser.prototype.consumeWhitespace = function() {
    var whitespace = /[\s\n]/,
        ch = this.peek();
    while (ch && ch.match(whitespace)) {
        this.consumeChar();
        ch = this.peek();
    }
};
// Move the read cursor back to the first character of the input.
Parser.prototype.rewind = function() {
    var start = 0;
    this.pos = start;
};
// Return the not-yet-consumed part of the input: everything from the
// cursor to the end of `data`.
Parser.prototype.rest = function() {
    var source = this.data,
        from = this.pos;
    return source.substring(from);
};
// True while the cursor is still in front of the end of the input.
Parser.prototype.moreInput = function() {
    var total = this.data.length;
    return total > this.pos;
};
// Parse every top-level expression in the input.
//
// string - optional new input. When omitted (undefined or null) the
//          parser's current `data` is parsed again from the start.
//
// Returns type.mkList(...) of the parsed expressions; the same expressions
// are also kept as a plain array in this.expressions.
//
// A Parser.Error named 'eof' raised while reading is reported through
// print() and ends parsing early; whatever was read up to that point is
// still returned. Any other exception is rethrown unchanged.
// NOTE(review): print is not defined in this file; presumably it is a
// global provided by the host environment -- confirm.
Parser.prototype.parse = function(string) {
    var exprs = [];
    // Test against null/undefined rather than truthiness so an explicit ''
    // replaces the previous input instead of silently re-parsing it.
    if (string != null) this.data = string;
    this.rewind();
    // Whitespace in front of the first expression is not an expression.
    this.consumeWhitespace();
    while (this.moreInput()) {
        try {
            exprs.push(this.parseExpression());
        } catch (e) {
            if (e && e.parserError && e.name === 'eof') {
                print("error: " + e.message);
                break;
            }
            throw e;
        }
    }
    this.expressions = exprs;
    return type.mkList(exprs);
};
// Parse `string` with parse() and return the car() of the result, i.e.
// only the first top-level expression.
Parser.prototype.parseOne = function(string) {
    var expressions = this.parse(string);
    return expressions.car();
};
// Fold characters into a token until one matches `regex` or the input
// runs out.
//
// regex             - tested against each upcoming character; the first
//                     character that matches stops the scan
// initial           - starting value of the token
// next              - function(token, char) returning the updated token;
//                     it is called once per consumed character and may
//                     itself consume further input
// consumeTerminator - when truthy, the character that stopped the scan is
//                     consumed as well (if there is one)
//
// Returns the final token.
Parser.prototype.parseUntil = function(regex, initial, next, consumeTerminator) {
    var token = initial,
        ch = this.peek();
    while (ch && ch.match(regex) === null) {
        token = next(token, this.consumeChar());
        ch = this.peek();
    }
    if (consumeTerminator && this.peek()) {
        this.consumeChar();
    }
    return token;
};
// Parse a parenthesised list. The cursor must be on the opening '('.
//
// Expressions are read with parseExpression until it hands back the raw
// ')' character (its result for a closing paren) or a falsy value (its
// result at end of input, so an unterminated list ends there too).
//
// Returns type.mkList(...) of the collected expressions.
Parser.prototype.parseList = function() {
    var list = [],
        expr;
    // consume initial paren '('
    this.consumeChar();
    // Strict comparison: a loose `!=` would coerce parsed objects to
    // strings, so any object that happens to stringify to ")" would be
    // mistaken for the closing paren and cut the list short.
    while ((expr = this.parseExpression()) && expr !== ')') {
        list.push(expr);
    }
    return type.mkList(list);
};
// Parse a dotted pair of the form (car . cdr). The cursor must be on the
// opening '('.
//
// Returns a new type.LispCons(car, cdr).
Parser.prototype.parseCons = function() {
    var car, cdr;
    // consume initial paren '('
    this.consumeChar();
    car = this.parseExpression();
    // the separating '.' is read like any other expression and discarded
    this.parseExpression();
    cdr = this.parseExpression();
    // Consume the closing paren as well. Left in the input it would be
    // handed to whoever reads next, ending an enclosing list early or
    // showing up as a stray ')' at top level.
    if (this.peek() === ')') {
        this.consumeChar();
    }
    return new type.LispCons(car, cdr);
};
Parser.prototype.parseString = function() {
// consume initial quotation mark
this.consumeChar();
var self = this;
return new type.LispString(this.parseUntil(/"/, '', function(s,c){
if (c == '\\') {
c = self.consumeChar();
}
return s + c;
}, true /* consume terminator */));
};
// Parse a symbol: every character up to (not including) the next
// whitespace character, parenthesis or the end of the input becomes the
// symbol's name. The name is empty when the cursor is already on such a
// delimiter.
//
// Returns a new type.LispSymbol built from that name.
Parser.prototype.parseSymbol = function() {
    var appendChar = function(soFar, ch) {
            return soFar + ch;
        },
        name = this.parseUntil(/[\s()]/, '', appendChar);
    return new type.LispSymbol(name);
};
// Parse a /.../ regular expression literal. The cursor must be on the
// opening slash. Probably easy to break.
//
// A backslash always takes the following character with it, so an escaped
// slash (\/) does not end the literal. The backslash itself is kept in
// the pattern: dropping it would turn \d, \s, \. and friends into plain
// letters and change what the expression matches.
//
// Reading stops at the first unescaped slash, which is consumed; flags
// after the closing slash are not supported.
//
// Returns a RegExp built from the pattern text.
Parser.prototype.parseRegex = function() {
    var self = this;
    // consume initial slash
    this.consumeChar();
    return new RegExp(this.parseUntil(/\//, '', function(s, c) {
        if (c === '\\') {
            c += self.consumeChar();
        }
        return s + c;
    }, true /* consume terminator */));
};
// In Emacs Lisp a trailing . is allowed on integers.
// Valid kinds of numbers we parse here are:
//
// * Integers of the form 42, +17, -300, 7300. (trailing .), +1. and
//   -1.
//
// * Floating point numbers of the form -4.5, 0.0, and +933825.3450133492
//
// * Exponential notation for floats, e.g. 1.5e2 (150.0) or 420e-1 (42.0)
//   (There is no trailing . allowed anywhere in exponent notation)
//
// Binary, octal, hex, or arbitrary radix integers are not parsed yet
// (e.g. #x100 == #o400 == #b100000000 == #24rag).
//
// Returns a new type.LispNumber wrapping the numeric value.
Parser.prototype.parseNumber = function() {
    var value = this.parseIntOrFloat(),
        exp,
        scale;
    // parseIntOrFloat clears this.exponentAllowed when the mantissa ended
    // in a bare '.', in which case an exponent must not follow.
    if (this.exponentAllowed && (this.peek() === 'e' || this.peek() === 'E')) {
        this.consumeChar();
        // Technically this is an error as a float is not allowed for exponents
        // but the regex is strict enough to keep us from trying to do that.
        exp = this.parseIntOrFloat();
        scale = Math.pow(10, Math.abs(exp));
        if (exp < 0 && isFinite(scale)) {
            // Divide by the positive power of ten rather than multiplying
            // by an inexact negative one: 3e-1 then reads as 0.3 instead
            // of 0.30000000000000004.
            value /= scale;
        }
        else {
            value *= Math.pow(10, exp);
        }
    }
    return new type.LispNumber(value);
};
// Integers and floats share one routine for simplicity's sake.
//
// Reads an optional sign, an optional run of integer digits and an
// optional '.' followed by fraction digits, and returns the resulting
// number. As a side effect this.exponentAllowed is set: true by default,
// false when a '.' was consumed that is not followed by a digit (the
// trailing-dot integer form such as "7300.").
//
// The result can technically be undefined/NaN when no digits are present;
// callers rely on lookingAtNumber to rule that input out beforehand.
Parser.prototype.parseIntOrFloat = function() {
    var first = this.peek(),
        sign = '+',
        value,
        following,
        fraction;
    this.exponentAllowed = true;
    if (first == '-' || first == '+') {
        sign = this.consumeChar();
    }
    // The integer part is optional: ".5" starts directly with the dot.
    if (this.peek() != '.') {
        value = this.parseUntil(/[^\d]/, 0, function(total, digit) {
            return total*10 + parseInt(digit, 10);
        });
    }
    // A dot either introduces fraction digits or is a trailing dot.
    if (this.peek() == '.') {
        this.consumeChar();
        following = this.peek();
        if (following && following.match(/\d/)) {
            fraction = this.parseUntil(/[^\d]/, '', function(digits, digit) {
                return digits + digit;
            });
            // value is undefined (no integer part) or possibly 0 here;
            // both contribute an empty integer part to the text.
            value = parseFloat('' + (value||'') + '.' + fraction);
        }
        else {
            this.exponentAllowed = false;
        }
    }
    if (sign == '-') {
        return -1*value;
    }
    return value;
};
// Report whether the unread input starts with a number in one of the
// forms listed above parseNumber. This check is paramount: it excludes
// invalid cases that the number parser itself does not catch.
//
// The pattern reads as:
//   optional sign
//   then either   digits '.'                    (integer with trailing dot)
//        or       digits ['.' digits] | '.' digits, optionally followed by
//                 an exponent: e/E, optional sign, digits
//   then a delimiter: whitespace, '(' or ')', or the end of the input
//
// '(' counts as a delimiter just as it does for symbols, so "1(2)" starts
// with the number 1 rather than with a symbol named "1".
//
// Returns true or false; the cursor is not moved.
Parser.prototype.lookingAtNumber = function() {
    return /^[+-]?(?:\d+\.|(?:\d+(?:\.\d+)?|\.\d+)(?:[eE][+-]?\d+)?)(?:[()\s]|$)/.test(this.rest());
};
// Placeholder for dotted-pair detection: meant to tell parseExpression
// whether the '(' under the cursor opens a pair such as (a . b) that
// should go to parseCons instead of parseList.
//
// FIXME: detection is not implemented. This always answers false, so
// parseCons is never selected and dotted input is read as an ordinary
// list that contains the symbol '.'.
// A real implementation has to look ahead without moving the cursor:
// remember pos, skip the '(', read one expression, check that the next
// expression is the symbol '.', then restore pos.
Parser.prototype.lookingAtCons = function() {
    return false;
};
// Parse the next expression, choosing the sub-parser from the first
// non-whitespace character:
//
//   '('   dotted pair (when lookingAtCons says so) or list
//   ')'   the raw ')' character is consumed and returned as-is; parseList
//         uses it as its end marker
//   "'"   a type.LispCons of the symbol `quote` and the following expression
//   '"'   string
//   other a number when lookingAtNumber matches, otherwise a symbol
//
// Whitespace after the expression is consumed too, except after ')'.
// At end of input diagnostics are printed and undefined is returned.
// NOTE(review): print is not defined in this file; presumably it is a
// global provided by the host environment -- confirm.
Parser.prototype.parseExpression = function() {
    var value,
        c;
    // Skip whitespace in front of the expression. Without this, input such
    // as "( a)" or "' x" reaches the symbol branch with the cursor on a
    // blank and produces a symbol with an empty name.
    this.consumeWhitespace();
    c = this.peek();
    if (c === '(' && this.lookingAtCons()) {
        value = this.parseCons();
    }
    else if (c === '(') {
        value = this.parseList();
    }
    else if (c === ')') {
        return this.consumeChar();
    }
    else if (c === "'") {
        this.consumeChar();
        value = new type.LispCons(new type.LispSymbol('quote'), this.parseExpression());
    }
    else if (c === '"') {
        value = this.parseString();
    }
    else if (this.lookingAtNumber()) {
        value = this.parseNumber();
    }
    else if (c) {
        value = this.parseSymbol();
    }
    else {
        if (this.pos == this.data.length) {
            print('[error] no more input. unterminated string or list? (continuing anyway)');
        }
        print('[warning] in Parser.parseExpression: unrecognized char "' + c + '"');
        print('this.pos = ' + this.pos);
        print('this.data.length = ' + this.data.length);
        print('this.rest = ' + this.rest());
    }
    this.consumeWhitespace();
    return value;
};
exports.Parser = Parser;