|
|
|
@ -0,0 +1,554 @@
|
|
|
|
|
<html>
|
|
|
|
|
<head>
|
|
|
|
|
<title>Comparative SL3 Parsing</title>
|
|
|
|
|
<script type="text/javascript">
|
|
|
|
|
// Functions that append special characters to the sentence box
|
|
|
|
|
// Appends the conjunction symbol (U+2227) to the current text of the "sentence" input
function band() {
    var sentenceBox = document.getElementById("sentence");
    sentenceBox.value = sentenceBox.value + "\u2227";
}
|
|
|
|
|
// Appends the disjunction symbol (U+2228) to the current text of the "sentence" input
function bor() {
    var sentenceBox = document.getElementById("sentence");
    sentenceBox.value = sentenceBox.value + "\u2228";
}
|
|
|
|
|
// Appends the conditional arrow (U+2192) to the current text of the "sentence" input
function barr() {
    var sentenceBox = document.getElementById("sentence");
    sentenceBox.value = sentenceBox.value + "\u2192";
}
|
|
|
|
|
// Appends the biconditional arrow (U+2194) to the current text of the "sentence" input
function bbic() {
    var sentenceBox = document.getElementById("sentence");
    sentenceBox.value = sentenceBox.value + "\u2194";
}
|
|
|
|
|
|
|
|
|
|
// Integer aliases for the token kinds produced by the tokenizer.
// ERROR marks a character outside the grammar; the rest name one symbol class each.
var ERROR = 0;
var ATOM = 1;
var NEGATE = 2;
var LPAR = 3;
var RPAR = 4;
var ARROW = 5;
var BICOND = 6;
var AND = 7;
var OR = 8;
|
|
|
|
|
|
|
|
|
|
// Converts each symbol to a token and returns a list of tokens.
//
// s: the sentence to tokenize, as a string. Every character is examined on its
//    own; whitespace is not skipped, so it counts as an illegal character.
// Returns an array holding one token identifier per character of s (an empty
// array for an empty string). As soon as a character outside the grammar is
// met, "Parsing error" is logged and the single-element list [ ERROR ] is
// returned in place of the partial result.
function tokenize(s) {
    var tokens = [];
    for (var i = 0; i < s.length; i++) {
        switch (s[i]) {
            case "A":
            case "B":
            case "C":
                tokens.push(ATOM);
                break;
            case "~":
                tokens.push(NEGATE);
                break;
            case "(":
                tokens.push(LPAR);
                break;
            case ")":
                tokens.push(RPAR);
                break;
            case "\u2192":
                tokens.push(ARROW);
                break;
            case "\u2194":
                tokens.push(BICOND);
                break;
            case "\u2227":
                tokens.push(AND);
                break;
            case "\u2228":
                tokens.push(OR);
                break;
            default:
                // The log call must come before the return; placed after it,
                // the statement is unreachable and the error is never reported.
                console.log("Parsing error");
                return [ ERROR ];
        }
    }
    return tokens;
}
|
|
|
|
|
|
|
|
|
|
// Reads the sentence box, checks the sentence under both rule sets (binary-only
// junctions and extended junctions), logs the outcome to the console and updates
// the two status headings on the page.
function parseInputBox() {
    // A sentence is valid when it parses and no tokens are left over
    function isValid(tokenList, extended) {
        var outcome = parseSentence(tokenList, extended);
        return outcome[0] && outcome[1].length == 0;
    }

    // Write the verdict and its colour class into one status heading
    function showVerdict(elementId, valid) {
        var heading = document.getElementById(elementId);
        heading.innerHTML = valid ? "\u2714 Valid" : "\u2716 Invalid";
        heading.className = valid ? "green" : "red";
    }

    // Sentence text with leading and trailing whitespace removed
    var input = document.getElementById("sentence").value.replace(/^\s+|\s+$/g, '');

    // Numeric tokens are easier for the parser to work with than characters
    var tokens = tokenize(input);

    // Same token list, judged first without and then with extended junctions
    var validBinary = isValid(tokens, false);
    var validExtended = isValid(tokens, true);

    // Print result to console
    console.log(input + " -> [" + tokens.join(",") + "] -> " + validBinary + "/" + validExtended);

    // Left panel reports the binary rules, right panel the extended rules
    showVerdict("status-left", validBinary);
    showVerdict("status-right", validExtended);
}
|
|
|
|
|
|
|
|
|
|
// Attempt to parse an atomic sentence from the head of the token list.
// Returns [ true, tokens after the ATOM ] on success, or [ false, tokens ]
// with the list untouched otherwise. flag is accepted but not used here.
function parseAtom(tokens, flag) {
    var startsWithAtom = tokens[0] == ATOM;
    return startsWithAtom
        ? [ true, tokens.slice(1) ]   // consume the single ATOM token
        : [ false, tokens ];          // nothing consumed on failure
}
|
|
|
|
|
|
|
|
|
|
// Attempt to parse a negation: a NEGATE token followed by a sentence.
// Returns the inner sentence's result on success (it already holds the tokens
// left after the negation), or [ false, tokens ] with the list untouched.
function parseNegation(tokens, flag) {
    if (tokens[0] == NEGATE) {
        // Drop the NEGATE and look for a sentence in what follows
        var negated = parseSentence(tokens.slice(1), flag);
        if (negated[0]) {
            return negated;
        }
    }
    // Either no NEGATE or no sentence after it: hand back the original list so
    // the NEGATE is not consumed
    return [ false, tokens ];
}
|
|
|
|
|
|
|
|
|
|
// Attempt to parse a binary operator sentence: LPAR, sentence, the given
// operator token, sentence, RPAR.
// Returns [ true, tokens left after the RPAR ] on success, or [ false, tokens ]
// with the list untouched as soon as any expected piece is missing.
function parseBinaryOperator(tokens, operator, flag) {
    var failure = [ false, tokens ];

    // Opening paren
    if (tokens[0] != LPAR) {
        return failure;
    }

    // Left-hand sentence, which must be followed by the operator
    var leftSide = parseSentence(tokens.slice(1), flag);
    if (!leftSide[0] || leftSide[1][0] != operator) {
        return failure;
    }

    // Right-hand sentence, which must be followed by the closing paren
    var rightSide = parseSentence(leftSide[1].slice(1), flag);
    if (!rightSide[0] || rightSide[1][0] != RPAR) {
        return failure;
    }

    // Everything matched; drop the RPAR along with the rest
    return [ true, rightSide[1].slice(1) ];
}
|
|
|
|
|
|
|
|
|
|
// Attempt to parse an extended junction: LPAR, then two or more sentences
// separated by the given operator token, then RPAR.
// Returns [ true, tokens left after the RPAR ] on success, or [ false, tokens ]
// with the list untouched on any failure.
function parseNAryOperator(tokens, operator, flag) {
    var failure = [ false, tokens ];

    // Opening paren
    if (tokens[0] != LPAR) {
        return failure;
    }

    // First member of the junction
    var member = parseSentence(tokens.slice(1), flag);
    if (!member[0]) {
        return failure;
    }
    var rest = member[1];

    // Each pass consumes one "operator + sentence" pair. The body runs at least
    // once, so a junction always has a minimum of two members.
    do {
        if (rest[0] != operator) {
            return failure;             // a member must be followed by the operator
        }
        member = parseSentence(rest.slice(1), flag);
        if (!member[0]) {
            return failure;             // the operator must be followed by a sentence
        }
        rest = member[1];
        if (rest[0] == RPAR) {
            return [ true, rest.slice(1) ];   // junction closed; drop the RPAR
        }
    } while (rest.length > 0);

    // Tokens ran out before the junction was closed
    return failure;
}
|
|
|
|
|
|
|
|
|
|
// Attempt to parse a sentence by trying each grammar rule in a fixed order and
// returning the result of the first rule that succeeds.
// flag selects the junction rules: extended (n-ary) conjunctions and
// disjunctions when truthy, strictly binary ones otherwise.
// Returns [ true, unconsumed tokens ] on success, or [ false, tokens ] with the
// list untouched when no rule applies.
function parseSentence(tokens, flag) {
    // Conjunctions and disjunctions are the only rules that depend on flag
    var parseJunction = flag ? parseNAryOperator : parseBinaryOperator;

    // One thunk per rule, in the order they are tried; a rule is only run if
    // every rule before it has failed
    var rules = [
        function () { return parseAtom(tokens, flag); },                      // atomic sentence
        function () { return parseNegation(tokens, flag); },                  // negation
        function () { return parseBinaryOperator(tokens, ARROW, flag); },     // conditional
        function () { return parseBinaryOperator(tokens, BICOND, flag); },    // biconditional
        function () { return parseJunction(tokens, AND, flag); },             // conjunction
        function () { return parseJunction(tokens, OR, flag); }               // disjunction
    ];

    for (var i = 0; i < rules.length; i++) {
        var outcome = rules[i]();
        if (outcome[0]) {
            return outcome;   // consume and return
        }
    }

    // By closure, if it's not one of those, it isn't a sentence
    return [ false, tokens ];
}
|
|
|
|
|
|
|
|
|
|
</script>
|
|
|
|
|
<style>
|
|
|
|
|
/* Center the text inside the sentence input */
#sentence {
    text-align: center;
}

/* The two validity panels share one bordered, fixed-width inline box */
#status-box-left,
#status-box-right {
    display: inline-block;
    border: 1px solid black;
    width: 400px;
}

/* Verdict colours applied to the status headings */
.green {
    color: #228b22;
}

.red {
    color: #c00000;
}

/* Left-aligned prose column with wide side margins */
.text1 {
    text-align: left;
    margin: 0 200px;
}

/* Top-aligned, fixed-width inline columns for the code listings */
.text2 {
    vertical-align:top;
    display: inline-block;
    width: 500px;
    text-align: left;
}
|
|
|
|
|
</style>
|
|
|
|
|
</head>
|
|
|
|
|
<body>
|
|
|
|
|
<center>
|
|
|
|
|
<h1>Comparative SL3 Parsing</h1>
|
|
|
|
|
<p>
|
|
|
|
|
<input id="sentence" type="text" value="A" onchange="javascript:parseInputBox()">
|
|
|
|
|
<button onclick="javascript:parseInputBox()">↺</button>
|
|
|
|
|
</p>
|
|
|
|
|
<p>
|
|
|
|
|
<button onclick="javascript:band()">∧</button>
|
|
|
|
|
<button onclick="javascript:bor()">∨</button>
|
|
|
|
|
<button onclick="javascript:barr()">→</button>
|
|
|
|
|
<button onclick="javascript:bbic()">↔</button>
|
|
|
|
|
</p>
|
|
|
|
|
<br>
|
|
|
|
|
<div id="status-box-left">
|
|
|
|
|
<p>Input sentence validity in SL3 without extended junctions:</p>
|
|
|
|
|
<h2 id="status-left" class="green">✔ Valid</h2>
|
|
|
|
|
</div>
|
|
|
|
|
<div id="status-box-right">
|
|
|
|
|
<p>Input sentence validity in SL3 with extended junctions:</p>
|
|
|
|
|
<h2 id="status-right" class="green">✔ Valid</h2>
|
|
|
|
|
</div>
|
|
|
|
|
<br>
|
|
|
|
|
<div class="text1">
|
|
|
|
|
<center><h2>Recursive descent parsing</h2></center>
|
|
|
|
|
<p>Languages that are defined by recursive grammars can be parsed by means of a recursive descent parser.
|
|
|
|
|
A recursive descent parser will consist of two main pieces: the tokenizer and the parser. The tokenizer
|
|
|
|
|
converts the input, which is a string of characters where one element may consist of multiple characters,
|
|
|
|
|
into a sequence of tokens, which abstractly represent the structural elements of the sequence. For example,
|
|
|
|
|
a natural language parser might tokenize a string of characters by splitting on whitespace and converting
|
|
|
|
|
each group of letters into a WORD token, and a JSON parser might convert a string of numerals and notation
|
|
|
|
|
characters into a JNUMBER token.</p>
|
|
|
|
|
<p>The parser takes in a sequence of tokens and decides whether it can be parsed according to a set grammar.
|
|
|
|
|
To do this, the parser defines recursive functions that correspond to each rule in the recursive grammar.
|
|
|
|
|
A particular method call to one of these functions can then call whatever other recursive functions are
|
|
|
|
|
required to validate that grammar rule. For example, a function that parses math expressions might call an
|
|
|
|
|
expression parser on each side of a + token. At each step in the parsing procedure, recursive calls will
|
|
|
|
|
consume some number of tokens off of the token list. If a recursive call fails, the original token list
|
|
|
|
|
will be returned instead of a list with some tokens consumed, so the parent function call can try another
|
|
|
|
|
recursive call or fail.</p>
|
|
|
|
|
<center><h2>Writing an SL3 RDP: Tokenizer</h2></center>
|
|
|
|
|
<p>SL3 is defined by the following recursive grammar rule, where Φ is a sentence:</p>
|
|
|
|
|
<pre>
|
|
|
|
|
Φ := A | B | C | ~Φ | (Φ∧Φ) | (Φ∨Φ) | (Φ→Φ) | (Φ↔Φ)</pre>
|
|
|
|
|
<p>The closure clause is implicit. Thus the characters we should expect to see are A, B, C, ~, (, ), ∧,
|
|
|
|
|
∨, →, and ↔. Fortunately for the tokenizer, we don't have any sequence elements that are
|
|
|
|
|
more than one character long, so we can simply convert the sequence of characters into tokens. We represent
|
|
|
|
|
the tokens as integers under the hood, but in the code for our parser we'll use aliases so it's clear what
|
|
|
|
|
kind of token we're talking about.</p>
|
|
|
|
|
<pre>
|
|
|
|
|
1 // Declare aliases for token identifiers
|
|
|
|
|
2 var ERROR = 0, ATOM = 1, NEGATE = 2, LPAR = 3, RPAR = 4, ARROW = 5,
|
|
|
|
|
3 BICOND = 6, AND = 7, OR = 8;
|
|
|
|
|
4
|
|
|
|
|
5 // Converts each symbol to a token and returns a list of tokens
|
|
|
|
|
6 function tokenize(s) {
|
|
|
|
|
7 var tokens = [];
|
|
|
|
|
8 for (var i = 0; i < s.length; i++) {
|
|
|
|
|
9 if (/[ABC]/.test(s[i])) {
|
|
|
|
|
10 tokens.push(ATOM);
|
|
|
|
|
11 } else if (s[i] == ("~")) {
|
|
|
|
|
12 tokens.push(NEGATE);
|
|
|
|
|
13 } else if (s[i] == ("(")) {
|
|
|
|
|
14 tokens.push(LPAR);
|
|
|
|
|
15 } else if (s[i] == (")")) {
|
|
|
|
|
16 tokens.push(RPAR);
|
|
|
|
|
17 } else if (s[i] == ("\u2192")) {
|
|
|
|
|
18 tokens.push(ARROW);
|
|
|
|
|
19 } else if (s[i] == ("\u2194")) {
|
|
|
|
|
20 tokens.push(BICOND);
|
|
|
|
|
21 } else if (s[i] == ("\u2227")) {
|
|
|
|
|
22 tokens.push(AND);
|
|
|
|
|
23 } else if (s[i] == ("\u2228")) {
|
|
|
|
|
24 tokens.push(OR);
|
|
|
|
|
25 } else {
|
|
|
|
|
26 return [ ERROR ];
|
|
|
|
|
27 console.log("Parsing error")
|
|
|
|
|
28 }
|
|
|
|
|
29 }
|
|
|
|
|
30 return tokens;
|
|
|
|
|
31 }</pre>
|
|
|
|
|
<p>Some things to note: We define an ERROR token that gets returned when we encounter a character that
|
|
|
|
|
isn't part of the grammar (25-28). If we find an illegal character, we know immediately that the sequence
|
|
|
|
|
won't parse. The parser won't try to interpret an ERROR token and will fail immediately. Also note that we can
|
|
|
|
|
match all the atomic sentences with a single regular expression match (9).</p>
|
|
|
|
|
<center><h2>Writing an SL3 RDP: Parser</h2></center>
|
|
|
|
|
<p>For the parser, we can simplify the number of functions required by matching the rules by their structural
|
|
|
|
|
similarity:</p>
|
|
|
|
|
<pre>
|
|
|
|
|
Φ := A | B | C
|
|
|
|
|
Φ := ~Φ
|
|
|
|
|
Φ := (Φ∧Φ) | (Φ∨Φ) | (Φ→Φ) | (Φ↔Φ)</pre>
|
|
|
|
|
<p>The atomic sentences consist of a single ATOM token, negative sentences of a NEGATE token followed by a
|
|
|
|
|
sentence, and the other complicated sentences by two sentences around an operator token, flanked by an LPAR
|
|
|
|
|
token and an RPAR token. Using the computer scientist's first resort, another layer of indirection, we can
|
|
|
|
|
get away with writing three functions: one to try and parse an atomic sentence from the token list, one to
|
|
|
|
|
try to parse a negative sentence, and one to parse a sentence given some binary operator. Let's look at these
|
|
|
|
|
in order. Ignore <i>flag</i> for now; we'll make use of that later.</p>
|
|
|
|
|
<pre>
|
|
|
|
|
1 function parseAtom(tokens, flag) {
|
|
|
|
|
2 if (tokens[0] != ATOM) {
|
|
|
|
|
3 return [ false, tokens ];
|
|
|
|
|
4 }
|
|
|
|
|
5 return [ true, tokens.slice(1) ];
|
|
|
|
|
6 }</pre>
|
|
|
|
|
<p>If all we need to successfully parse an atomic sentence is a single ATOM token, then it suffices to find
|
|
|
|
|
one of them at the head of the token list. If we don't find one, we report that we failed to parse an
|
|
|
|
|
atomic sentence and return an untouched token list (2-4). If we do find one, we consume the ATOM token and return
|
|
|
|
|
the rest of the tokens as a success (5). The <i>.slice()</i> function in Javascript returns a subsequence of a list,
|
|
|
|
|
which we use to remove tokens from the head of the list.</p>
|
|
|
|
|
<pre>
|
|
|
|
|
1 function parseNegation(tokens, flag) {
|
|
|
|
|
2 if (tokens[0] != NEGATE) {
|
|
|
|
|
3 return [ false, tokens ];
|
|
|
|
|
4 }
|
|
|
|
|
5 var iter = tokens.slice(1);
|
|
|
|
|
6 var parseTry = parseSentence(iter, flag);
|
|
|
|
|
7 if (!parseTry[0]) {
|
|
|
|
|
8 return [ false, tokens ];
|
|
|
|
|
9 }
|
|
|
|
|
10 return parseTry;
|
|
|
|
|
11 }</pre>
|
|
|
|
|
<p>Here we see our first instance of recursion. A negative sentence <i>in toto</i> must consist in a NEGATE
|
|
|
|
|
token, followed by a sequence of tokens that make up a valid sentence. Thus, we first check to see if the
|
|
|
|
|
first condition is met, returning a failure if it is not (2-4). If it is, then we consume it (5) and pass the
|
|
|
|
|
rest of the tokens recursively to another parsing function (6). We'll see the internals of parseSentence()
|
|
|
|
|
later. For now, we'll note that it returns the same sort of data structure that the other parsing functions
|
|
|
|
|
return, which is a tuple of whether the parsing succeeded and a list of unconsumed tokens. If the parsing was a
|
|
|
|
|
failure, we return a failure as in line 3 (7-9). Note that in line 8, we return <i>tokens</i>, not
|
|
|
|
|
<i>parseTry[1]</i>. We don't want to consume the NEGATE token if we couldn't parse a negative sentence after
|
|
|
|
|
all. If the parsing did succeed, then the return value is a tuple with a parsing success and the tokens
|
|
|
|
|
left unconsumed by the sentence, which is just what <i>parseNegation()</i> wants to return.</p>
|
|
|
|
|
<pre>
|
|
|
|
|
1 function parseBinaryOperator(tokens, operator, flag) {
|
|
|
|
|
2 if (tokens[0] != LPAR) {
|
|
|
|
|
3 return [ false, tokens ];
|
|
|
|
|
4 }
|
|
|
|
|
5 var iterA = tokens.slice(1);
|
|
|
|
|
6 var parseTryOne = parseSentence(iterA, flag);
|
|
|
|
|
7 if (!parseTryOne[0]) {
|
|
|
|
|
8 return [ false, tokens ];
|
|
|
|
|
9 }
|
|
|
|
|
10 if (parseTryOne[1][0] != operator) {
|
|
|
|
|
11 return [ false, tokens ];
|
|
|
|
|
12 }
|
|
|
|
|
13 var iterB = parseTryOne[1].slice(1);
|
|
|
|
|
14 var parseTryTwo = parseSentence(iterB, flag);
|
|
|
|
|
15 if (!parseTryTwo[0]) {
|
|
|
|
|
16 return [ false, tokens ];
|
|
|
|
|
17 }
|
|
|
|
|
18 if (parseTryTwo[1][0] != RPAR) {
|
|
|
|
|
19 return [ false, tokens ];
|
|
|
|
|
20 }
|
|
|
|
|
21 return [ true, parseTryTwo[1].slice(1) ];
|
|
|
|
|
22 }</pre>
|
|
|
|
|
<p>Finally, we come to the binary operator function. All four of the binary operator sentence types share a common
|
|
|
|
|
structure, differing only in which operator token is in the middle, so if we pass that in as the <i>operator</i>
|
|
|
|
|
argument to the function, we can represent all four sentences by passing different tokens to the function. In this
|
|
|
|
|
function we also see two instances of recursion on lines 6 and 14. The overall effect is to try and consume an
|
|
|
|
|
LPAR token (2-5), then however many tokens are required to parse a valid sentence (6-9), then whichever token is
|
|
|
|
|
the operator for this function call (10-13), then however many tokens are required to parse another valid sentence
|
|
|
|
|
(14-17), then finally an RPAR token (18-21). If at any point the expected tokens aren't found, then a binary
|
|
|
|
|
operator sentence can't be parsed, and the token list is returned unchanged as a failure.</p>
|
|
|
|
|
<p><i>parseNegation()</i> and <i>parseBinaryOperator()</i> accomplished their recursive descent by calling a
|
|
|
|
|
generalized <i>parseSentence()</i> function. What does this function do? <i>parseSentence()</i> is what ties all of
|
|
|
|
|
the recursive descent parsing functions together by trying each of them in turn and returning the results of
|
|
|
|
|
whichever of them worked:</p>
|
|
|
|
|
<pre>
|
|
|
|
|
1 function parseSentence(tokens, flag) {
|
|
|
|
|
2 var tryAtom = parseAtom(tokens, flag);
|
|
|
|
|
3 if (tryAtom[0])
|
|
|
|
|
4 return tryAtom;
|
|
|
|
|
5 var tryNegation = parseNegation(tokens, flag);
|
|
|
|
|
6 if (tryNegation[0])
|
|
|
|
|
7 return tryNegation;
|
|
|
|
|
8 var tryConditional = parseBinaryOperator(tokens, ARROW, flag);
|
|
|
|
|
9 if (tryConditional[0])
|
|
|
|
|
10 return tryConditional;
|
|
|
|
|
11 var tryBicondition = parseBinaryOperator(tokens, BICOND, flag);
|
|
|
|
|
12 if (tryBicondition[0])
|
|
|
|
|
13 return tryBicondition;
|
|
|
|
|
14 var tryConjunction = parseBinaryOperator(tokens, AND, flag);
|
|
|
|
|
15 if (tryConjunction[0])
|
|
|
|
|
16 return tryConjunction;
|
|
|
|
|
17 var tryDisjunction = parseBinaryOperator(tokens, OR, flag);
|
|
|
|
|
18 if (tryDisjunction[0])
|
|
|
|
|
19 return tryDisjunction;
|
|
|
|
|
20 return [ false, tokens ];
|
|
|
|
|
21 }</pre>
|
|
|
|
|
<p>Note that line 20 perfectly encapsulates what the closure clause does in the definition of SL3: if something
|
|
|
|
|
does not follow from one of the given rules, i.e. does not parse according to any of the defined parsing functions,
|
|
|
|
|
then it is not a sentence, and the parser returns a failure to the calling context.</p>
|
|
|
|
|
<p>All that's left is to put the two pieces together. The function to parse a given input will look something like
|
|
|
|
|
this:</p>
|
|
|
|
|
<pre>
|
|
|
|
|
1 function parseSL3(input) {
|
|
|
|
|
2 var tokens = tokenize(input);
|
|
|
|
|
3 var parsed = parseSentence(tokens, false);
|
|
|
|
|
4 var valid = parsed[0] && parsed[1].length == 0;
|
|
|
|
|
5
|
|
|
|
|
6 // Do something with the result
|
|
|
|
|
7 }</pre>
|
|
|
|
|
<p>On line 2 we pass the input to the tokenizer to convert it from a character sequence to a token sequence. On
|
|
|
|
|
line 3 we pass this token sequence to the general parsing function, which will then try all the parsing functions
|
|
|
|
|
until it finds one that works, which will itself do some parsing, possibly including more recursive calls to
|
|
|
|
|
parsing functions, and so on. After all of that is done, we receive back a tuple of whether it worked and a list
|
|
|
|
|
of all the leftover tokens. On line 4 we establish that an input sentence is valid not only if it parsed validly,
|
|
|
|
|
but also if there were no tokens left over. This must be checked because otherwise superfluous characters could
|
|
|
|
|
be added to the end of a valid sentence.</p>
|
|
|
|
|
<center><h2>Writing an SL3 RDP: Extended junctions</h2></center>
|
|
|
|
|
<p>The above code will parse any sentence according to the given rules of SL3 sentences:</p>
|
|
|
|
|
<pre>
|
|
|
|
|
Φ := A | B | C
|
|
|
|
|
Φ := ~Φ
|
|
|
|
|
Φ := (Φ∧Φ) | (Φ∨Φ) | (Φ→Φ) | (Φ↔Φ)</pre>
|
|
|
|
|
<p>However, we might want to extend these rules so that long junctions don't have to involve nesting each
|
|
|
|
|
subsentence within another pair of parentheses:</p>
|
|
|
|
|
<pre>
|
|
|
|
|
Φ := A | B | C
|
|
|
|
|
Φ := ~Φ
|
|
|
|
|
Φ := (Φ→Φ) | (Φ↔Φ)
|
|
|
|
|
Φ := (Φ∧...∧Φ) | (Φ∨...∨Φ)</pre>
|
|
|
|
|
<p>We can accomplish this by writing a different parsing function for junctions that can handle an arbitrary
|
|
|
|
|
finite number of conjuncts or disjuncts. This function will begin much like the binary operator parser, before
|
|
|
|
|
diverging at the end. The different steps have been slightly separated to make the parallels clearer.</p>
|
|
|
|
|
</div>
|
|
|
|
|
<div class="text2"><pre>
|
|
|
|
|
1 function parseBinaryOperator(tokens, operator, flag) {
|
|
|
|
|
|
|
|
|
|
2 if (tokens[0] != LPAR) {
|
|
|
|
|
3 return [ false, tokens ];
|
|
|
|
|
4 }
|
|
|
|
|
5 var iterA = tokens.slice(1);
|
|
|
|
|
|
|
|
|
|
6 var parseTryOne = parseSentence(iterA, flag);
|
|
|
|
|
7 if (!parseTryOne[0]) {
|
|
|
|
|
8 return [ false, tokens ];
|
|
|
|
|
9 }
|
|
|
|
|
|
|
|
|
|
10 if (parseTryOne[1][0] != operator) {
|
|
|
|
|
11 return [ false, tokens ];
|
|
|
|
|
12 }
|
|
|
|
|
13 var iterB = parseTryOne[1].slice(1);
|
|
|
|
|
|
|
|
|
|
14 var parseTryTwo = parseSentence(iterB, flag);
|
|
|
|
|
15 if (!parseTryTwo[0]) {
|
|
|
|
|
16 return [ false, tokens ];
|
|
|
|
|
17 }
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
18 if (parseTryTwo[1][0] != RPAR) {
|
|
|
|
|
19 return [ false, tokens ];
|
|
|
|
|
20 }
|
|
|
|
|
21 return [ true, parseTryTwo[1].slice(1) ];
|
|
|
|
|
22 }</pre>
|
|
|
|
|
</div>
|
|
|
|
|
<div class="text2"><pre>
|
|
|
|
|
1 function parseNAryOperator(tokens, operator, flag) {
|
|
|
|
|
|
|
|
|
|
2 if (tokens[0] != LPAR) {
|
|
|
|
|
3 return [ false, tokens ];
|
|
|
|
|
4 }
|
|
|
|
|
5 var iterA = tokens.slice(1);
|
|
|
|
|
|
|
|
|
|
6 var parseTryOne = parseSentence(iterA, flag);
|
|
|
|
|
7 if (!parseTryOne[0]) {
|
|
|
|
|
8 return [ false, tokens ];
|
|
|
|
|
9 }
|
|
|
|
|
|
|
|
|
|
10 if (parseTryOne[1][0] != operator) {
|
|
|
|
|
11 return [ false, tokens ];
|
|
|
|
|
12 }
|
|
|
|
|
13 var iterB = parseTryOne[1].slice(1);
|
|
|
|
|
|
|
|
|
|
14 var parseTryTwo = parseSentence(iterB, flag);
|
|
|
|
|
15 if (!parseTryTwo[0]) {
|
|
|
|
|
16 return [ false, tokens ];
|
|
|
|
|
17 }
|
|
|
|
|
|
|
|
|
|
18 var tokensRemaining = parseTryTwo[1];
|
|
|
|
|
19 while (tokensRemaining.length > 0) {
|
|
|
|
|
|
|
|
|
|
20 if (tokensRemaining[0] == RPAR) {
|
|
|
|
|
21 return [ true, tokensRemaining.slice(1) ];
|
|
|
|
|
22 }
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
23 if (tokensRemaining[0] != operator) {
|
|
|
|
|
24 return [ false, tokens ];
|
|
|
|
|
25 }
|
|
|
|
|
26 tokensRemaining = tokensRemaining.slice(1);
|
|
|
|
|
|
|
|
|
|
27 var parseTryLoop = parseSentence(tokensRemaining, flag);
|
|
|
|
|
28 if (!parseTryLoop[0]) {
|
|
|
|
|
29 return [ false, tokens ];
|
|
|
|
|
30 }
|
|
|
|
|
31 tokensRemaining = parseTryLoop[1];
|
|
|
|
|
|
|
|
|
|
32 }
|
|
|
|
|
33 return [ false, tokens ];
|
|
|
|
|
34 }</pre>
|
|
|
|
|
</div>
|
|
|
|
|
<div class="text1">
|
|
|
|
|
<p>Both functions begin by attempting to parse an LPAR, a sentence, an operator, and a second sentence. However,
|
|
|
|
|
where the binary parser need only look for an RPAR token, the extended parser needs a loop to check for an
|
|
|
|
|
arbitrary number of additional conjuncts or disjuncts. The check for an RPAR is done first, so that it can still
|
|
|
|
|
validate 2-sentence junctions (20-22). If there isn't one, then the junction must still be continuing, which
|
|
|
|
|
means that the sentence must be followed by the same operator (23-26) and another sentence (27-31). If at any
|
|
|
|
|
point either a sentence is not followed by the operator or the operator is not followed by a sentence, the
|
|
|
|
|
parser will fail (24,29). After consuming the sentence, the function loops back to line 19 to check for an
|
|
|
|
|
RPAR token again. If the loop ever runs out of tokens, then it must not have found an RPAR that closed the
|
|
|
|
|
current junction, in which case the sentence is invalid (33).</p>
|
|
|
|
|
<p>With this alternative parsing function, we can finally make use of <i>flag</i> by letting the value of
|
|
|
|
|
<i>flag</i> determine whether we use extended junction rules. To do this, we rewrite</p>
|
|
|
|
|
<pre>
|
|
|
|
|
14 var tryConjunction = parseBinaryOperator(tokens, AND, flag);
|
|
|
|
|
15 if (tryConjunction[0])
|
|
|
|
|
16 return tryConjunction;
|
|
|
|
|
17 var tryDisjunction = parseBinaryOperator(tokens, OR, flag);
|
|
|
|
|
18 if (tryDisjunction[0])
|
|
|
|
|
19 return tryDisjunction;</pre>
|
|
|
|
|
<p>to use the extended functions when flagged, as so:</p>
|
|
|
|
|
<pre>
|
|
|
|
|
14 var tryConjunction = flag ? parseNAryOperator(tokens, AND, flag) : parseBinaryOperator(tokens, AND, flag);
|
|
|
|
|
15 if (tryConjunction[0])
|
|
|
|
|
16 return tryConjunction;
|
|
|
|
|
17 var tryDisjunction = flag ? parseNAryOperator(tokens, OR, flag) : parseBinaryOperator(tokens, OR, flag);
|
|
|
|
|
18 if (tryDisjunction[0])
|
|
|
|
|
19 return tryDisjunction;</pre>
|
|
|
|
|
<p>The ?: expression above is called a ternary operator, and is just shorthand for an if/else statement. Thus,
|
|
|
|
|
if <i>flag</i> is true, then conjunctions and disjunctions will be parsed using the extended junction rules; and
|
|
|
|
|
if false, using the binary rules. Then we can call <i>parseSentence()</i> on the same list of tokens with
|
|
|
|
|
<i>flag</i> set to true or false in order to see which rules a sentence is valid under.</p>
|
|
|
|
|
<p>The real implementation under the hood has to deal with updating the UI at the top of the page, so the highest
|
|
|
|
|
parsing function concerns itself with some other details. To see what the full implementation does, right click
|
|
|
|
|
on this page and select "View page source" (or press Ctrl+U or Cmd+U) to see the code for the embedded
|
|
|
|
|
Javascript. The functions have comments in them that outline what is going on at each point in the function.</p>
|
|
|
|
|
</div>
|
|
|
|
|
</center>
|
|
|
|
|
<hr>
|
|
|
|
|
Tim Van Baak<br>
|
|
|
|
|
Rice University '18<br>
|
|
|
|
|
PHIL 357 - Incompleteness, Undecidability, and Computability<br>
|
|
|
|
|
31 January 2017<br>
|
|
|
|
|
</body>
|
|
|
|
|
</html>
|