diff --git a/build.py b/build.py index 9dea397..4d31193 100755 --- a/build.py +++ b/build.py @@ -44,23 +44,19 @@ def main(): os.makedirs(dest.parent, exist_ok=True) # Copy any file types without content processing - if filename.rsplit(".")[-1] not in ("html", "md"): + if filename.rsplit(".")[-1] not in ("md",): print("Copying ", path) shutil.copyfile(path, dest) continue - content = path.read_text(encoding="utf8") - meta = {} - # Preprocess markdown into html if dest.name.endswith(".md"): print("Converting", path) md.reset() dest = dest.with_suffix(".html") - content = md.convert(content) - meta = md.Meta + content = md.convert(path.read_text(encoding="utf8")) + meta = md.Meta or {} - if dest.name.endswith("html"): # Inject content into the template page_content = bs4.BeautifulSoup(content, features="html.parser") page = copy.copy(template) @@ -104,12 +100,9 @@ def main(): p.append(a) page.header.append(p) - # The fully templated page is the new content - content = str(page) - - # Write the destination file - print("Writing ", dest) - dest.write_text(content) + # Write the fully templated page + print("Writing ", dest) + dest.write_text(str(page)) print("Processed", count, "files") diff --git a/src/project/index.md b/src/project/index.md new file mode 100644 index 0000000..16740ba --- /dev/null +++ b/src/project/index.md @@ -0,0 +1,23 @@ +--- +title: Projects +--- + +## Ludum Dare entries + +Ludum Dare is a long-running game jam event where participants create a game from scratch over a weekend. + +* [Roundman: Escape from Squareworld](https://web.archive.org/web/20120414152156/http://ludumdare.com/compo/ludum-dare-21/?action=preview&uid=4882), Ludum Dare 21. An arcade-style platformer in C#/XNA. + +* [Happy Land](https://web.archive.org/web/20120105193359/http://ludumdare.com/compo/ludum-dare-22/?action=preview&uid=4884), Ludum Dare 22. A top-down survival shooter in C#/XNA. 
+ +* [Primordial Smackdown: Bacteria with Badditude](https://web.archive.org/web/20120505114828/http://ludumdare.com/compo/ludum-dare-23/?action=preview&uid=4884), Ludum Dare 23. An arcade-style game about bacteria in C#/XNA. + +* [Apes vs Dinosaurs: Unnatural Selection](https://web.archive.org/web/20140813200902/http://ludumdare.com/compo/ludum-dare-24/?action=preview&uid=4884), Ludum Dare 24. A card game/tower defense hybrid in C#/XNA. + +* [RGB Shift](https://web.archive.org/web/20171014143944/http://ludumdare.com/compo/minild-56/?action=preview&uid=4884), MiniLD 56. A sidescrolling platformer with a unique color mechanic in Java/LWJGL. RGB Shift was also developed for HackRice 2015, where it won Best Visualization. + +* [Voyage of the Indefatigable](https://ldjam.com/events/ludum-dare/39/voyage-of-the-indefatigable), Ludum Dare 39. A resource management game in HTML/JS. + +## Miscellaneous + +* [A recursive descent SL3 parser](./sl3.html) written for a mathematical logic class. \ No newline at end of file diff --git a/src/project/sl3.html b/src/project/sl3.html new file mode 100644 index 0000000..312f11a --- /dev/null +++ b/src/project/sl3.html @@ -0,0 +1,554 @@ + + + Comparative SL3 Parsing + + + + +
+

Comparative SL3 Parsing

+

+ + +

+

+ + + + +

+
+
+

Input sentence validity in SL3 without extended junctions:

+

✔ Valid

+
+
+

Input sentence validity in SL3 with extended junctions:

+

✔ Valid

+
+
+
+

Recursive descent parsing

+

Languages that are defined by recursive grammars can be parsed by means of a recursive descent parser. + A recursive descent parser will consist of two main pieces: the tokenizer and the parser. The tokenizer + converts the input, which is a string of characters where one element may consist of multiple characters, + into a sequence of tokens, which abstractly represent the structural elements of the sequence. For example, + a natural language parser might tokenize a string of characters by splitting on whitespace and converting + each group of letters into a WORD token, and a JSON parser might convert a string of numerals and notation + characters into a JNUMBER token.

+

The parser takes in a sequence of tokens and decides whether it can be parsed according to a set grammar. + To do this, the parser defines recursive functions that correspond to each rule in the recursive grammar. + A particular method call to one of these functions can then call whatever other recursive functions are + required to validate that grammar rule. For example, a function that parses math expressions might call an + expression parser on each side of a + token. At each step in the parsing procedure, recursive calls will + consume some number of tokens off of the token list. If a recursive call fails, the original token list + will be returned instead of a list with some tokens consumed, so the parent function call can try another + recursive call or fail.

+

Writing an SL3 RDP: Tokenizer

+

SL3 is defined by the following recursive grammar rule, where Φ is a sentence:

+
+	Φ := A | B | C | ~Φ | (Φ∧Φ) | (Φ∨Φ) | (Φ→Φ) | (Φ↔Φ)
+

The closure clause is implicit. Thus the characters we should expect to see are A, B, C, (, ), ∧, + ∨, →, and ↔. Fortunately for the tokenizer, we don't have any sequence elements that are + more than one character long, so we can simply convert the sequence of characters into tokens. We represent + the tokens as integers under the hood, but in the code for our parser we'll use aliases so it's clear what + kind of token we're talking about.

+
+1	// Declare aliases for token identifiers
+2	var ERROR = 0, ATOM = 1, NEGATE = 2, LPAR = 3, RPAR = 4, ARROW = 5,
+3		BICOND = 6, AND = 7, OR = 8;
+4	
+5	// Converts each symbol to a token and returns a list of tokens
+6	function tokenize(s) {
+7		var tokens = [];
+8		for (var i = 0; i < s.length; i++) {
+9			if (/[ABC]/.test(s[i])) {
+10				tokens.push(ATOM);
+11			} else if (s[i] == ("~")) {
+12				tokens.push(NEGATE);
+13			} else if (s[i] == ("(")) {
+14				tokens.push(LPAR);
+15			} else if (s[i] == (")")) {
+16				tokens.push(RPAR);
+17			} else if (s[i] == ("\u2192")) {
+18				tokens.push(ARROW);
+19			} else if (s[i] == ("\u2194")) {
+20				tokens.push(BICOND);
+21			} else if (s[i] == ("\u2227")) {
+22				tokens.push(AND);
+23			} else if (s[i] == ("\u2228")) {
+24				tokens.push(OR);
+25			} else {
+26			console.log("Parsing error")
+27			return [ ERROR ];
+28			}
+29		}
+30		return tokens;
+31	}
+

Some things to note: We define an ERROR token that gets returned when we encounter a character that + isn't part of the grammar (25-28). If we find an illegal character, we know immediately that the sequence + won't parse. The parser won't try to interpret an ERROR token; it will fail immediately. Also note that we can + match all the atomic sentences with a single regular expression match (9).

+

Writing an SL3 RDP: Parser

+

For the parser, we can simplify the number of functions required by matching the rules by their structural + similarity:

+
+	Φ := A | B | C
+	Φ := ~Φ
+	Φ := (Φ∧Φ) | (Φ∨Φ) | (Φ→Φ) | (Φ↔Φ)
+

The atomic sentences consist of a single ATOM token, negative sentences of a NEGATE token followed by a + sentence, and the other complicated sentences by two sentences around an operator token, flanked by an LPAR + token and an RPAR token. Using the computer scientist's first resort, another layer of indirection, we can + get away with writing three functions: one to try and parse an atomic sentence from the token list, one to + try to parse a negative sentence, and one to parse a sentence given some binary operator. Let's look at these + in order. Ignore flag for now; we'll make use of that later.

+
+1	function parseAtom(tokens, flag) {
+2		if (tokens[0] != ATOM) {
+3			return [ false, tokens ];
+4		}
+5		return [ true, tokens.slice(1) ];
+6	}
+

If all we need to successfully parse an atomic sentence is a single ATOM token, then it suffices to find + one of them at the head of the token list. If we don't find one, we report that we failed to parse an + atomic sentence and return an untouched token list (2-4). If we do find one, we consume the ATOM token and return + the rest of the tokens as a success (5). The .slice() function in Javascript returns a subsequence of a list, + which we use to remove tokens from the head of the list.

+
+1	function parseNegation(tokens, flag) {
+2		if (tokens[0] != NEGATE) {
+3			return [ false, tokens ];
+4		}
+5		var iter = tokens.slice(1);
+6		var parseTry = parseSentence(iter, flag);
+7		if (!parseTry[0]) {
+8			return [ false, tokens ];
+9		}
+10		return parseTry;
+11	}
+

Here we see our first instance of recursion. A negative sentence in toto must consist in a NEGATE + token, followed by a sequence of tokens that make up a valid sentence. Thus, we first check to see if the + first condition is met, returning a failure if it is not (2-4). If it is, then we consume it (5) and pass the + rest of the tokens recursively to another parsing function (6). We'll see the internals of parseSentence() + later. For now, we'll note that it returns the same sort of data structure that the other parsing functions + return, which is a tuple of whether the parsing succeeded and a list of unconsumed tokens. If the parsing was a + failure, we return a failure as in line 3 (7-9). Note that in line 8, we return tokens, not + parseTry[1]. We don't want to consume the NEGATE token if we couldn't parse a negative sentence after + all. If the parsing did succeed, then the return value is a tuple with a parsing success and the tokens + left unconsumed by the sentence, which is just what parseNegation() wants to return.

+
+1	function parseBinaryOperator(tokens, operator, flag) {
+2		if (tokens[0] != LPAR) {
+3			return [ false, tokens ];
+4		}
+5		var iterA = tokens.slice(1);
+6		var parseTryOne = parseSentence(iterA, flag);
+7		if (!parseTryOne[0]) {
+8			return [ false, tokens ];
+9		}
+10		if (parseTryOne[1][0] != operator) {
+11			return [ false, tokens ];
+12		}
+13		var iterB = parseTryOne[1].slice(1);
+14		var parseTryTwo = parseSentence(iterB, flag);
+15		if (!parseTryTwo[0]) {
+16			return [ false, tokens ];
+17		}
+18		if (parseTryTwo[1][0] != RPAR) {
+19			return [ false, tokens ];
+20		}
+21		return [ true, parseTryTwo[1].slice(1) ];
+22	}
+

Finally, we come to the binary operator function. All four of the binary operator sentence types share a common + structure, differing only in which operator token is in the middle, so if we pass that in as the operator + argument to the function, we can represent all four sentences by passing different tokens to the function. In this + function we also see two instances of recursion on lines 6 and 14. The overall effect is to try and consume an + LPAR token (2-5), then however many tokens are required to parse a valid sentence (6-9), then whichever token is + the operator for this function call (10-13), then however many tokens are required to parse another valid sentence + (14-17), then finally an RPAR token (18-21). If at any point the expected tokens aren't found, then a binary + operator sentence can't be parsed, and the token list is returned unchanged as a failure.

+

parseNegation() and parseBinaryOperator() accomplished their recursive descent by calling a + generalized parseSentence() function. What does this function do? parseSentence() is what ties all of + the recursive descent parsing functions together by trying each of them in turn and returning the results of + whichever of them worked:

+
+1	function parseSentence(tokens, flag) {
+2		var tryAtom = parseAtom(tokens, flag);		
+3		if (tryAtom[0])
+4			return tryAtom;
+5		var tryNegation = parseNegation(tokens, flag);
+6		if (tryNegation[0])
+7			return tryNegation;
+8		var tryConditional = parseBinaryOperator(tokens, ARROW, flag);
+9		if (tryConditional[0])
+10			return tryConditional;
+11		var tryBicondition = parseBinaryOperator(tokens, BICOND, flag);
+12		if (tryBicondition[0])
+13			return tryBicondition;
+14		var tryConjunction = parseBinaryOperator(tokens, AND, flag);
+15		if (tryConjunction[0])
+16			return tryConjunction;
+17		var tryDisjunction = parseBinaryOperator(tokens, OR, flag);
+18		if (tryDisjunction[0])
+19			return tryDisjunction;
+20		return [ false, tokens ];
+21		}
+

Note that line 20 perfectly encapsulates what the closure clause does in the definition of SL3: if something + does not follow by one of the given rules i.e. does not parse according to any of the defined parsing functions, + then it is not a sentence, and the parser returns a failure to the calling context.

+

All that's left is to put the two pieces together. The function to parse a given input will look something like + this:

+
+1	function parseSL3(input) {
+2		var tokens = tokenize(input);
+3		var parsed = parseSentence(tokens, false);
+4		var valid = parsed[0] && parsed[1].length == 0;
+5		
+6		// Do something with the result
+7	}
+

On line 2 we pass the input to the tokenizer to convert it from a character sequence to a token sequence. On + line 3 we pass this token sequence to the general parsing function, which will then try all the parsing functions + until it finds one that works, which will itself do some parsing, possibly including more recursive calls to + parsing functions, and so on. After all of that is done, we receive back a tuple of whether it worked and a list + of all the leftover tokens. On line 4 we establish that an input sentence is valid only if it both parsed validly + and left no tokens over. This must be checked because otherwise superfluous characters could + be added to the end of a valid sentence.

+

Writing an SL3 RDP: Extended junctions

+

The above code will parse any sentence according to the given rules of SL3 sentences: +

+	Φ := A | B | C
+	Φ := ~Φ
+	Φ := (Φ∧Φ) | (Φ∨Φ) | (Φ→Φ) | (Φ↔Φ)
+

However, we might want to extend these rules so that long junctions don't have to involve nesting each + subsentence within another pair of parentheses:

+
+	Φ := A | B | C
+	Φ := ~Φ
+	Φ := (Φ→Φ) | (Φ↔Φ)
+	Φ := (Φ∧...∧Φ) | (Φ∨...∨Φ)
+

We can accomplish this by writing a different parsing function for junctions that can handle an arbitrary + finite number of conjuncts or disjuncts. This function will begin much like the binary operator parser, before + diverging at the end. The different steps have been slightly separated to make the parallels clearer.

+
+
+1	function parseBinaryOperator(tokens, operator, flag) {
+
+2		if (tokens[0] != LPAR) {
+3			return [ false, tokens ];
+4		}
+5		var iterA = tokens.slice(1);
+
+6		var parseTryOne = parseSentence(iterA, flag);
+7		if (!parseTryOne[0]) {
+8			return [ false, tokens ];
+9		}
+
+10		if (parseTryOne[1][0] != operator) {
+11			return [ false, tokens ];
+12		}
+13		var iterB = parseTryOne[1].slice(1);
+
+14		var parseTryTwo = parseSentence(iterB, flag);
+15		if (!parseTryTwo[0]) {
+16			return [ false, tokens ];
+17		}
+
+
+
+
+18		if (parseTryTwo[1][0] != RPAR) {
+19			return [ false, tokens ];
+20		}
+21		return [ true, parseTryTwo[1].slice(1) ];
+22	}
+
+
+1	function parseNAryOperator(tokens, operator, flag) {
+
+2		if (tokens[0] != LPAR) {
+3			return [ false, tokens ];
+4		}
+5		var iterA = tokens.slice(1);
+
+6		var parseTryOne = parseSentence(iterA, flag);
+7		if (!parseTryOne[0]) {
+8			return [ false, tokens ];
+9		}
+
+10		if (parseTryOne[1][0] != operator) {
+11			return [ false, tokens ];
+12		}
+13		var iterB = parseTryOne[1].slice(1);
+
+14		var parseTryTwo = parseSentence(iterB, flag);
+15		if (!parseTryTwo[0]) {
+16			return [ false, tokens ];
+17		}
+
+18		var tokensRemaining = parseTryTwo[1];
+19		while (tokensRemaining.length > 0) {
+
+20			if (tokensRemaining[0] == RPAR) {
+21				return [ true, tokensRemaining.slice(1) ];
+22			}
+
+
+23			if (tokensRemaining[0] != operator) {
+24				return [ false, tokens ];
+25			}
+26			tokensRemaining = tokensRemaining.slice(1);
+
+27			var parseTryLoop = parseSentence(tokensRemaining, flag);
+28			if (!parseTryLoop[0]) {
+29				return [ false, tokens ];
+30			}
+31			tokensRemaining = parseTryLoop[1];
+
+32		}
+33		return [ false, tokens ];
+34	}
+
+
+

Both functions begin by attempting to parse an LPAR, a sentence, an operator, and a second sentence. However, + where the binary parser need only look for an RPAR token, the extended parser needs a loop to check for an + arbitrary number of additional conjuncts or disjuncts. The check for an RPAR is done first, so that it can still + validate 2-sentence junctions (20-22). If there isn't one, then the junction must still be continuing, which + means that the sentence must be followed by the same operator (23-26) and another sentence (27-31). If at any + point either a sentence is not followed by the operator or the operator is not followed by a sentence, the + parser will fail (24,29). After consuming the sentence, the function loops back to line 19 to check for an + RPAR token again. If the loop ever runs out of tokens, then it must not have found an RPAR that closed the + current junction, in which case the sentence is invalid (33).

+

With this alternative parsing function, we can finally make use of flag by letting the value of + flag determine whether we use extended junction rules. To do this, we rewrite

+
+14	var tryConjunction = parseBinaryOperator(tokens, AND, flag);
+15	if (tryConjunction[0])
+16		return tryConjunction;
+17	var tryDisjunction = parseBinaryOperator(tokens, OR, flag);
+18	if (tryDisjunction[0])
+19		return tryDisjunction;
+

to use the extended functions when flagged, as so:

+
+14	var tryConjunction = flag ? parseNAryOperator(tokens, AND, flag) : parseBinaryOperator(tokens, AND, flag);
+15	if (tryConjunction[0])
+16		return tryConjunction;
+17	var tryDisjunction = flag ? parseNAryOperator(tokens, OR, flag) : parseBinaryOperator(tokens, OR, flag);
+18	if (tryDisjunction[0])
+19		return tryDisjunction;
+

The ?: expression above is called a ternary operator, and is just shorthand for an if/else statement. Thus, + if flag is true, then conjunctions and disjunctions will be parsed using the extended junction rules; and + if false, using the binary rules. Then we can call parseSentence() on the same list of tokens with + flag set to true or false in order to see which rules a sentence is valid under.

+

The real implementation under the hood has to deal with updating the UI at the top of the page, so the highest + parsing function concerns itself with some other details. To see what the full implementation does, right click + on this page and select "View page source" (or press Ctrl+U or Cmd+U) to see the code for the embedded + Javascript. The functions have comments in them that outline what is going on at each point in the function.

+
+
+
+ Tim Van Baak
+ Rice University '18
+ PHIL 357 - Incompleteness, Undecidability, and Computability
+ 31 January 2017
+ + \ No newline at end of file