diff --git a/build.py b/build.py
index 9dea397..4d31193 100755
--- a/build.py
+++ b/build.py
@@ -44,23 +44,19 @@ def main():
         os.makedirs(dest.parent, exist_ok=True)

         # Copy any file types without content processing
-        if filename.rsplit(".")[-1] not in ("html", "md"):
+        if filename.rsplit(".")[-1] not in ("md",):
             print("Copying ", path)
             shutil.copyfile(path, dest)
             continue

-        content = path.read_text(encoding="utf8")
-        meta = {}
-
         # Preprocess markdown into html
         if dest.name.endswith(".md"):
             print("Converting", path)
             md.reset()
             dest = dest.with_suffix(".html")
-            content = md.convert(content)
-            meta = md.Meta
+            content = md.convert(path.read_text(encoding="utf8"))
+            meta = md.Meta or {}

-        if dest.name.endswith("html"):
         # Inject content into the template
         page_content = bs4.BeautifulSoup(content, features="html.parser")
         page = copy.copy(template)
@@ -104,12 +100,9 @@ def main():
             p.append(a)
         page.header.append(p)

-        # The fully templated page is the new content
-        content = str(page)
-
-        # Write the destination file
-        print("Writing ", dest)
-        dest.write_text(content)
+        # Write the fully templated page
+        print("Writing ", dest)
+        dest.write_text(str(page))

     print("Processed", count, "files")
diff --git a/src/project/index.md b/src/project/index.md
new file mode 100644
index 0000000..16740ba
--- /dev/null
+++ b/src/project/index.md
@@ -0,0 +1,23 @@
+---
+title: Projects
+---
+
+## Ludum Dare entries
+
+Ludum Dare is a long-running game jam event where participants create a game from scratch over a weekend.
+
+* [Roundman: Escape from Squareworld](https://web.archive.org/web/20120414152156/http://ludumdare.com/compo/ludum-dare-21/?action=preview&uid=4882), Ludum Dare 21. An arcade-style platformer in C#/XNA.
+
+* [Happy Land](https://web.archive.org/web/20120105193359/http://ludumdare.com/compo/ludum-dare-22/?action=preview&uid=4884), Ludum Dare 22. A top-down survival shooter in C#/XNA.
+
+* [Primordial Smackdown: Bacteria with Badditude](https://web.archive.org/web/20120505114828/http://ludumdare.com/compo/ludum-dare-23/?action=preview&uid=4884), Ludum Dare 23. An arcade-style game about bacteria in C#/XNA.
+
+* [Apes vs Dinosaurs: Unnatural Selection](https://web.archive.org/web/20140813200902/http://ludumdare.com/compo/ludum-dare-24/?action=preview&uid=4884), Ludum Dare 24. A card game/tower defense hybrid in C#/XNA.
+
+* [RGB Shift](https://web.archive.org/web/20171014143944/http://ludumdare.com/compo/minild-56/?action=preview&uid=4884), MiniLD 56. A sidescrolling platformer with a unique color mechanic in Java/LWJGL. RGB Shift was also developed for HackRice 2015, where it won Best Visualization.
+
+* [Voyage of the Indefatigable](https://ldjam.com/events/ludum-dare/39/voyage-of-the-indefatigable), Ludum Dare 39. A resource management game in HTML/JS.
+
+## Miscellaneous
+
+* [A recursive descent SL3 parser](./sl3.html) written for a mathematical logic class.
\ No newline at end of file
diff --git a/src/project/sl3.html b/src/project/sl3.html
new file mode 100644
index 0000000..312f11a
--- /dev/null
+++ b/src/project/sl3.html
@@ -0,0 +1,554 @@
+Input sentence validity in SL3 without extended junctions:
+Input sentence validity in SL3 with extended junctions:
+Languages that are defined by recursive grammars can be parsed by means of a recursive descent parser.
+ A recursive descent parser consists of two main pieces: the tokenizer and the parser. The tokenizer
+ converts the input, a string of characters in which a single element may span multiple characters,
+ into a sequence of tokens, which abstractly represent the structural elements of the sequence. For example,
+ a natural language parser might tokenize a string of characters by splitting on whitespace and converting
+ each group of letters into a WORD token, and a JSON parser might convert a string of numerals and notation
+ characters into a JNUMBER token.
+The parser takes in a sequence of tokens and decides whether it can be parsed according to a given grammar.
+ To do this, the parser defines recursive functions that correspond to each rule in the recursive grammar.
+ A call to one of these functions can then call whatever other parsing functions are
+ required to validate that grammar rule. For example, a function that parses math expressions might call an
+ expression parser on each side of a + token. At each step in the parsing procedure, recursive calls
+ consume some number of tokens off the token list. If a recursive call fails, the original token list
+ is returned instead of a list with some tokens consumed, so the parent function call can try another
+ recursive call or fail.
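As a minimal sketch of that consume-or-restore convention (the NUMBER and PLUS tokens and the parseExpression() function here are purely illustrative and are not part of the SL3 parser that follows), a toy parser for the grammar E := NUMBER | NUMBER + E could look like this:

    // Illustrative only: hypothetical tokens for the toy grammar E := NUMBER | NUMBER + E
    var NUMBER = 0, PLUS = 1;

    function parseExpression(tokens) {
        if (tokens[0] != NUMBER) {
            return [ false, tokens ];        // failure: hand back the untouched token list
        }
        var rest = tokens.slice(1);          // consume the NUMBER
        if (rest[0] != PLUS) {
            return [ true, rest ];           // a lone NUMBER is already a valid expression
        }
        var right = parseExpression(rest.slice(1));  // recurse on whatever follows the +
        if (!right[0]) {
            return [ true, rest ];           // right side failed, so keep only the NUMBER
        }
        return right;                        // NUMBER + E parsed; pass the leftovers along
    }

    parseExpression([ NUMBER, PLUS, NUMBER ]);   // [ true, [] ]
    parseExpression([ PLUS, NUMBER ]);           // [ false, [ PLUS, NUMBER ] ]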
+SL3 is defined by the following recursive grammar rule, where Φ is a sentence:
+
+    Φ := A | B | C | ~Φ | (Φ∧Φ) | (Φ∨Φ) | (Φ→Φ) | (Φ↔Φ)
+
+The closure clause is implicit. Thus the characters we should expect to see are A, B, C, ~, (, ), ∧,
+ ∨, →, and ↔. Fortunately for the tokenizer, none of the elements of the sequence are
+ more than one character long, so we can convert each character of the input directly into a token. We represent
+ the tokens as integers under the hood, but in the code for our parser we'll use aliases so it's clear what
+ kind of token we're talking about.
+
+1  // Declare aliases for token identifiers
+2  var ERROR = 0, ATOM = 1, NEGATE = 2, LPAR = 3, RPAR = 4, ARROW = 5,
+3      BICOND = 6, AND = 7, OR = 8;
+4
+5  // Converts each symbol to a token and returns a list of tokens
+6  function tokenize(s) {
+7      var tokens = [];
+8      for (var i = 0; i < s.length; i++) {
+9          if (/[ABC]/.test(s[i])) {
+10             tokens.push(ATOM);
+11         } else if (s[i] == ("~")) {
+12             tokens.push(NEGATE);
+13         } else if (s[i] == ("(")) {
+14             tokens.push(LPAR);
+15         } else if (s[i] == (")")) {
+16             tokens.push(RPAR);
+17         } else if (s[i] == ("\u2192")) {
+18             tokens.push(ARROW);
+19         } else if (s[i] == ("\u2194")) {
+20             tokens.push(BICOND);
+21         } else if (s[i] == ("\u2227")) {
+22             tokens.push(AND);
+23         } else if (s[i] == ("\u2228")) {
+24             tokens.push(OR);
+25         } else {
+26             console.log("Parsing error");
+27             return [ ERROR ];
+28         }
+29     }
+30     return tokens;
+31 }
+
+Some things to note: we define an ERROR token that gets returned when we encounter a character that
+ isn't part of the grammar (25-28). If we find an illegal character, we know immediately that the sequence
+ won't parse. The parser won't try to interpret an ERROR token; it will simply fail immediately. Also note that we can
+ match all the atomic sentences with a single regular expression match (9).
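To make the token stream concrete, these are the token lists tokenize() would produce for a few sample inputs (written with the aliases rather than the underlying integers):

    tokenize("(A∧B)");   // [ LPAR, ATOM, AND, ATOM, RPAR ]
    tokenize("~C");      // [ NEGATE, ATOM ]
    tokenize("(A?B)");   // [ ERROR ] -- '?' is not a character of SL3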
+For the parser, we can reduce the number of functions required by grouping the rules by their structural
+ similarity:
+
+    Φ := A | B | C
+    Φ := ~Φ
+    Φ := (Φ∧Φ) | (Φ∨Φ) | (Φ→Φ) | (Φ↔Φ)
+
+The atomic sentences consist of a single ATOM token, negative sentences of a NEGATE token followed by a
+ sentence, and the remaining compound sentences of two sentences around an operator token, flanked by an LPAR
+ token and an RPAR token. Using the computer scientist's first resort, another layer of indirection, we can
+ get away with writing three functions: one to try to parse an atomic sentence from the token list, one to
+ try to parse a negative sentence, and one to parse a sentence given some binary operator. Let's look at these
+ in order. Ignore flag for now; we'll make use of it later.
+
+1  function parseAtom(tokens, flag) {
+2      if (tokens[0] != ATOM) {
+3          return [ false, tokens ];
+4      }
+5      return [ true, tokens.slice(1) ];
+6  }
+
+Since all we need to successfully parse an atomic sentence is a single ATOM token, it suffices to find
+ one at the head of the token list. If we don't find one, we report that we failed to parse an
+ atomic sentence and return an untouched token list (2-4). If we do find one, we consume the ATOM token and return
+ the rest of the tokens as a success (5). The .slice() function in JavaScript returns a subsequence of an array,
+ which we use to remove tokens from the head of the list.
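For example, calling parseAtom() on two small token lists built from the aliases above:

    parseAtom([ ATOM, AND, ATOM ], false);   // [ true, [ AND, ATOM ] ] -- one ATOM consumed
    parseAtom([ LPAR, ATOM ], false);        // [ false, [ LPAR, ATOM ] ] -- list handed back untouched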
+
+1  function parseNegation(tokens, flag) {
+2      if (tokens[0] != NEGATE) {
+3          return [ false, tokens ];
+4      }
+5      var iter = tokens.slice(1);
+6      var parseTry = parseSentence(iter, flag);
+7      if (!parseTry[0]) {
+8          return [ false, tokens ];
+9      }
+10     return parseTry;
+11 }
+
+Here we see our first instance of recursion. A negative sentence, taken as a whole, must consist of a NEGATE
+ token followed by a sequence of tokens that makes up a valid sentence. Thus, we first check whether the
+ first condition is met, returning a failure if it is not (2-4). If it is, then we consume the NEGATE token (5) and pass the
+ rest of the tokens recursively to another parsing function (6). We'll see the internals of parseSentence()
+ later. For now, we'll note that it returns the same sort of data structure that the other parsing functions
+ return: a tuple of whether the parsing succeeded and a list of unconsumed tokens. If the parsing was a
+ failure, we return a failure as in line 3 (7-9). Note that in line 8, we return tokens, not
+ parseTry[1]: we don't want to consume the NEGATE token if we couldn't parse a negative sentence after
+ all. If the parsing did succeed, then the return value is a tuple with a parsing success and the tokens
+ left unconsumed by the sentence, which is just what parseNegation() wants to return.
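A couple of illustrative calls (these rely on parseSentence(), defined further down, since parseNegation() recurses through it):

    parseNegation(tokenize("~A"), false);   // [ true, [] ] -- both tokens consumed
    parseNegation(tokenize("~)"), false);   // [ false, [ NEGATE, RPAR ] ] -- nothing consumed on failure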
+
+1  function parseBinaryOperator(tokens, operator, flag) {
+2      if (tokens[0] != LPAR) {
+3          return [ false, tokens ];
+4      }
+5      var iterA = tokens.slice(1);
+6      var parseTryOne = parseSentence(iterA, flag);
+7      if (!parseTryOne[0]) {
+8          return [ false, tokens ];
+9      }
+10     if (parseTryOne[1][0] != operator) {
+11         return [ false, tokens ];
+12     }
+13     var iterB = parseTryOne[1].slice(1);
+14     var parseTryTwo = parseSentence(iterB, flag);
+15     if (!parseTryTwo[0]) {
+16         return [ false, tokens ];
+17     }
+18     if (parseTryTwo[1][0] != RPAR) {
+19         return [ false, tokens ];
+20     }
+21     return [ true, parseTryTwo[1].slice(1) ];
+22 }
+
+Finally, we come to the binary operator function. All four of the binary operator sentence types share a common
+ structure, differing only in which operator token sits in the middle, so by passing that token in as the operator
+ argument, the same function can handle all four sentence types. In this
+ function we also see two instances of recursion, on lines 6 and 14. The overall effect is to try to consume an
+ LPAR token (2-5), then however many tokens are required to parse a valid sentence (6-9), then whichever token is
+ the operator for this function call (10-13), then however many tokens are required to parse another valid sentence
+ (14-17), then finally an RPAR token (18-21). If at any point the expected tokens aren't found, then a binary
+ operator sentence can't be parsed, and the token list is returned unchanged as a failure.
+parseNegation() and parseBinaryOperator() accomplish their recursive descent by calling a
+ generalized parseSentence() function. What does this function do? parseSentence() is what ties all of
+ the recursive descent parsing functions together, by trying each of them in turn and returning the result of
+ whichever of them worked:
+
+1  function parseSentence(tokens, flag) {
+2      var tryAtom = parseAtom(tokens, flag);
+3      if (tryAtom[0])
+4          return tryAtom;
+5      var tryNegation = parseNegation(tokens, flag);
+6      if (tryNegation[0])
+7          return tryNegation;
+8      var tryConditional = parseBinaryOperator(tokens, ARROW, flag);
+9      if (tryConditional[0])
+10         return tryConditional;
+11     var tryBicondition = parseBinaryOperator(tokens, BICOND, flag);
+12     if (tryBicondition[0])
+13         return tryBicondition;
+14     var tryConjunction = parseBinaryOperator(tokens, AND, flag);
+15     if (tryConjunction[0])
+16         return tryConjunction;
+17     var tryDisjunction = parseBinaryOperator(tokens, OR, flag);
+18     if (tryDisjunction[0])
+19         return tryDisjunction;
+20     return [ false, tokens ];
+21 }
+
+Note that line 20 perfectly encapsulates what the closure clause does in the definition of SL3: if something
+ does not follow from one of the given rules, i.e. does not parse according to any of the defined parsing functions,
+ then it is not a sentence, and the parser returns a failure to the calling context.
+All that's left is to put the two pieces together. The function to parse a given input will look something like
+ this:
+
+1  function parseSL3(input) {
+2      var tokens = tokenize(input);
+3      var parsed = parseSentence(tokens, false);
+4      var valid = parsed[0] && parsed[1].length == 0;
+5
+6      // Do something with the result
+7  }
+
+On line 2 we pass the input to the tokenizer to convert it from a character sequence to a token sequence. On
+ line 3 we pass this token sequence to the general parsing function, which will then try all the parsing functions
+ until it finds one that works, which will itself do some parsing, possibly including more recursive calls to
+ parsing functions, and so on. After all of that is done, we receive back a tuple of whether the parse worked and a
+ list of all the leftover tokens. On line 4 we establish that an input sentence is valid only if it parsed
+ successfully and there were no tokens left over. The second condition must be checked because otherwise
+ superfluous characters could be added to the end of a valid sentence and it would still count as valid.
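As a rough sketch of the values valid would take for a few inputs (the real page goes on to display the result rather than just computing it):

    parseSL3("(A∧B)");    // valid: true
    parseSL3("~~~C");     // valid: true
    parseSL3("(A∧B)C");   // valid: false -- "(A∧B)" parses but a token is left over
    parseSL3("A∧B");      // valid: false -- binary junctions need their surrounding parentheses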
+The above code will parse any sentence according to the given rules of SL3 sentences:
+
+    Φ := A | B | C
+    Φ := ~Φ
+    Φ := (Φ∧Φ) | (Φ∨Φ) | (Φ→Φ) | (Φ↔Φ)
+
+However, we might want to extend these rules so that long junctions don't have to involve nesting each
+ subsentence within another pair of parentheses:
+
+    Φ := A | B | C
+    Φ := ~Φ
+    Φ := (Φ→Φ) | (Φ↔Φ)
+    Φ := (Φ∧...∧Φ) | (Φ∨...∨Φ)
+
+We can accomplish this by writing a different parsing function for junctions that can handle an arbitrary
+ finite number of conjuncts or disjuncts. This function will begin much like the binary operator parser, before
+ diverging at the end. The different steps have been slightly separated below to make the parallels clearer.
+
+1  function parseBinaryOperator(tokens, operator, flag) {
+
+2      if (tokens[0] != LPAR) {
+3          return [ false, tokens ];
+4      }
+5      var iterA = tokens.slice(1);
+
+6      var parseTryOne = parseSentence(iterA, flag);
+7      if (!parseTryOne[0]) {
+8          return [ false, tokens ];
+9      }
+
+10     if (parseTryOne[1][0] != operator) {
+11         return [ false, tokens ];
+12     }
+13     var iterB = parseTryOne[1].slice(1);
+
+14     var parseTryTwo = parseSentence(iterB, flag);
+15     if (!parseTryTwo[0]) {
+16         return [ false, tokens ];
+17     }
+
+
+
+
+18     if (parseTryTwo[1][0] != RPAR) {
+19         return [ false, tokens ];
+20     }
+21     return [ true, parseTryTwo[1].slice(1) ];
+22 }
+
+1  function parseNAryOperator(tokens, operator, flag) {
+
+2      if (tokens[0] != LPAR) {
+3          return [ false, tokens ];
+4      }
+5      var iterA = tokens.slice(1);
+
+6      var parseTryOne = parseSentence(iterA, flag);
+7      if (!parseTryOne[0]) {
+8          return [ false, tokens ];
+9      }
+
+10     if (parseTryOne[1][0] != operator) {
+11         return [ false, tokens ];
+12     }
+13     var iterB = parseTryOne[1].slice(1);
+
+14     var parseTryTwo = parseSentence(iterB, flag);
+15     if (!parseTryTwo[0]) {
+16         return [ false, tokens ];
+17     }
+
+18     var tokensRemaining = parseTryTwo[1];
+19     while (tokensRemaining.length > 0) {
+
+20         if (tokensRemaining[0] == RPAR) {
+21             return [ true, tokensRemaining.slice(1) ];
+22         }
+
+23         if (tokensRemaining[0] != operator) {
+24             return [ false, tokens ];
+25         }
+26         tokensRemaining = tokensRemaining.slice(1);
+
+27         var parseTryLoop = parseSentence(tokensRemaining, flag);
+28         if (!parseTryLoop[0]) {
+29             return [ false, tokens ];
+30         }
+31         tokensRemaining = parseTryLoop[1];
+
+32     }
+33     return [ false, tokens ];
+34 }
+
+Both functions begin by attempting to parse an LPAR, a sentence, an operator, and a second sentence. However,
+ where the binary parser need only look for an RPAR token, the extended parser needs a loop to check for an
+ arbitrary number of additional conjuncts or disjuncts. The check for an RPAR is done first, so that it can still
+ validate 2-sentence junctions (20-22). If there isn't one, then the junction must still be continuing, which
+ means that the sentence must be followed by the same operator (23-26) and another sentence (27-31). If at any
+ point either a sentence is not followed by the operator or the operator is not followed by a sentence, the
+ parser will fail (24, 29). After consuming the sentence, the function loops back to line 19 to check for an
+ RPAR token again. If the loop ever runs out of tokens, then it never found an RPAR to close the
+ current junction, in which case the sentence is invalid (33).
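To see the difference on a three-conjunct sentence (a small sketch assuming the token aliases and functions defined above):

    var tokens = tokenize("(A∧B∧C)");
    parseBinaryOperator(tokens, AND, false);  // [ false, tokens ] -- expects RPAR after the second conjunct
    parseNAryOperator(tokens, AND, true);     // [ true, [] ]      -- the loop absorbs the third conjunct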
+With this alternative parsing function, we can finally make use of flag by letting the value of
+ flag determine whether we use extended junction rules. To do this, we rewrite
+
+14     var tryConjunction = parseBinaryOperator(tokens, AND, flag);
+15     if (tryConjunction[0])
+16         return tryConjunction;
+17     var tryDisjunction = parseBinaryOperator(tokens, OR, flag);
+18     if (tryDisjunction[0])
+19         return tryDisjunction;
+
+to use the extended functions when flagged, like so:
+
+14     var tryConjunction = flag ? parseNAryOperator(tokens, AND, flag) : parseBinaryOperator(tokens, AND, flag);
+15     if (tryConjunction[0])
+16         return tryConjunction;
+17     var tryDisjunction = flag ? parseNAryOperator(tokens, OR, flag) : parseBinaryOperator(tokens, OR, flag);
+18     if (tryDisjunction[0])
+19         return tryDisjunction;
+
+The ?: expression above is the conditional (ternary) operator, and is just shorthand for an if/else statement. Thus,
+ if flag is true, then conjunctions and disjunctions will be parsed using the extended junction rules; and
+ if false, using the binary rules. Then we can call parseSentence() on the same list of tokens with
+ flag set to true or false in order to see which rules a sentence is valid under.
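For example, the same token lists can be checked under both rule sets (again assuming the functions above):

    var conj = tokenize("(A∧B∧C)");
    parseSentence(conj, false)[0];   // false -- only the binary junction rules are tried
    parseSentence(conj, true)[0];    // true  -- the extended junction rules accept it

    var cond = tokenize("(A→B)");
    parseSentence(cond, true)[0];    // true  -- conditionals are unaffected by the flag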
+The real implementation under the hood has to deal with updating the UI at the top of the page, so the topmost
+ parsing function concerns itself with some other details. To see what the full implementation does, right-click
+ on this page and select "View page source" (or press Ctrl+U or Cmd+U) to see the code for the embedded
+ JavaScript. The functions have comments in them that outline what is going on at each point.