|
| 1 | +/* |
| 2 | + * BF to C or PASM translator |
| 3 | + * Copyright 2025 Total Spectrum Software and Collabora, Ltd. |
| 4 | + * |
| 5 | + * SPDX-License-Identifier: MIT |
| 6 | + */ |
| 7 | + |
| 8 | +/* |
| 9 | + * A parser that translates a really simple language into the flexspin |
| 10 | + * internal API. It is intended more as an example for implementors than |
| 11 | + * as something to actually use in the real world. |
| 12 | + * |
| 13 | + * The language implemented here is an esoteric language called |
| 14 | + * BrainF*ck or BF (or various other things) with only 8 commands, |
| 15 | + * each consisting of only a single character: |
| 16 | + * > Increment data pointer by 1 |
| 17 | + * < Decrement data pointer by 1 |
| 18 | + * + Increment the byte at the data pointer by 1 |
| 19 | + * - Decrement the byte at the data pointer by 1 |
| 20 | + * . Output the byte at the data pointer |
| 21 | + * , Read a byte and store it at the data pointer |
| 22 | + * [ If the byte at the data pointer is 0, jump to the command |
| 23 | + * after the next matching ] |
| 24 | + * ] If the byte at the data pointer is non-zero, jump back to |
| 25 | + * the previous matching [ |
| 26 | + * |
| 27 | + * All other characters are ignored and considered to be comments. |
| 28 | + * |
| 29 | + * I/O is performed using the standard Spin SEND and RECV pointers, so |
| 30 | + * a BF program may be included as an object in a Spin program. If |
| 31 | + * SEND and/or RECV are not initialized, they are set to an appropriate serial |
| 32 | + * function. |
| 33 | + */ |
| 34 | + |
| 35 | +#include <stdlib.h> |
| 36 | +#include <string.h> |
| 37 | +#include "spinc.h" |
| 38 | + |
| 39 | +static AST *array_name; /* the name of the BF main BF data array */ |
| 40 | +static AST *array_index; /* the name of the var to index that array */ |
| 41 | +static AST *array_mask; /* a mask so we wrap around at end of array */ |
| 42 | +static unsigned array_size; /* size of the array (based on processor) */ |
| 43 | + |
| 44 | +static AST *cur_pos_deref; /* a dereference to fetch/store the byte at the current position */ |
| 45 | + |
| 46 | +/* |
| 47 | + * fetch a single BF character |
| 48 | + * ignore any non-useful (comment) characters |
| 49 | + * returns -1 on EOF |
| 50 | + */ |
| 51 | +static int nextbfchar(LexStream *L) { |
| 52 | + int c; |
| 53 | + |
| 54 | + do { |
| 55 | + c = lexgetc(L); |
| 56 | + switch (c) { |
| 57 | + case '[': |
| 58 | + case ']': |
| 59 | + case '+': |
| 60 | + case '-': |
| 61 | + case '<': |
| 62 | + case '>': |
| 63 | + case '.': |
| 64 | + case ',': |
| 65 | + /* a valid BF character, return to parser */ |
| 66 | + return c; |
| 67 | + default: |
| 68 | + break; |
| 69 | + } |
| 70 | + } while (c > 0); |
| 71 | + return c; |
| 72 | +} |
| 73 | + |
| 74 | +/* |
| 75 | + * add an expression wrapped in a statement list to |
| 76 | + * a list of AST's |
| 77 | + * returns the new value for the list |
| 78 | + */ |
| 79 | +static AST * |
| 80 | +append_expr_statement(AST *body, AST *expr) |
| 81 | +{ |
| 82 | + AST *stmt = NewAST(AST_STMTLIST, expr, NULL); |
| 83 | + body = AddToList(body, stmt); |
| 84 | + return body; |
| 85 | +} |
| 86 | + |
| 87 | +/* |
| 88 | + * Utility routine: read a sequence of characters ch, |
| 89 | + * count them, and then increment or decrement the |
| 90 | + * given variable reference by that many. |
| 91 | + * Optionally (if mask is non-NULL) applies the given |
| 92 | + * mask before storing the result back. |
| 93 | + */ |
| 94 | +static AST * |
| 95 | +incDecVar(LexStream *L, int ch, int incDecOp, AST *varref, AST *mask) |
| 96 | +{ |
| 97 | + int n = 0; |
| 98 | + int c = ch; |
| 99 | + AST *ast; |
| 100 | + // count how many ch's in a row we see |
| 101 | + // strictly speaking we don't have to merge them like this, |
| 102 | + // but it is a tremendous optimization |
| 103 | + while (c == ch) { |
| 104 | + n++; |
| 105 | + c = nextbfchar(L); |
| 106 | + } |
| 107 | + // the last character we saw was not '+', so |
| 108 | + // save it back for the next parse |
| 109 | + lexungetc(L, c); |
| 110 | + |
| 111 | + ast = AstOperator(incDecOp, varref, AstInteger(n)); |
| 112 | + if (mask) { |
| 113 | + ast = AstOperator('&', ast, mask); |
| 114 | + } |
| 115 | + // create the statement to update the variable |
| 116 | + ast = AstAssign(varref, ast); |
| 117 | + return ast; |
| 118 | +} |
| 119 | + |
| 120 | +/* |
| 121 | + * return a dereference of the current position |
| 122 | + */ |
| 123 | +static AST *CurPos(void) { |
| 124 | + return DupAST(cur_pos_deref); |
| 125 | +} |
| 126 | + |
| 127 | +/* |
| 128 | + * parse a BF expression; this can be one of: |
| 129 | + * .: output current character |
| 130 | + * ,: read a character and write to current location |
| 131 | + * a sequence of N +'s: add N to current location |
| 132 | + * a sequence of N -'s: subtract N from current location |
| 133 | + * a sequence of N >'s: add N to current location pointer |
| 134 | + * a sequence of N <'s: subtract N from current location pointer |
| 135 | + * [: form a loop up until the next ] |
| 136 | + * ]: terminate a loop |
| 137 | + * |
| 138 | + * Returns an AST for the expression, or NULL |
| 139 | + * on EOF |
| 140 | + */ |
| 141 | +static AST * |
| 142 | +parseBFstream(LexStream *L) |
| 143 | +{ |
| 144 | + int c; |
| 145 | + AST *loopbody = NULL; |
| 146 | + AST *ast; |
| 147 | + |
| 148 | + for(;;) { |
| 149 | + c = nextbfchar(L); |
| 150 | + if (c == '+') { |
| 151 | + ast = incDecVar(L, c, '+', CurPos(), NULL); |
| 152 | + } else if (c == '-') { |
| 153 | + ast = incDecVar(L, c, '-', CurPos(), NULL); |
| 154 | + } else if (c == '>') { |
| 155 | + ast = incDecVar(L, c, '+', array_index, array_mask); |
| 156 | + } else if (c == '<') { |
| 157 | + ast = incDecVar(L, c, '-', array_index, array_mask); |
| 158 | + } else if (c == '.') { |
| 159 | + // print the byte at the current position |
| 160 | + AST *sendptr = AstIdentifier("__sendptr"); |
| 161 | + ast = NewAST(AST_FUNCCALL, |
| 162 | + sendptr, |
| 163 | + NewAST(AST_EXPRLIST, CurPos(), NULL)); |
| 164 | + } else if (c == ',') { |
| 165 | + // read something into the current position |
| 166 | + AST *recvptr = AstIdentifier("__recvptr"); |
| 167 | + ast = AstAssign(CurPos(), |
| 168 | + NewAST(AST_FUNCCALL, |
| 169 | + recvptr, |
| 170 | + NULL)); |
| 171 | + } else if (c == '[') { |
| 172 | + // open a loop |
| 173 | + // this should emit |
| 174 | + // if (*cur_pos_deref != 0) { |
| 175 | + // do { |
| 176 | + // <loopbody> |
| 177 | + // } while (*cur_pos_deref != 0); |
| 178 | + ast = parseBFstream(L); |
| 179 | + // create the do-while loop |
| 180 | + ast = NewAST(AST_DOWHILE, |
| 181 | + AstOperator(K_NE, CurPos(), AstInteger(0)), |
| 182 | + ast); |
| 183 | + // wrap it in the if |
| 184 | + ast = NewAST(AST_IF, |
| 185 | + AstOperator(K_NE, CurPos(), AstInteger(0)), |
| 186 | + NewAST(AST_THENELSE, |
| 187 | + NewAST(AST_STMTLIST, ast, NULL), |
| 188 | + NULL)); |
| 189 | + } else if (c == ']') { |
| 190 | + ast = NULL; |
| 191 | + break; |
| 192 | + } else { |
| 193 | + if (c != -1) { |
| 194 | + ERROR(NULL, "unexpected character encountered"); |
| 195 | + } |
| 196 | + break; |
| 197 | + } |
| 198 | + loopbody = AddToList(loopbody, |
| 199 | + NewAST(AST_STMTLIST, ast, NULL)); |
| 200 | + } |
| 201 | + return loopbody; |
| 202 | +} |
| 203 | + |
| 204 | +/* |
| 205 | + * initialize a BF parse |
| 206 | + * there will be only one function (the |
| 207 | + * main body) which will go in current->body |
| 208 | + * we need to create an array (8K for P1, 32K for P2) |
| 209 | + * Note that the parser is not re-entrant (none of |
| 210 | + * flexspin's parsers are) so we're using static variables |
| 211 | + * for everything |
| 212 | + */ |
| 213 | + |
| 214 | +static void |
| 215 | +init_bf_parse(LexStream *L) |
| 216 | +{ |
| 217 | + AST *ast; |
| 218 | + |
| 219 | + // array size is 8K for P1 (too small, but the P1 only has 32K total) |
| 220 | + // and 32K for P2 |
| 221 | + // must be a power of 2 so array_mask can work |
| 222 | + array_size = gl_p2 ? 32768 : 8192; |
| 223 | + |
| 224 | + // create an array mask |
| 225 | + array_mask = AstInteger(array_size - 1); |
| 226 | + |
| 227 | + // create the member variable to hold the BF data (an array of bytes) |
| 228 | + // this declaration is like |
| 229 | + // long bf_array[array_size] |
| 230 | + // in C |
| 231 | + array_name = AstIdentifier("bf_array"); |
| 232 | + AST *array_decl = NewAST(AST_ARRAYDECL, |
| 233 | + array_name, AstInteger(array_size)); |
| 234 | + // the base index of the array is put in d.ptr (it will default to 0, |
| 235 | + // but it doesn't hurt to make it explicit) |
| 236 | + array_decl->d.ptr = AstInteger(0); |
| 237 | + |
| 238 | + // we put the array in the module's DAT section |
| 239 | + MaybeDeclareMemberVar(current, array_decl, ast_type_byte, 0, NORMAL_VAR); |
| 240 | + |
| 241 | + // create a local variable declaration for the array_index |
| 242 | + array_index = AstIdentifier("bf_pos"); |
| 243 | + // declare it and initialize it to 0 |
| 244 | + AST *initZero = AstAssign(array_index, AstInteger(0)); |
| 245 | + |
| 246 | + // the variable declaration AST expects a list of variables to |
| 247 | + // initialize, hence the AST_LISTHOLDER |
| 248 | + AST *decl = NewAST(AST_DECLARE_VAR, |
| 249 | + ast_type_long, |
| 250 | + NewAST(AST_LISTHOLDER, initZero, NULL)); |
| 251 | + |
| 252 | + // add the variable declaration to the main program body |
| 253 | + current->body = append_expr_statement(current->body, decl); |
| 254 | + |
| 255 | + // create a dereference bf_array[bf_pos] |
| 256 | + cur_pos_deref = NewAST(AST_ARRAYREF, |
| 257 | + array_name, array_index); |
| 258 | + |
| 259 | + // This part below is somewhat technical; we want to do I/O by using |
| 260 | + // the standard Spin2 send() and recv() functions. But those may not |
| 261 | + // be initialized (if we're not using the BF program as an object). |
| 262 | + // So add some code to initialize them to the default _tx and _rx |
| 263 | + // functions |
| 264 | + AST *sendfunc = AstIdentifier("__sendptr"); |
| 265 | + AST *recvfunc = AstIdentifier("__recvptr"); |
| 266 | + |
| 267 | + // check for sendfunc initialized |
| 268 | + ast = AstOperator(K_EQ, sendfunc, AstInteger(0)); |
| 269 | + ast = NewAST(AST_IF, ast, |
| 270 | + NewAST(AST_THENELSE, |
| 271 | + NewAST(AST_STMTLIST, |
| 272 | + AstAssign(sendfunc, |
| 273 | + NewAST(AST_ADDROF, |
| 274 | + AstIdentifier("_tx"), |
| 275 | + NULL)), |
| 276 | + NULL), |
| 277 | + NULL)); |
| 278 | + current->body = append_expr_statement(current->body, ast); |
| 279 | + |
| 280 | + // check for recvfunc initialized |
| 281 | + ast = AstOperator(K_EQ, recvfunc, AstInteger(0)); |
| 282 | + ast = NewAST(AST_IF, ast, |
| 283 | + NewAST(AST_THENELSE, |
| 284 | + NewAST(AST_STMTLIST, |
| 285 | + AstAssign(recvfunc, |
| 286 | + NewAST(AST_ADDROF, |
| 287 | + AstIdentifier("_rx"), |
| 288 | + NULL)), NULL), |
| 289 | + NULL)); |
| 290 | + current->body = append_expr_statement(current->body, ast); |
| 291 | + |
| 292 | +} |
| 293 | + |
| 294 | +/* |
| 295 | + * This uses the same prototype as a YACC generated parser, so it takes no |
| 296 | + * parameters and uses the lexer stream in the current module (a global |
| 297 | + * variable. This is ugly, but compatible. |
| 298 | + * returns 0 on success, -1 on failure |
| 299 | + */ |
| 300 | +int bfparse(void) |
| 301 | +{ |
| 302 | + LexStream *L = current->Lptr; |
| 303 | + AST *ast; |
| 304 | + int c; |
| 305 | + |
| 306 | + /* initialize the parser state */ |
| 307 | + init_bf_parse(L); |
| 308 | + |
| 309 | + ast = parseBFstream(L); |
| 310 | + current->body = AddToList(current->body, ast); |
| 311 | + |
| 312 | + // we should be at EOF here |
| 313 | + c = lexpeekc(L); |
| 314 | + if (c > 0) { |
| 315 | + ERROR(NULL, "[] mismatch detected"); |
| 316 | + return -1; |
| 317 | + } |
| 318 | + return 0; |
| 319 | +} |
| 320 | + |
| 321 | +// |
| 322 | +// High level transformation on the intermediate AST |
| 323 | +// For BF, this doesn't really have to do anything, but |
| 324 | +// we may as well run the standard high level optimizations |
| 325 | +// and such |
| 326 | +// |
| 327 | +void |
| 328 | +BFTransform(Function *func) |
| 329 | +{ |
| 330 | + InitGlobalFuncs(); |
| 331 | + |
| 332 | + // simplify assignments |
| 333 | + DoHLTransforms(func); |
| 334 | + |
| 335 | + // Do type checking here. For BF this is |
| 336 | + // only needed because of the setup of send := @_tx |
| 337 | + // at initialization time |
| 338 | + CheckTypes(func->body); |
| 339 | +} |
0 commit comments