|
| 1 | +/** |
| 2 | + * @file prism.h |
| 3 | + * |
| 4 | + * The main header file for the prism parser. |
| 5 | + */ |
1 | 6 | #ifndef PRISM_H
|
2 | 7 | #define PRISM_H
|
3 | 8 |
|
4 | 9 | #include "prism/defines.h"
|
5 | 10 | #include "prism/util/pm_buffer.h"
|
6 | 11 | #include "prism/util/pm_char.h"
|
7 | 12 | #include "prism/util/pm_memchr.h"
|
| 13 | +#include "prism/util/pm_strncasecmp.h" |
8 | 14 | #include "prism/util/pm_strpbrk.h"
|
9 | 15 | #include "prism/ast.h"
|
10 | 16 | #include "prism/diagnostic.h"
|
11 | 17 | #include "prism/node.h"
|
| 18 | +#include "prism/options.h" |
12 | 19 | #include "prism/pack.h"
|
13 | 20 | #include "prism/parser.h"
|
14 | 21 | #include "prism/prettyprint.h"
|
|
28 | 35 | #include <strings.h>
|
29 | 36 | #endif
|
30 | 37 |
|
31 |
| -void pm_serialize_content(pm_parser_t *parser, pm_node_t *node, pm_buffer_t *buffer); |
32 |
| - |
33 |
| -void pm_parser_metadata(pm_parser_t *parser, const char *metadata); |
34 |
| - |
35 |
| -// The prism version and the serialization format. |
| 38 | +/** |
| 39 | + * The prism version and the serialization format. |
| 40 | + * |
| 41 | + * @return The prism version as a constant string. |
| 42 | + */ |
36 | 43 | PRISM_EXPORTED_FUNCTION const char * pm_version(void);
|
37 | 44 |
|
38 |
| -// Initialize a parser with the given start and end pointers. |
39 |
| -PRISM_EXPORTED_FUNCTION void pm_parser_init(pm_parser_t *parser, const uint8_t *source, size_t size, const char *filepath); |
| 45 | +/** |
| 46 | + * Initialize a parser with the given source, size, and parsing options. |
| 47 | + * |
| 48 | + * @param parser The parser to initialize. |
| 49 | + * @param source The source to parse. |
| 50 | + * @param size The size of the source. |
| 51 | + * @param options The optional options to use when parsing. |
| 52 | + */ |
| 53 | +PRISM_EXPORTED_FUNCTION void pm_parser_init(pm_parser_t *parser, const uint8_t *source, size_t size, const pm_options_t *options); |
40 | 54 |
|
41 |
| -// Register a callback that will be called whenever prism changes the encoding it |
42 |
| -// is using to parse based on the magic comment. |
| 55 | +/** |
| 56 | + * Register a callback that will be called whenever prism changes the encoding |
| 57 | + * it is using to parse based on the magic comment. |
| 58 | + * |
| 59 | + * @param parser The parser to register the callback with. |
| 60 | + * @param callback The callback to register. |
| 61 | + */ |
43 | 62 | PRISM_EXPORTED_FUNCTION void pm_parser_register_encoding_changed_callback(pm_parser_t *parser, pm_encoding_changed_callback_t callback);
|
44 | 63 |
|
45 |
| -// Register a callback that will be called when prism encounters a magic comment |
46 |
| -// with an encoding referenced that it doesn't understand. The callback should |
47 |
| -// return NULL if it also doesn't understand the encoding or it should return a |
48 |
| -// pointer to a pm_encoding_t struct that contains the functions necessary to |
49 |
| -// parse identifiers. |
| 64 | +/** |
| 65 | + * Register a callback that will be called when prism encounters a magic comment |
| 66 | + * with an encoding referenced that it doesn't understand. The callback should |
| 67 | + * return NULL if it also doesn't understand the encoding or it should return a |
| 68 | + * pointer to a pm_encoding_t struct that contains the functions necessary to |
| 69 | + * parse identifiers. |
| 70 | + * |
| 71 | + * @param parser The parser to register the callback with. |
| 72 | + * @param callback The callback to register. |
| 73 | + */ |
50 | 74 | PRISM_EXPORTED_FUNCTION void pm_parser_register_encoding_decode_callback(pm_parser_t *parser, pm_encoding_decode_callback_t callback);
|
51 | 75 |
|
52 |
| -// Free any memory associated with the given parser. |
| 76 | +/** |
| 77 | + * Free any memory associated with the given parser. |
| 78 | + * |
| 79 | + * @param parser The parser to free. |
| 80 | + */ |
53 | 81 | PRISM_EXPORTED_FUNCTION void pm_parser_free(pm_parser_t *parser);
|
54 | 82 |
|
55 |
| -// Parse the Ruby source associated with the given parser and return the tree. |
| 83 | +/** |
| 84 | + * Parse the Ruby source associated with the given parser and return the tree. |
| 85 | + * |
| 86 | + * @param parser The parser to use. |
| 87 | + * @return The AST representing the source. |
| 88 | + */ |
56 | 89 | PRISM_EXPORTED_FUNCTION pm_node_t * pm_parse(pm_parser_t *parser);
|
57 | 90 |
|
58 |
| -// Serialize the AST represented by the given node to the given buffer. |
| 91 | +/** |
| 92 | + * Serialize the given list of comments to the given buffer. |
| 93 | + * |
| 94 | + * @param parser The parser to serialize. |
| 95 | + * @param list The list of comments to serialize. |
| 96 | + * @param buffer The buffer to serialize to. |
| 97 | + */ |
| 98 | +void pm_serialize_comment_list(pm_parser_t *parser, pm_list_t *list, pm_buffer_t *buffer); |
| 99 | + |
| 100 | +/** |
| 101 | + * Serialize the name of the encoding to the buffer. |
| 102 | + * |
| 103 | + * @param encoding The encoding to serialize. |
| 104 | + * @param buffer The buffer to serialize to. |
| 105 | + */ |
| 106 | +void pm_serialize_encoding(pm_encoding_t *encoding, pm_buffer_t *buffer); |
| 107 | + |
| 108 | +/** |
| 109 | + * Serialize the encoding, metadata, nodes, and constant pool. |
| 110 | + * |
| 111 | + * @param parser The parser to serialize. |
| 112 | + * @param node The node to serialize. |
| 113 | + * @param buffer The buffer to serialize to. |
| 114 | + */ |
| 115 | +void pm_serialize_content(pm_parser_t *parser, pm_node_t *node, pm_buffer_t *buffer); |
| 116 | + |
| 117 | +/** |
| 118 | + * Serialize the AST represented by the given node to the given buffer. |
| 119 | + * |
| 120 | + * @param parser The parser to serialize. |
| 121 | + * @param node The node to serialize. |
| 122 | + * @param buffer The buffer to serialize to. |
| 123 | + */ |
59 | 124 | PRISM_EXPORTED_FUNCTION void pm_serialize(pm_parser_t *parser, pm_node_t *node, pm_buffer_t *buffer);
|
60 | 125 |
|
61 |
| -// Parse the given source to the AST and serialize the AST to the given buffer. |
62 |
| -PRISM_EXPORTED_FUNCTION void pm_parse_serialize(const uint8_t *source, size_t size, pm_buffer_t *buffer, const char *metadata); |
| 126 | +/** |
| 127 | + * Parse the given source to the AST and dump the AST to the given buffer. |
| 128 | + * |
| 129 | + * @param buffer The buffer to serialize to. |
| 130 | + * @param source The source to parse. |
| 131 | + * @param size The size of the source. |
| 132 | + * @param data The optional data to pass to the parser. |
| 133 | + */ |
| 134 | +PRISM_EXPORTED_FUNCTION void pm_serialize_parse(pm_buffer_t *buffer, const uint8_t *source, size_t size, const char *data); |
63 | 135 |
|
64 |
| -// Lex the given source and serialize to the given buffer. |
65 |
| -PRISM_EXPORTED_FUNCTION void pm_lex_serialize(const uint8_t *source, size_t size, const char *filepath, pm_buffer_t *buffer); |
| 136 | +/** |
| 137 | + * Parse and serialize the comments in the given source to the given buffer. |
| 138 | + * |
| 139 | + * @param buffer The buffer to serialize to. |
| 140 | + * @param source The source to parse. |
| 141 | + * @param size The size of the source. |
| 142 | + * @param data The optional data to pass to the parser. |
| 143 | + */ |
| 144 | +PRISM_EXPORTED_FUNCTION void pm_serialize_parse_comments(pm_buffer_t *buffer, const uint8_t *source, size_t size, const char *data); |
66 | 145 |
|
67 |
| -// Parse and serialize both the AST and the tokens represented by the given |
68 |
| -// source to the given buffer. |
69 |
| -PRISM_EXPORTED_FUNCTION void pm_parse_lex_serialize(const uint8_t *source, size_t size, pm_buffer_t *buffer, const char *metadata); |
| 146 | +/** |
| 147 | + * Lex the given source and serialize to the given buffer. |
| 148 | + * |
| 149 | + * @param buffer The buffer to serialize to. |
| 150 | + * @param source The source to lex. |
| 151 | + * @param size The size of the source. |
| 152 | + * @param data The optional data to pass to the lexer. |
| 153 | + */ |
| 154 | +PRISM_EXPORTED_FUNCTION void pm_serialize_lex(pm_buffer_t *buffer, const uint8_t *source, size_t size, const char *data); |
70 | 155 |
|
71 |
| -// Returns a string representation of the given token type. |
| 156 | +/** |
| 157 | + * Parse and serialize both the AST and the tokens represented by the given |
| 158 | + * source to the given buffer. |
| 159 | + * |
| 160 | + * @param buffer The buffer to serialize to. |
| 161 | + * @param source The source to parse. |
| 162 | + * @param size The size of the source. |
| 163 | + * @param data The optional data to pass to the parser. |
| 164 | + */ |
| 165 | +PRISM_EXPORTED_FUNCTION void pm_serialize_parse_lex(pm_buffer_t *buffer, const uint8_t *source, size_t size, const char *data); |
| 166 | + |
| 167 | +/** |
| 168 | + * Returns a string representation of the given token type. |
| 169 | + * |
| 170 | + * @param token_type The token type to convert to a string. |
| 171 | + * @return A string representation of the given token type. |
| 172 | + */ |
72 | 173 | PRISM_EXPORTED_FUNCTION const char * pm_token_type_to_str(pm_token_type_t token_type);
|
73 | 174 |
|
| 175 | +/** |
| 176 | + * @mainpage |
| 177 | + * |
| 178 | + * Prism is a parser for the Ruby programming language. It is designed to be |
| 179 | + * portable, error tolerant, and maintainable. It is written in C99 and has no |
| 180 | + * dependencies. It is currently being integrated into |
| 181 | + * [CRuby](https://github.com/ruby/ruby), |
| 182 | + * [JRuby](https://github.com/jruby/jruby), |
| 183 | + * [TruffleRuby](https://github.com/oracle/truffleruby), |
| 184 | + * [Sorbet](https://github.com/sorbet/sorbet), and |
| 185 | + * [Syntax Tree](https://github.com/ruby-syntax-tree/syntax_tree). |
| 186 | + * |
| 187 | + * @section getting-started Getting started |
| 188 | + * |
| 189 | + * If you're vendoring this project and compiling it statically then as long as |
| 190 | + * you have a C99 compiler you will be fine. If you're linking against it as |
| 191 | + * a shared library, then you should compile with `-fvisibility=hidden` and |
| 192 | + * `-DPRISM_EXPORT_SYMBOLS` to tell prism to make only its public interface |
| 193 | + * visible. |
| 194 | + * |
| 195 | + * @section parsing Parsing |
| 196 | + * |
| 197 | + * In order to parse Ruby code, the structures and functions that you're going |
| 198 | + * to want to use and be aware of are: |
| 199 | + * |
| 200 | + * * `pm_parser_t` - the main parser structure |
| 201 | + * * `pm_parser_init` - initialize a parser |
| 202 | + * * `pm_parse` - parse and return the root node |
| 203 | + * * `pm_node_destroy` - deallocate the root node returned by `pm_parse` |
| 204 | + * * `pm_parser_free` - free the internal memory of the parser |
| 205 | + * |
| 206 | + * Putting all of this together would look something like: |
| 207 | + * |
| 208 | + * ```c |
| 209 | + * void parse(const uint8_t *source, size_t length) { |
| 210 | + * pm_parser_t parser; |
| 211 | + * pm_parser_init(&parser, source, length, NULL); |
| 212 | + * |
| 213 | + * pm_node_t *root = pm_parse(&parser); |
| 214 | + * printf("PARSED!\n"); |
| 215 | + * |
| 216 | + * pm_node_destroy(root); |
| 217 | + * pm_parser_free(&parser); |
| 218 | + * } |
| 219 | + * ``` |
| 220 | + * |
| 221 | + * All of the nodes "inherit" from `pm_node_t` by embedding those structures as |
| 222 | + * their first member. This means you can downcast and upcast any node in the |
| 223 | + * tree to a `pm_node_t`. |
| 224 | + * |
| 225 | + * @section serializing Serializing |
| 226 | + * |
| 227 | + * Prism provides the ability to serialize the AST and its related metadata into |
| 228 | + * a binary format. This format is designed to be portable to different |
| 229 | + * languages and runtimes so that you only need to make one FFI call in order to |
| 230 | + * parse Ruby code. The structures and functions that you're going to want to |
| 231 | + * use and be aware of are: |
| 232 | + * |
| 233 | + * * `pm_buffer_t` - a small buffer object that will hold the serialized AST |
| 234 | + * * `pm_buffer_free` - free the memory associated with the buffer |
| 235 | + * * `pm_serialize` - serialize the AST into a buffer |
| 236 | + * * `pm_serialize_parse` - parse and serialize the AST into a buffer |
| 237 | + * |
| 238 | + * Putting all of this together would look something like: |
| 239 | + * |
| 240 | + * ```c |
| 241 | + * void serialize(const uint8_t *source, size_t length) { |
| 242 | + * pm_buffer_t buffer = { 0 }; |
| 243 | + * |
| 244 | + * pm_serialize_parse(&buffer, source, length, NULL); |
| 245 | + * printf("SERIALIZED!\n"); |
| 246 | + * |
| 247 | + * pm_buffer_free(&buffer); |
| 248 | + * } |
| 249 | + * ``` |
| 250 | + * |
| 251 | + * @section inspecting Inspecting |
| 252 | + * |
| 253 | + * Prism provides the ability to inspect the AST by pretty-printing nodes. You |
| 254 | + * can do this with the `pm_prettyprint` function, which you would use like: |
| 255 | + * |
| 256 | + * ```c |
| 257 | + * void prettyprint(const uint8_t *source, size_t length) { |
| 258 | + * pm_parser_t parser; |
| 259 | + * pm_parser_init(&parser, source, length, NULL); |
| 260 | + * |
| 261 | + * pm_node_t *root = pm_parse(&parser); |
| 262 | + * pm_buffer_t buffer = { 0 }; |
| 263 | + * |
| 264 | + * pm_prettyprint(&buffer, &parser, root); |
| 265 | + * printf("%.*s\n", (int) buffer.length, buffer.value); |
| 266 | + * |
| 267 | + * pm_buffer_free(&buffer); |
| 268 | + * pm_node_destroy(root); |
| 269 | + * pm_parser_free(&parser); |
| 270 | + * } |
| 271 | + * ``` |
| 272 | + */ |
| 273 | + |
74 | 274 | #endif
|
0 commit comments