|
| 1 | +// This module was based closely on the curl example module from ngx-rust. |
1 | 2 | use ngx::ffi::{
|
2 | 3 | nginx_version, ngx_array_push, ngx_command_t, ngx_conf_t, ngx_http_core_module, ngx_http_handler_pt,
|
3 | 4 | ngx_http_module_t, ngx_http_phases_NGX_HTTP_ACCESS_PHASE, ngx_http_request_t, ngx_int_t, ngx_module_t, ngx_str_t,
|
4 |
| - ngx_uint_t, NGX_CONF_TAKE1, NGX_HTTP_LOC_CONF, NGX_HTTP_MODULE, NGX_RS_HTTP_LOC_CONF_OFFSET, |
5 |
| - NGX_RS_MODULE_SIGNATURE, |
| 5 | + ngx_uint_t, NGX_CONF_TAKE1, NGX_HTTP_MAIN_CONF, NGX_HTTP_SRV_CONF, NGX_HTTP_LOC_CONF, NGX_HTTP_MODULE, |
| 6 | + NGX_RS_HTTP_LOC_CONF_OFFSET, NGX_RS_MODULE_SIGNATURE, |
6 | 7 | };
|
7 | 8 | use ngx::http::MergeConfigError;
|
8 | 9 | use ngx::{core, core::Status, http, http::HTTPModule};
|
@@ -36,15 +37,21 @@ impl http::HTTPModule for Module {
|
36 | 37 |
|
/// Per-scope configuration for the robots module.
#[derive(Default, Debug)]
struct ModuleConfig {
    /// Absolute file path of robots.txt.
    robots_txt_path: String,
    /// The contents of robots.txt, read by this module from `robots_txt_path`.
    robots_txt_contents: String,
}
|
42 | 43 |
|
43 | 44 | #[no_mangle]
|
44 | 45 | static mut ngx_http_robots_commands: [ngx_command_t; 2] = [
|
| 46 | + // define the robots_txt_path configuration directive |
45 | 47 | ngx_command_t {
|
46 | 48 | name: ngx_string!("robots_txt_path"),
|
47 |
| - type_: (NGX_HTTP_LOC_CONF | NGX_CONF_TAKE1) as ngx_uint_t, |
| 49 | + // The directive may appear in the http, server, or location block and takes |
| 50 | + // a single argument (the absolute file path of robots.txt). |
| 51 | + type_: ( NGX_HTTP_MAIN_CONF |
| 52 | + | NGX_HTTP_SRV_CONF |
| 53 | + | NGX_HTTP_LOC_CONF |
| 54 | + | NGX_CONF_TAKE1 ) as ngx_uint_t, |
48 | 55 | set: Some(ngx_http_robots_commands_set_robots_txt_path),
|
49 | 56 | conf: NGX_RS_HTTP_LOC_CONF_OFFSET,
|
50 | 57 | offset: 0,
|
@@ -101,15 +108,21 @@ pub static mut ngx_http_robots_module: ngx_module_t = ngx_module_t {
|
101 | 108 |
|
102 | 109 | impl http::Merge for ModuleConfig {
|
103 | 110 | fn merge(&mut self, prev: &ModuleConfig) -> Result<(), MergeConfigError> {
|
104 |
| - // If robots.txt path is not set at this level, inherit the setting from the higher level |
| 111 | + // If robots.txt path is not set at this level, inherit the setting from the higher level. |
| 112 | + // This means that configuring the directive in the location block overrides any configuration |
| 113 | + // of the directive in the server block and that configuring the directive in the server block |
| 114 | + // overrides any configuration in the http block. |
105 | 115 | if self.robots_txt_path == "" {
|
106 | 116 | self.robots_txt_path = prev.robots_txt_path.to_string();
|
107 | 117 | }
|
| 118 | + |
108 | 119 | self.robots_txt_contents = "".to_string(); // default value
|
| 120 | + |
109 | 121 | // If robots.txt path has been set, store the contents of the file
|
110 | 122 | if self.robots_txt_path != "" {
|
111 | 123 | self.robots_txt_contents = fs::read_to_string(&self.robots_txt_path).unwrap();
|
112 | 124 | }
|
| 125 | + |
113 | 126 | Ok(())
|
114 | 127 | }
|
115 | 128 | }
|
@@ -179,9 +192,11 @@ extern "C" fn ngx_http_robots_commands_set_robots_txt_path(
|
179 | 192 | std::ptr::null_mut()
|
180 | 193 | }
|
181 | 194 |
|
182 |
| -/// Extract the matchable part of a user agent string, essentially stopping at |
183 |
| -/// the first invalid character. |
184 |
| -/// Example: 'Googlebot/2.1' becomes 'Googlebot' |
| 195 | +// Extract the matchable part of a user agent string, essentially stopping at |
| 196 | +// the first invalid character. |
| 197 | +// Example: 'Googlebot/2.1' becomes 'Googlebot' |
| 198 | +// |
| 199 | +// This function and its unit tests were inherited from robotstxt. |
185 | 200 | fn extract_user_agent(user_agent: &str) -> &str {
|
186 | 201 | // Allowed characters in user-agent are [a-zA-Z_-].
|
187 | 202 | if let Some(end) =
|
|
0 commit comments