diff --git a/src/collate.c b/src/collate.c index 02e468d..8158125 100644 --- a/src/collate.c +++ b/src/collate.c @@ -652,19 +652,21 @@ int ldb_collate_load_tuples_to_delete(job_delete_tuples_t * job, char * buffer, while (line != NULL) { - tuples = realloc(tuples, ((tuples_index+1) * sizeof(tuple_t*))); - tuples[tuples_index] = calloc(1, sizeof(tuple_t)); - ldb_hex_to_bin(line, key_len * 2, tuples[tuples_index]->key); - - if (strchr(line, ',')) + if (line && strlen(line) >= key_len * 2) { - char * data = strdup(line + key_len * 2 + 1); - if (data && *data) - tuples[tuples_index]->data = data; + tuples = realloc(tuples, ((tuples_index+1) * sizeof(tuple_t*))); + tuples[tuples_index] = calloc(1, sizeof(tuple_t)); + ldb_hex_to_bin(line, key_len * 2, tuples[tuples_index]->key); + + if (strchr(line, ',')) + { + char * data = strdup(line + key_len * 2 + 1); + if (data && *data) + tuples[tuples_index]->data = data; + } + tuples_index++; } - line = strtok(NULL, delimiter); - tuples_index++; } job->tuples = tuples; job->tuples_number = tuples_index; @@ -715,12 +717,12 @@ bool ldb_collate_init(struct ldb_collate_data * collate, struct ldb_table table, } /* Reserve space for collate data */ - collate->data = (char *)calloc(LDB_MAX_RECORDS * collate->rec_width, 1); + collate->data = (char *)calloc(LDB_MAX_RECORDS * (collate->rec_width+10), 1); if (!collate->data) return false; - collate->tmp_data = (char *)calloc(LDB_MAX_RECORDS * collate->rec_width, 1); + collate->tmp_data = (char *)calloc(LDB_MAX_RECORDS * (collate->rec_width+10), 1); if (!collate->tmp_data) return false; @@ -736,12 +738,12 @@ bool ldb_collate_init(struct ldb_collate_data * collate, struct ldb_table table, return true; } -void ldb_collate_sector(struct ldb_collate_data *collate, uint8_t sector, uint8_t *sector_mem) +void ldb_collate_sector(struct ldb_collate_data *collate, ldb_sector_t * sector) { - log_info("Collating %s/%s - sector %02x - %s\n", collate->in_table.db, collate->in_table.table, sector, 
sector_mem == NULL ? "On disk" : "On RAM"); + log_info("Collating %s/%s - sector %02x - %s\n", collate->in_table.db, collate->in_table.table, sector->id, sector->data == NULL ? "On disk" : "On RAM"); /* Read each one of the (256 ^ 3) list pointers from the map */ uint8_t k[LDB_KEY_LN]; - k[0] = sector; + k[0] = sector->id; for (int k1 = 0; k1 < 256; k1++) for (int k2 = 0; k2 < 256; k2++) for (int k3 = 0; k3 < 256; k3++) @@ -751,7 +753,7 @@ void ldb_collate_sector(struct ldb_collate_data *collate, uint8_t sector, uint8_ k[3] = k3; /* If there is a pointer, read the list */ /* Process records */ - ldb_fetch_recordset(sector_mem, collate->in_table, k, true, ldb_collate_handler, collate); + ldb_fetch_recordset_v2(sector, collate->in_table, k, true, ldb_collate_handler, collate); } /* Process last record/s */ @@ -771,13 +773,13 @@ void ldb_collate_sector(struct ldb_collate_data *collate, uint8_t sector, uint8_ ldb_sector_update(collate->out_table, k); if (collate->del_count) - log_info("%s - sector %02X: %'ld records deleted\n", collate->in_table.table, sector, collate->del_count); + log_info("%s - sector %02X: %'ld records deleted\n", collate->in_table.table, sector->id, collate->del_count); - log_info("Table %s - sector %2x: collate completed with %'ld records\n", collate->in_table.table , sector, collate->rec_count); + log_info("Table %s - sector %2x: collate completed with %'ld records\n", collate->in_table.table , sector->id, collate->rec_count); free(collate->data); - free(collate->tmp_data); - free(sector_mem); + free(sector->data); + sector->data = NULL; } /** @@ -812,12 +814,12 @@ void ldb_collate(struct ldb_table table, struct ldb_table out_table, int max_rec /* Load collate data structure */ collate.handler = handler; collate.del_tuples = NULL; - uint8_t *sector = ldb_load_sector(table, &k0); + ldb_sector_t sector = ldb_load_sector_v2(table, &k0); //skip unexistent sector. 
- if (!sector) + if (!sector.size) continue; - ldb_collate_sector(&collate, k0, sector); + ldb_collate_sector(&collate, &sector); /* ldb_collate_sector takes the sector descriptor by address */ } if (p_sector >=0) @@ -867,7 +869,6 @@ void ldb_collate_delete(struct ldb_table table, struct ldb_table out_table, job_ { k0 = *delete->tuples[i]->key; struct ldb_collate_data collate; - if (k0 != k0_last && ldb_collate_init(&collate, table, out_table, 2048, false, k0)) { log_info("Removing keys from Table %s - Reading sector %02x\n", table.table, k0); @@ -875,14 +876,16 @@ void ldb_collate_delete(struct ldb_table table, struct ldb_table out_table, job_ collate.handler = handler; collate.del_tuples = delete; collate.del_count = 0; - uint8_t * sector = ldb_load_sector(table, &k0); - ldb_collate_sector(&collate, k0, sector); + ldb_sector_t sector = ldb_load_sector_v2(table, &k0); + ldb_collate_sector(&collate, &sector); /* ldb_collate_sector takes the sector descriptor by address */ total_records += collate.del_count; - k0_last = k0; } + k0_last = k0; + /* Exit here if it is a delete command, otherwise move to the next sector */ } /* Show processed totals */ log_info("Table %s: cleanup completed with %'ld records\n", table.table, total_records); fflush(stdout); + } diff --git a/src/import.c b/src/import.c index d8b8d17..e875c2f 100644 --- a/src/import.c +++ b/src/import.c @@ -1430,42 +1430,44 @@ int import_collate_sector(ldb_importation_config_t *config) char *ptr = NULL; char *filename = basename(config->csv_path); //extract the sector from the file name. If there is no sector (example license.csv) process the compleate table. 
- long sector = strtol(filename, &ptr, 16); + long sector_number = strtol(filename, &ptr, 16); if (ptr - filename < 2 || (ptr && *ptr != '.')) - sector = -1; + sector_number = -1; logger_basic("Collating - %s", dbtable); - if (sector < 0) + if (sector_number < 0) { - log_info("Collating table %s - all sectors, Max record size: %d\n", dbtable, sector, max_rec_len); - ldb_collate(ldbtable, tmptable, max_rec_len, false, sector, NULL); + log_info("Collating table %s - all sectors, Max record size: %d\n", dbtable, max_rec_len); /* one argument per conversion: %s dbtable, %d max_rec_len */ + ldb_collate(ldbtable, tmptable, max_rec_len, false, sector_number, NULL); return 0; } - log_info("Collating table %s - sector %02x, Max record size: %d\n", dbtable, sector, max_rec_len); + log_info("Collating table %s - sector %02x, Max record size: %d\n", dbtable, (unsigned int) sector_number, max_rec_len); /* %02x expects an unsigned int, sector_number is a long */ if ((ldbtable.definitions > 0 && ldbtable.definitions & LDB_TABLE_DEFINITION_MZ) || (ldbtable.definitions == LDB_TABLE_DEFINITION_UNDEFINED && config->opt.params.is_mz_table)) { - ldb_mz_collate(ldbtable, sector); + ldb_mz_collate(ldbtable, sector_number); } - else if (sector >= 0) + else if (sector_number >= 0) { pthread_mutex_lock(&lock); struct ldb_collate_data collate; - uint8_t k0 = sector; - uint8_t *sector_mem = NULL; + uint8_t k0 = sector_number; + //uint8_t *sector_mem = NULL; + ldb_sector_t sector = {.data=NULL, .size = 0, .id = sector_number}; /* data stays NULL unless the sector is loaded below */ bool init_ok = ldb_collate_init(&collate, ldbtable, tmptable, max_rec_len, false, k0); if (!init_ok) log_info("Collate init failed for sector %d\n", k0); if (init_ok && check_system_available_ram(ldbtable, k0, config->opt.params.collate_max_ram_percent)) - sector_mem = ldb_load_sector(ldbtable, &k0); + //sector_mem = ldb_load_sector(ldbtable, &k0); + sector = ldb_load_sector_v2(ldbtable, &k0); pthread_mutex_unlock(&lock); if (init_ok) - ldb_collate_sector(&collate, sector, sector_mem); + ldb_collate_sector(&collate, &sector); else { log_info("ERROR: failed to allocate memory to collate sector 
%02x\n", k0); diff --git a/src/ldb.h b/src/ldb.h index a6957c8..dbf8288 100644 --- a/src/ldb.h +++ b/src/ldb.h @@ -27,7 +27,7 @@ #include "./ldb/types.h" #include "./ldb/mz.h" -#define LDB_VERSION "4.1.4" +#define LDB_VERSION "4.1.5" #define LDB_TABLE_DEFINITION_UNDEFINED -1 #define LDB_TABLE_DEFINITION_STANDARD 0 @@ -62,6 +62,7 @@ uint64_t ldb_last_node_pointer(FILE *ldb_sector, uint64_t list_pointer); void ldb_update_list_pointers(FILE *ldb_sector, uint8_t *key, uint64_t list, uint64_t new_node); int ldb_node_write (struct ldb_table table, FILE *ldb_sector, uint8_t *key, uint8_t *data, uint32_t dataln, uint16_t records); uint64_t ldb_node_read (uint8_t *sector, struct ldb_table table, FILE *ldb_sector, uint64_t ptr, uint8_t *key, uint32_t *bytes_read, uint8_t **out, int max_node_size); +uint64_t ldb_node_read_v2(ldb_sector_t *sector, struct ldb_table table, uint64_t ptr, uint8_t *key, uint32_t *bytes_read, uint8_t **out, int max_node_size); char *ldb_sector_path (struct ldb_table table, uint8_t *key, char *mode); FILE *ldb_open (struct ldb_table table, uint8_t *key, char *mode); bool ldb_close(FILE * sector); @@ -84,11 +85,13 @@ bool ldb_create_database(char *database); struct ldb_recordset ldb_recordset_init(char *db, char *table, uint8_t *key); void ldb_list_unlink(FILE *ldb_sector, uint8_t *key); uint8_t *ldb_load_sector (struct ldb_table table, uint8_t *key); +ldb_sector_t ldb_load_sector_v2(struct ldb_table table, uint8_t *key); bool ldb_validate_node(uint8_t *node, uint32_t node_size, int subkey_ln); //bool uint32_is_zero(uint8_t *n); bool ldb_key_exists(struct ldb_table table, uint8_t *key); bool ldb_key_in_recordset(uint8_t *rs, uint32_t rs_len, uint8_t *subkey, uint8_t subkey_ln); uint32_t ldb_fetch_recordset(uint8_t *sector, struct ldb_table table, uint8_t* key, bool skip_subkey, bool (*ldb_record_handler) (uint8_t *, uint8_t *, int, uint8_t *, uint32_t, int, void *), void *void_ptr); +uint32_t ldb_fetch_recordset_v2(ldb_sector_t * sector, struct 
ldb_table table, uint8_t* key, bool skip_subkey, bool (*ldb_record_handler) (uint8_t *, uint8_t *, int, uint8_t *, uint32_t, int, void *), void *void_ptr); bool ldb_hexprint_width(uint8_t *key, uint8_t *subkey, int subkey_ln, uint8_t *data, uint32_t size, int iteration, void *ptr); void ldb_sector_update(struct ldb_table table, uint8_t *key); void ldb_sector_erase(struct ldb_table table, uint8_t *key); diff --git a/src/ldb/collate.h b/src/ldb/collate.h index 057cc82..7b8e491 100644 --- a/src/ldb/collate.h +++ b/src/ldb/collate.h @@ -43,7 +43,7 @@ struct ldb_collate_data collate_handler handler; }; bool ldb_collate_init(struct ldb_collate_data * collate, struct ldb_table table, struct ldb_table out_table, int max_rec_ln, bool merge, uint8_t sector); -void ldb_collate_sector(struct ldb_collate_data *collate, uint8_t sector, uint8_t *sector_mem); +void ldb_collate_sector(struct ldb_collate_data *collate, ldb_sector_t * sector); int ldb_collate_load_tuples_to_delete(job_delete_tuples_t* job, char * buffer, char * d, struct ldb_table table); void ldb_collate(struct ldb_table table, struct ldb_table out_table, int max_rec_ln, bool merge, int p_sector, collate_handler handler); void ldb_collate_delete(struct ldb_table table, struct ldb_table out_table, job_delete_tuples_t * delete, collate_handler handler); diff --git a/src/ldb/types.h b/src/ldb/types.h index 5d68683..2c5cfa1 100644 --- a/src/ldb/types.h +++ b/src/ldb/types.h @@ -47,6 +47,14 @@ struct ldb_recordset uint64_t last_node; // Location of last node of the list uint8_t ts_ln; // 2 or 4 (16-bit or 32-bit reserved for total sector size) }; +typedef struct ldb_sector_t /* Descriptor of one LDB sector, either loaded in memory or read from its file */ +{ + uint8_t id; /* Sector number: first byte of the keys stored in the sector */ + size_t size; /* Size in bytes of the sector file; ldb_collate treats zero as a non-existent sector */ + uint8_t * data; /* Copy of the sector loaded in memory, NULL when nodes are read from disk */ + FILE * file; /* Sector file, opened on demand by ldb_node_read_v2 and closed by ldb_fetch_recordset_v2 */ + bool failure; /* Set by ldb_node_read_v2 when a node pointer is out of range, cleared by ldb_fetch_recordset_v2 */ +} ldb_sector_t; typedef bool (*ldb_record_handler) (uint8_t *, uint8_t *, int, uint8_t *, uint32_t, int, void *); diff --git a/src/md5.c b/src/md5.c index d8dd1bd..109b399 100644 --- a/src/md5.c +++ b/src/md5.c @@ -22,32 +22,48 @@ void md5_string(const unsigned char 
*input, int len, unsigned char output[16]) * @param path string path * @return pointer to file md5 array */ -uint8_t * md5_file(char *path) +#include +#include +#include +#include + +static void __attribute__((constructor)) init_libgcrypt(void) { - uint8_t *c = calloc(1, gcry_md_get_algo_dlen(GCRY_MD_MD5)); // Allocate memory for MD5 hash - FILE *fp = fopen(path, "rb"); + if (!gcry_check_version(GCRYPT_VERSION)) { + fprintf(stderr, "Libgcrypt version mismatch\n"); + return; + } + + gcry_control(GCRYCTL_DISABLE_SECMEM_WARN); + gcry_control(GCRYCTL_INIT_SECMEM, 16384, 0); + gcry_control(GCRYCTL_INITIALIZATION_FINISHED, 0); +} +uint8_t * md5_file(char *path) +{ + uint8_t *c = calloc(1, gcry_md_get_algo_dlen(GCRY_MD_MD5)); // Allocate memory for MD5 hash + FILE *fp = fopen(path, "rb"); if (!fp) { fprintf(stderr, "Unable to open file for reading.\n"); + free(c); return NULL; } - + gcry_md_hd_t md5_hd; gcry_md_open(&md5_hd, GCRY_MD_MD5, GCRY_MD_FLAG_SECURE); - uint8_t *buffer = malloc(BUFFER_SIZE); size_t bytes; - + while ((bytes = fread(buffer, 1, BUFFER_SIZE, fp)) > 0) { gcry_md_write(md5_hd, buffer, bytes); } - + fclose(fp); free(buffer); - + const uint8_t *digest = gcry_md_read(md5_hd, GCRY_MD_MD5); memcpy(c, digest, gcry_md_get_algo_dlen(GCRY_MD_MD5)); - gcry_md_close(md5_hd); + return c; } \ No newline at end of file diff --git a/src/node.c b/src/node.c index ef15e19..462cd8a 100644 --- a/src/node.c +++ b/src/node.c @@ -407,6 +407,156 @@ void ldb_node_unlink(struct ldb_table table, uint8_t *key) ldb_close_unlock(ldb_sector); } +/** + * @brief Reads a node from a sector, using the copy loaded in memory (sector->data) when there is one and the sector file otherwise. + * + * @param sector Sector descriptor, must not be NULL. sector->file is opened on demand and left open; sector->failure is set when a node pointer is out of range + * @param table table struct config + * @param ptr Node pointer. If zero, the first node of the list for *key* is located + * @param key key of the associated table + * @param bytes_read Output: size in bytes of the node payload (zero for deleted or ignored nodes) + * @param out Output: newly allocated copy of the node payload, to be freed by the caller. If NULL the payload is not copied + * @param max_node_size If not zero, nodes bigger than this size are ignored + * @return uint64_t Pointer to the next node of the list, zero if there is none or the node header could not be read + */ +uint64_t ldb_node_read_v2(ldb_sector_t *sector, struct ldb_table table, uint64_t ptr, uint8_t *key, uint32_t *bytes_read, uint8_t **out, int max_node_size) +{ + *bytes_read = 0; + uint8_t *buffer = NULL; + + /* Read sector pointer either from disk (ldb_sector) or memory (sector) */ + if (sector->data) + { + if (ptr == 0) + { + /* If pointer is zero, get the list location from the map */ + ptr = uint40_read(sector->data + 
ldb_map_pointer_pos(key)); + /* If pointer is zero, then there are no records for the key */ + if (ptr == 0) + return 0; + + ptr += LDB_PTR_LN; + + } + + if (ptr + LDB_PTR_LN + table.ts_ln <= sector->size) + buffer = sector->data + ptr; + else + { + log_info("Warning: cannot read LDB node from sector %02x. The node pointer is out of range %ld / %ld\n", sector->id, ptr, sector->size); + ptr = 0; + sector->failure = true; + return 0; + } + } + + if (!sector->data || !buffer) + { //open the sector if not already open + if (!sector->file) + { + sector->file = ldb_open(table, key, "r"); + if (!sector->file) + return 0; + } + + if (ptr == 0) + { + ptr = ldb_list_pointer(sector->file, key); + fseeko64(sector->file, ptr, SEEK_SET); + uint64_t last_node; // = uint40_read + fread(&last_node, 1, LDB_PTR_LN, sector->file); + //printf("key: %02x%02x%02x%02x: sector first node ptr: %ld - last node %ld\n", key[0], key[1], key[2], key[3], ptr, last_node); + + /* If pointer is zero, then there are no records for the key */ + if (ptr == 0) + return 0; + + ptr += LDB_PTR_LN; + } + + fseeko64(sector->file, ptr, SEEK_SET); + buffer = calloc(LDB_PTR_LN + table.ts_ln + LDB_KEY_LN, 1); + if (!fread(buffer, 1, LDB_PTR_LN + table.ts_ln, sector->file)) + { + log_debug("Warning: cannot read LDB node\n"); + free(buffer); + ldb_read_failure = true; + return 0; + } + } + + /* NN: Obtain the next node */ + uint64_t next_node = uint40_read(buffer); + + /* TS: Obtain the size of the node */ + uint32_t node_size = 0; + if (table.ts_ln == 2) + node_size = uint16_read(buffer + LDB_PTR_LN); + else + node_size = uint32_read(buffer + LDB_PTR_LN); + + uint32_t actual_size = node_size; + + /* When records are fixed in length, node size is expressed in number of records */ + if (table.rec_ln) + actual_size = node_size * table.rec_ln; + //printf("Node %ld, size %d, Next node ptr: %ld\n", ptr, actual_size, next_node); + + /* If the node size exceeds the wanted limit, then ignore it entirely */ + if (max_node_size) + if (actual_size > 
max_node_size) + actual_size = 0; + + /* A deleted node will have a size set to zero. */ + if (actual_size) + { + + if (table.rec_ln) + if (actual_size > 64800) + actual_size = 64800; // TODO: EXPAND? + + /* The payload is only copied when the caller supplied an output pointer */ + uint8_t *node = out ? calloc(actual_size + 1, 1) : NULL; + + /* Return the entire node */ + if (out && !node) + { + log_info("Warning: cannot allocate memory for LDB node on sector %02x\n", sector->id); + ldb_read_failure = true; + actual_size = 0; + } + else if (sector->data) + { + /* Header and payload must both lie inside the loaded sector; never return a zero-filled node */ + if (ptr + LDB_PTR_LN + table.ts_ln + actual_size > sector->size) + { + log_info("warning on sector %02x node size overflow\n", sector->id); + free(node); + node = NULL; + actual_size = 0; + } + else if (node) + memcpy(node, buffer + LDB_PTR_LN + table.ts_ln, actual_size); + } + else if (node && !fread(node, 1, actual_size, sector->file)) + { + log_debug("Warning: cannot read entire LDB node\n"); + ldb_read_failure = true; + } + if (out) + *out = node; + *bytes_read = actual_size; + } + + if (!sector->data) + { + free(buffer); + //fclose(sector_file); + } + return next_node; +} + + /** * @brief Validates a node checking for the dataset size. * diff --git a/src/recordset.c b/src/recordset.c index cf7188a..7367317 100644 --- a/src/recordset.c +++ b/src/recordset.c @@ -134,6 +134,8 @@ uint32_t ldb_fetch_recordset(uint8_t *sector, struct ldb_table table, uint8_t* k /* We drop records longer than the desired limit */ if (record_size + 32 < LDB_MAX_REC_LN) done = ldb_record_handler(key, subkey, subkey_ln, dataset + dataset_ptr, record_size, records++, void_ptr); + if (done) + break; /* Move pointer to end of record */ dataset_ptr += record_size; } @@ -153,6 +155,121 @@ uint32_t ldb_fetch_recordset(uint8_t *sector, struct ldb_table table, uint8_t* k return records; } +/** + * @brief Recurses all records in *table* for *key* and calls the provided handler function in each iteration, passing + * subkey, subkey length, fetched data, length and iteration number. Nodes are read from the copy of the sector + * loaded in memory when there is one (sector->data not NULL), otherwise from the .ldb for the provided *key* + * + * @param sector Sector descriptor, must not be NULL. 
If sector->data is NULL the function will use the table struct and key to open the ldb + * @param table table struct config + * @param key key of the associated table + * @param skip_subkey true for skip the subkey + * @param ldb_record_handler Handler to print the data + * @param void_ptr This pointer is passed to the handler function + * @return uint32_t The number of records found + */ +uint32_t ldb_fetch_recordset_v2(ldb_sector_t * sector, struct ldb_table table, uint8_t* key, bool skip_subkey, bool (*ldb_record_handler) (uint8_t *, uint8_t *, int, uint8_t *, uint32_t, int, void *), void *void_ptr) +{ + uint8_t *node = NULL; + + uint64_t next = 0; + uint32_t node_size = 0; + uint32_t node_ptr; + uint8_t subkey_ln = table.key_ln - LDB_KEY_LN; + + uint32_t records = 0; + bool done = false; + do + { + /* Read node */ + next = ldb_node_read_v2(sector, table, next, key, &node_size, &node, 0); + + if (ldb_read_failure || sector->failure) + { + log_info("Error reading table %s/%s - sector %02x: the file is not available or the node doesn't exist\n", table.db, table.table, sector->id); + ldb_read_failure = false; + sector->failure = false; + break; + } + if (!node_size && !next) + break; // reached end of list + + /* Pass entire node (fixed record length) to handler */ + if (table.rec_ln) + done = ldb_record_handler(key, NULL, 0 , node, node_size, records++, void_ptr); + + /* Extract and pass variable-size records to handler */ + else + { + if (!ldb_validate_node(node, node_size, subkey_ln)) + { + /* continue skips the free() at the end of the loop body */ + free(node); + node = NULL; + continue; + } + + /* Extract datasets from node */ + node_ptr = 0; + + while (node_ptr < node_size && !done) + { + /* Get subkey */ + uint8_t *subkey = node + node_ptr; + node_ptr += subkey_ln; + + /* Get recordset length */ + int dataset_size = uint16_read(node + node_ptr); + node_ptr += 2; + + /* Compare subkey */ + bool key_matched = true; + if (!skip_subkey && subkey_ln) + key_matched = (memcmp(subkey, key + LDB_KEY_LN, subkey_ln) == 0); + + if (key_matched) + { + /* Extract records from dataset 
*/ + uint32_t dataset_ptr = 0; + while (dataset_ptr < dataset_size) + { + uint8_t *dataset = node + node_ptr; + /* Get record length */ + int record_size = uint16_read(dataset + dataset_ptr); + dataset_ptr += 2; + /* We drop records longer than the desired limit */ + if (record_size + 32 < LDB_MAX_REC_LN){ + done = ldb_record_handler(key, subkey, subkey_ln, dataset + dataset_ptr, record_size, records++, void_ptr); + if (done) + break; + } + /* Move pointer to end of record */ + dataset_ptr += record_size; + } + } + /* Move pointer to end of dataset */ + node_ptr += dataset_size; + } + } + /* ldb_node_read_v2 does not touch node for empty nodes: never keep a freed pointer */ + free(node); + node = NULL; + + } while (next && !done); + + /* Release a node left allocated by an early break */ + free(node); + + if (sector->file) + { + fclose(sector->file); + sector->file = NULL; + } + + return records; +} + + /** * @brief Handler function for ldb_get_first_record * diff --git a/src/sector.c b/src/sector.c index 4d4c412..17339dd 100644 --- a/src/sector.c +++ b/src/sector.c @@ -265,6 +265,46 @@ uint8_t *ldb_load_sector(struct ldb_table table, uint8_t *key) { return out; } + +/** + * @brief Loads an entire LDB sector into memory and returns its descriptor by value + * (size is 0 if the sector does not exist) + * + * @param table Instance of the table struct. + * @param key Key of the sector to load. + * @return ldb_sector_t Sector descriptor. data is NULL when the sector could not be loaded in memory; the caller frees data. + */ +ldb_sector_t ldb_load_sector_v2(struct ldb_table table, uint8_t *key) { + + ldb_sector_t sector = {.data = NULL, .id = *key, .size = 0}; + FILE *ldb_sector = ldb_open(table, key, "r"); + + if (!ldb_sector) + return sector; + + fseeko64(ldb_sector, 0, SEEK_END); + uint64_t size = ftello64(ldb_sector); + + uint8_t *out = malloc(size); + if (!out) + { + fclose(ldb_sector); + return sector; + } + + fseeko64(ldb_sector, 0, SEEK_SET); + /* An empty or partially read sector must not be used from memory */ + if (size == 0 || fread(out, 1, size, ldb_sector) != size) + { + free(out); + out = NULL; + } + fclose(ldb_sector); + sector.data = out; + sector.size = size; + return sector; +} + /** * @brief Reserves memory for storing a copy of an entire LDB sector * (returns NULL if the source sector does not exist)