-
Notifications
You must be signed in to change notification settings - Fork 4
Description
Hi, I'm using the C API to run a write test on an x86 server. After adding a dozen or so custom auxiliary fields, I wrote 10,000 records into a slow5 file, with each record's len_raw_signal set to 100,000. Neither zlib nor svb compression was enabled. The resulting file is 4.7 GB, the run took 30 seconds (on a very powerful x86 server), and CPU utilization reached 99.9% on a single core (%Cpu1 : 94.1 us, 4.3 sy, 0.0 ni, 3.6 id, 0.0 wa, 0.0 hi, 0.0 si, 0.0 st). This write speed is too slow. How can I optimize it? Thank you. I am using version 1.3.1.
demo:
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <slow5/slow5.h>
#include <time.h>
// Output path passed to slow5_open() in main(); the ".blow5" alternative is disabled.
// #define FILE_PATH "output.blow5"
#define FILE_PATH "output.slow5"
// Number of records main() attempts to create and write.
#define NUM_RECORDS 10000
// Number of int16_t samples in each record's raw signal.
#define SIGNAL_LEN 100000
// Size in bytes of the buffer allocated for each record's read ID string.
#define READ_ID_LEN 64
// Signal template: filled once by init_shared_signal_buffer(), copied into
// every record by create_record(), released by free_shared_signal_buffer().
int16_t *shared_signal_buffer = NULL;
/*
 * Register the demo's custom auxiliary fields in the header of `sp`.
 *
 * Each field name is kept next to its aux type in a single table and the loop
 * bound is derived from that table, so adding or removing a field cannot leave
 * a stale element count or a name/type mismatch behind.
 *
 * sp: open slow5 file whose header receives the fields.
 *
 * Does not return on failure: if slow5_aux_add() reports an error (negative
 * return value) a message is printed to stderr and the process exits with
 * EXIT_FAILURE.
 */
void set_header_aux_fields(slow5_file_t *sp)
{
    static const struct
    {
        const char *name;
        enum slow5_aux_type type;
    } fields[] = {
        {"CycleId", SLOW5_UINT16_T},
        {"ChannelId", SLOW5_UINT16_T},
        {"SensorNum", SLOW5_UINT16_T},
        {"ReadNum", SLOW5_INT32_T},
        {"StartIndex", SLOW5_INT32_T},
        {"SignalLen", SLOW5_INT32_T},
        {"ReadScale", SLOW5_DOUBLE},
        {"ReadOffset", SLOW5_DOUBLE},
        {"offset", SLOW5_FLOAT},
        {"scale", SLOW5_FLOAT},
        {"K", SLOW5_FLOAT},
        {"B", SLOW5_FLOAT},
        {"Version", SLOW5_STRING},
        {"Platform", SLOW5_STRING},
    };
    const size_t field_count = sizeof fields / sizeof fields[0];

    for (size_t i = 0; i < field_count; i++)
    {
        if (slow5_aux_add(fields[i].name, fields[i].type, sp->header) < 0)
        {
            fprintf(stderr, "Error adding field %s\n", fields[i].name);
            exit(EXIT_FAILURE);
        }
    }
}
/*
 * Allocate the global signal template and fill it with pseudo-random samples.
 *
 * The buffer holds SIGNAL_LEN int16_t values; each one is set to
 * rand() & 4095, i.e. a value in the range 0..4095.
 *
 * Returns 0 on success, or -1 if the allocation fails.
 */
int init_shared_signal_buffer()
{
    shared_signal_buffer = malloc(SIGNAL_LEN * sizeof *shared_signal_buffer);
    if (shared_signal_buffer == NULL) {
        return -1;
    }

    /* Walk the buffer front to back, one rand() call per sample. */
    int16_t *sample = shared_signal_buffer;
    int16_t *const past_end = shared_signal_buffer + SIGNAL_LEN;
    while (sample != past_end) {
        *sample++ = rand() & 4095;
    }

    return 0;
}
/*
 * Release the global signal template and reset the pointer to NULL.
 *
 * Safe to call when the buffer was never allocated or was already released:
 * free(NULL) is defined to do nothing, so no explicit guard is needed.
 */
void free_shared_signal_buffer()
{
    free(shared_signal_buffer);
    shared_signal_buffer = NULL;
}
/*
 * Build one fully populated record for the write test.
 *
 * The record gets a read ID derived from `record_num`, zeroed primary fields,
 * a private copy of the shared signal template (SIGNAL_LEN samples) and a
 * value for each of the auxiliary fields this demo registers in the header.
 *
 * record_num: index of the record; used in the read ID and to derive the
 *             auxiliary field values.
 * sp:         open slow5 file whose header describes the auxiliary fields.
 *
 * Returns the new record, which the caller must release with
 * slow5_rec_free(), or NULL if an allocation fails or any auxiliary field
 * could not be set. On failure nothing is left for the caller to free.
 */
slow5_rec_t *create_record(int record_num, slow5_file_t *sp)
{
    slow5_rec_t *rec = slow5_rec_init();
    if (!rec)
    {
        return NULL;
    }

    char *read_id = malloc(READ_ID_LEN);
    if (!read_id)
    {
        slow5_rec_free(rec);
        return NULL;
    }
    snprintf(read_id, READ_ID_LEN, "2507500506021_1_12_230_10209834_%d", record_num);

    /* From here on the record owns read_id; slow5_rec_free() is used to
     * release the record together with the buffers attached to it. */
    rec->read_id = read_id;
    rec->read_id_len = strlen(read_id);
    rec->read_group = 0;
    rec->digitisation = 0;
    rec->offset = 0;
    rec->range = 0;
    rec->sampling_rate = 0;

    rec->len_raw_signal = SIGNAL_LEN;
    rec->raw_signal = malloc(SIGNAL_LEN * sizeof *rec->raw_signal);
    if (!rec->raw_signal)
    {
        slow5_rec_free(rec);
        return NULL;
    }
    memcpy(rec->raw_signal, shared_signal_buffer, SIGNAL_LEN * sizeof *rec->raw_signal);

    uint16_t CycleId = record_num % 10;
    uint16_t ChannelId = record_num % 2048;
    uint16_t SensorNum = record_num % 4;
    int32_t ReadNum = record_num;
    int32_t StartIndex = record_num % 1000;
    int32_t SignalLen = SIGNAL_LEN;
    double ReadScale = (record_num % 100) / 100.0;
    double ReadOffset = (record_num % 500) / 100.0;
    float offset = 1.23232f;
    float scale = 2.434f;
    float K = 1.322345f;
    float B = 3.1314f;
    const char *version = "v1.0";
    const char *platform = "testbench";

    /* Every setter is still attempted; a negative return from any of them
     * marks the record as unusable instead of being silently ignored. */
    int aux_failed = 0;
    aux_failed |= slow5_aux_set(rec, "CycleId", &CycleId, sp->header) < 0;
    aux_failed |= slow5_aux_set(rec, "ChannelId", &ChannelId, sp->header) < 0;
    aux_failed |= slow5_aux_set(rec, "SensorNum", &SensorNum, sp->header) < 0;
    aux_failed |= slow5_aux_set(rec, "ReadNum", &ReadNum, sp->header) < 0;
    aux_failed |= slow5_aux_set(rec, "StartIndex", &StartIndex, sp->header) < 0;
    aux_failed |= slow5_aux_set(rec, "SignalLen", &SignalLen, sp->header) < 0;
    aux_failed |= slow5_aux_set(rec, "ReadScale", &ReadScale, sp->header) < 0;
    aux_failed |= slow5_aux_set(rec, "ReadOffset", &ReadOffset, sp->header) < 0;
    aux_failed |= slow5_aux_set(rec, "offset", &offset, sp->header) < 0;
    aux_failed |= slow5_aux_set(rec, "scale", &scale, sp->header) < 0;
    aux_failed |= slow5_aux_set(rec, "K", &K, sp->header) < 0;
    aux_failed |= slow5_aux_set(rec, "B", &B, sp->header) < 0;
    aux_failed |= slow5_aux_set_string(rec, "Version", version, sp->header) < 0;
    aux_failed |= slow5_aux_set_string(rec, "Platform", platform, sp->header) < 0;

    if (aux_failed)
    {
        slow5_rec_free(rec);
        return NULL;
    }
    return rec;
}
/*
 * Write-throughput demo: creates FILE_PATH, registers the auxiliary fields,
 * writes up to NUM_RECORDS generated records and reports the processor time
 * the write loop consumed (measured with clock()).
 *
 * Returns EXIT_SUCCESS only if every record was created and written and the
 * file was closed without error; otherwise EXIT_FAILURE. Setup failures
 * (buffer allocation, opening the file, writing the header) exit immediately
 * with EXIT_FAILURE.
 */
int main()
{
    srand(time(NULL));

    if (init_shared_signal_buffer() < 0)
    {
        fprintf(stderr, "Failed to allocate shared signal buffer\n");
        exit(EXIT_FAILURE);
    }

    slow5_file_t *sp = slow5_open(FILE_PATH, "w");
    if (!sp)
    {
        fprintf(stderr, "Error opening file %s\n", FILE_PATH);
        free_shared_signal_buffer();
        exit(EXIT_FAILURE);
    }

    // Compression is left disabled in this demo. To enable it:
    // if (slow5_set_press(sp, SLOW5_COMPRESS_ZLIB, SLOW5_COMPRESS_SVB_ZD) < 0) {
    //     fprintf(stderr, "Warning: Failed to set compression\n");
    // }

    set_header_aux_fields(sp);
    if (slow5_hdr_write(sp) < 0)
    {
        fprintf(stderr, "Error writing header\n");
        free_shared_signal_buffer();
        slow5_close(sp);
        exit(EXIT_FAILURE);
    }

    int status = EXIT_SUCCESS;
    int written = 0; /* records actually written, as opposed to attempted */

    clock_t start = clock();
    for (int i = 0; i < NUM_RECORDS; i++)
    {
        slow5_rec_t *rec = create_record(i, sp);
        if (!rec)
        {
            fprintf(stderr, "Failed to create record %d\n", i);
            status = EXIT_FAILURE;
            continue;
        }

        int rc = slow5_write(rec, sp);
        slow5_rec_free(rec);
        if (rc < 0)
        {
            fprintf(stderr, "Error writing record %d\n", i);
            status = EXIT_FAILURE;
            break;
        }

        written++;
        if (written % 1000 == 0)
        {
            printf("Written %d records...\n", written);
            fflush(stdout);
        }
    }
    clock_t end = clock();

    double elapsed = (double)(end - start) / CLOCKS_PER_SEC;
    /* Report the number of records that really reached the file, so skipped
     * or aborted writes are not counted as successes. */
    printf("Successfully written %d records in %.2f seconds\n", written, elapsed);
    if (elapsed > 0.0)
    {
        printf("Record rate: %.2f records/sec\n", written / elapsed);
    }

    free_shared_signal_buffer();
    if (slow5_close(sp) != 0)
    {
        fprintf(stderr, "Error closing file %s\n", FILE_PATH);
        status = EXIT_FAILURE;
    }
    return status;
}