Skip to content

Merge PMDK to Master #7

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 37 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
37 commits
Select commit Hold shift + click to select a range
f0e2205
Initial pmdk pushes for clht_lb_res
pyrito Mar 6, 2020
8aaf5c2
Refactored & bug
pyrito Mar 13, 2020
922b912
Fixed seg fault
pyrito Mar 14, 2020
1d56564
Main logic working with PMDK
pyrito Mar 15, 2020
1982610
Implemented PM clean-up, solved segfault
pyrito Mar 16, 2020
298bf59
Modified clean-up in clht_gc.c, found resizing bug
pyrito Mar 17, 2020
6a5a4d4
Fixed PMEMpool size, still having segfault
pyrito Mar 17, 2020
b3ead0c
Solved seg fault for hashtable. WIP
pyrito Mar 19, 2020
b859a6b
Replicated bug
pyrito Mar 19, 2020
5ec9a47
Fixed seg fault, still issue with ssmem
pyrito Mar 21, 2020
2e07aca
Changed clht_open func param
pyrito Mar 21, 2020
fd33b64
First implementation of transactions
pyrito Mar 23, 2020
824f5e2
Trimmed use of transactions. WIP.
pyrito Mar 31, 2020
bd951e6
Working on cache alignment fix. WIP.
pyrito Apr 1, 2020
9b605d9
Cache-line alignment fixed.
pyrito Apr 1, 2020
5dda496
Added README information for PMDK
pyrito Apr 5, 2020
bbdd250
Added transaction configurability
pyrito Apr 6, 2020
f4243fe
Documentation changes
pyrito Apr 8, 2020
03c7b32
Merge branch 'master' into pmdk
SeKwonLee Apr 9, 2020
fb11b10
Initial commit applying PMDK to masstree
SeKwonLee Apr 10, 2020
9782868
Integrated correct transactions
pyrito Apr 11, 2020
4f6a839
Merge branch 'pmdk' of https://github.com/utsaslab/RECIPE into pmdk
pyrito Apr 11, 2020
77bc50d
[P-Masstree] Update compile options and minor changes
Apr 12, 2020
c8182e3
Update pmdk document
SeKwonLee Apr 12, 2020
1ec6380
[P-CLHT] update compile options
SeKwonLee Apr 12, 2020
3d82a49
Fixed bug in clht_lb_res.c
pyrito Apr 12, 2020
af8106d
Merge branch 'master' into pmdk
SeKwonLee Apr 14, 2020
ed00f17
:sparkles: add the new feature to reload masstree
SeKwonLee Apr 14, 2020
0405b2d
Merge branch 'master' into pmdk
SeKwonLee Apr 15, 2020
8d3b00a
Merge branch 'master' into pmdk
SeKwonLee Apr 15, 2020
270bb09
Exchange free to pmemobj_free
SeKwonLee Apr 15, 2020
3514403
Added some previously deleted code
pyrito Apr 16, 2020
7006004
Merge branch 'pmdk' of https://github.com/utsaslab/RECIPE into pmdk
pyrito Apr 16, 2020
8d7af8a
Integrate clht_open to clht_create
SeKwonLee Apr 16, 2020
d495e35
Remove comments related to DIMM
SeKwonLee Apr 16, 2020
058e0b1
Merge branch 'master' into pmdk
SeKwonLee Apr 18, 2020
03af92d
Modified README for P-CLHT
pyrito Apr 18, 2020
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 10 additions & 0 deletions .vscode/settings.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
{
"files.associations": {
"mutex": "c",
"shared_mutex": "c",
"condition_variable": "c",
"chrono": "cpp",
"random": "c",
"algorithm": "c"
}
}
9 changes: 8 additions & 1 deletion P-CLHT/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -42,5 +42,12 @@ set(P_CLHT_TEST example.cpp src/clht_lb_res.c src/clht_gc.c
external/sspfd/sspfd.c external/ssmem/src/ssmem.c)
add_executable(example ${P_CLHT_TEST})

target_link_libraries(example ${TbbLib} ${JemallocLib} boost_system
target_link_libraries(example ${TbbLib} ${JemallocLib} pmemobj pmem boost_system
boost_thread pthread)

set(P_CLHT_TEST test.cpp src/clht_lb_res.c src/clht_gc.c
external/sspfd/sspfd.c external/ssmem/src/ssmem.c)
add_executable(test ${P_CLHT_TEST})

target_link_libraries(test ${TbbLib} ${JemallocLib} pmemobj pmem boost_system
boost_thread pthread)
2 changes: 1 addition & 1 deletion P-CLHT/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -151,7 +151,7 @@ CFLAGS += $(PLATFORM)
CFLAGS += $(OPTIMIZE)
CFLAGS += $(DEBUG_FLAGS)

INCLUDES := -I$(MAININCLUDE) -I$(TOP)/external/include
INCLUDES := -I$(MAININCLUDE) -I$(TOP)/external/include
OBJ_FILES := clht_gc.o

SRC := src
Expand Down
57 changes: 52 additions & 5 deletions P-CLHT/README.md
Original file line number Diff line number Diff line change
@@ -1,11 +1,10 @@
## P-CLHT: Persistent Cache-Line Hash Table
## P-CLHT: Persistent Cache-Line Hash Table - PMDK

`P-CLHT` is a crash consistent version of [Cache-Line Hash Table](https://dl.acm.org/citation.cfm?id=2694359) (CLHT).
`P-CLHT` is a recoverable and crash-consistent version of [Cache-Line Hash Table](https://dl.acm.org/citation.cfm?id=2694359) (CLHT).
CLHT is a cache-friendly hash table which restricts each bucket to be of the size of a cache line.
CLHT is an unordered index only supporting point queries.

**Conversion**. `CLHT-LB` using lock-based writes for concurrency is converted into `P-CLHT` by adding cache
line flushes and memory fences after each critical volatile store.
**Conversion**. `CLHT-LB` using lock-based writes for concurrency is converted into `P-CLHT` by adding cache line flushes and memory fences after each critical volatile store.

**Performance**. Compared with [CCEH](https://www.usenix.org/conference/fast19/presentation/nam) that is a
state-of-the-art unordered index, `P-CLHT` shows **2.38x**, **1.35x**, and **1.25x** better performance in
Expand All @@ -16,7 +15,55 @@ YCSB workload A, B, C respectively using random integer keys while **0.37x** wor
**Use Case**. `P-CLHT` provides the superior performance of insertion and point lookup, even if not supporting
range scans. Therefore, it would be appropriate to be used for the applications only consisting of point queries.

This branch of P-CLHT also uses PMDK to ensure the persistence and recoverability of the cache-line hash table. All other details of this data structure are the same (cache line flushing, alignment, etc) except for the backend library used to ensure persistence.

**Motivation**. The published implementation has no way to reclaim persistent memory that is permanently leaked when a crash occurs. The PMDK library, specifically `libpmemobj`, provides useful facilities such as `pmemobj_root`, which gives access to a root object stored at a known offset within the persistent memory pool; starting from that root, data that was left in a partial state can be located and recovered.

**How We Used PMDK**. The conversion required us to replace the data structure's pointers with references into the persistent memory pool, whose targets are allocated with the non-transactional, atomic allocation functions such as `pmemobj_alloc`. Since `PMEMoid` structs (which store the pool id and offset) are 16 bytes, some code manipulation was required to preserve the cache-line alignment of the data structure. Finally, transactions are used for major hash table operations such as insertion, resizing, and deletion. This part is still being tested and is a work in progress. If you compare the code with the `master` branch, you can see that the changes follow a logical pattern and that the modifications are relatively minor.

**How to test recoverability?** The best way to recover your hash table is to follow the paradigm presented in `clht_open`, where all the user has to do is call `pmemobj_root` to recover the root (essentially a `clht_t` object) of the persistent memory pool. Please make sure that you are opening the same pool with the correct pool layout!
```
...
PMEMoid my_root = pmemobj_root(pop, sizeof(clht_t));
if (pmemobj_direct(my_root) == NULL)
{
perror("root pointer is null\n");
}
...
clht_t* w = pmemobj_direct(my_root);
...
```

## Build & Run
### How to enable PM?
1. Install PMDK
```
$ git clone https://github.com/pmem/pmdk.git
$ cd pmdk
$ git checkout tags/1.6
$ make -j
$ cd ..
```
2. Emulate PM with Ext4-DAX mount
```
$ sudo mkfs.ext4 -b 4096 -E stride=512 -F /dev/pmem0
$ sudo mount -o dax /dev/pmem0 /mnt/pmem
```

3. Set pool_size and pool name appropriately using `pmemobj_create`. For example:
```
// Size of the memory pool
size_t pool_size = 2*1024*1024*1024UL;
if( access("/mnt/pmem/pool", F_OK ) != -1 )
{
// If the pool already exists, open it
pop = pmemobj_open("/mnt/pmem/pool", POBJ_LAYOUT_NAME(clht));
} else
{
// If the pool does not exist, create it
pop = pmemobj_create("/mnt/pmem/pool", POBJ_LAYOUT_NAME(clht), pool_size, 0666);
}
```

4. Build the project as described below and run the example.

#### Build

Expand All @@ -35,4 +82,4 @@ $ ./example 10000 4
usage: ./example [n] [nthreads]
n: number of keys (integer)
nthreads: number of threads (integer)
```
```
21 changes: 11 additions & 10 deletions P-CLHT/example.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -6,9 +6,9 @@
#include <atomic>
#include "tbb/tbb.h"

using namespace std;
#include <libpmemobj.h>

#include "clht.h"
#include "clht_lb_res.h"
#include "ssmem.h"

typedef struct thread_data {
Expand Down Expand Up @@ -96,7 +96,7 @@ void run(char **argv) {
thread_group[i].join();
auto duration = std::chrono::duration_cast<std::chrono::microseconds>(
std::chrono::system_clock::now() - starttime);
printf("Throughput: load, %f ,ops/us\n", (n * 1.0) / duration.count());
printf("Throughput: load, %f ,ops/s\n", (n * 1.0) / (duration.count()/1000000.0));
}

barrier.crossing = 0;
Expand All @@ -117,11 +117,12 @@ void run(char **argv) {
barrier_cross(&barrier);

for (uint64_t i = start_key; i < end_key; i++) {
uintptr_t val = clht_get(tds[thread_id].ht->ht, keys[i]);
if (val != keys[i]) {
std::cout << "[CLHT] wrong key read: " << val << "expected: " << keys[i] << std::endl;
exit(1);
}
clht_hashtable_t *ht = (clht_hashtable_t*)clht_ptr_from_off((tds[thread_id].ht)->ht_off);
uintptr_t val = clht_get(ht, keys[i]);
if (val != keys[i]) {
std::cout << "[CLHT] wrong key read: " << val << " expected: " << keys[i] << std::endl;
exit(1);
}
}
};

Expand All @@ -134,9 +135,9 @@ void run(char **argv) {
thread_group[i].join();
auto duration = std::chrono::duration_cast<std::chrono::microseconds>(
std::chrono::system_clock::now() - starttime);
printf("Throughput: run, %f ,ops/us\n", (n * 1.0) / duration.count());
printf("Throughput: run, %f ,ops/s\n", (n * 1.0) / (duration.count()/1000000.0));
}
clht_gc_destroy(hashtable);
// clht_gc_destroy(hashtable);

delete[] keys;
}
Expand Down
25 changes: 22 additions & 3 deletions P-CLHT/include/clht_lb_res.h
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@
#include <inttypes.h>
#include "atomic_ops.h"
#include "utils.h"
#include <libpmemobj.h>

#ifdef __cplusplus
extern "C" {
Expand Down Expand Up @@ -160,7 +161,9 @@ typedef struct ALIGNED(CACHE_LINE_SIZE) bucket_s
volatile uint32_t hops;
clht_addr_t key[ENTRIES_PER_BUCKET];
clht_val_t val[ENTRIES_PER_BUCKET];
volatile struct bucket_s* next;
// volatile struct bucket_s* next;
// PMEMoid next;
uint64_t next_off;
} bucket_t;

//#if __GNUC__ > 4 && __GNUC_MINOR__ > 4
Expand All @@ -173,7 +176,9 @@ typedef struct ALIGNED(CACHE_LINE_SIZE) clht
{
struct
{
struct clht_hashtable_s* ht;
// PMEMoid ht;
uint64_t ht_off;
// struct clht_hashtable_s* ht;
uint8_t next_cache_line[CACHE_LINE_SIZE - (sizeof(void*))];
struct clht_hashtable_s* ht_oldest;
struct ht_ts* version_list;
Expand All @@ -193,7 +198,9 @@ typedef struct ALIGNED(CACHE_LINE_SIZE) clht_hashtable_s
struct
{
size_t num_buckets;
bucket_t* table;
// PMEMoid table;
uint64_t table_off;
//bucket_t* table;
size_t hash;
size_t version;
uint8_t next_cache_line[CACHE_LINE_SIZE - (3 * sizeof(size_t)) - (sizeof(void*))];
Expand Down Expand Up @@ -441,6 +448,18 @@ int ht_resize_pes(clht_t* hashtable, int is_increase, int by);
const char* clht_type_desc();

void clht_lock_initialization(clht_t *h);

// Declare the libpmemobj pool layout named "clht" (this only declares types;
// it does not create or open a pool). clht_t is declared as the layout's root
// object type; clht_hashtable_t and bucket_t are declared as additional typed
// object types of the same layout.
POBJ_LAYOUT_BEGIN(clht);
POBJ_LAYOUT_ROOT(clht, clht_t);
POBJ_LAYOUT_TOID(clht, clht_hashtable_t);
POBJ_LAYOUT_TOID(clht, bucket_t);
POBJ_LAYOUT_END(clht);

/* Global pool uuid. Combined with a stored offset (e.g. table_off) to build a
 * PMEMoid when an object has to be handed back to libpmemobj.
 * NOTE(review): this is a variable definition (no `extern`) placed in a
 * header, so every translation unit that includes it emits its own
 * definition — confirm this links as intended with the project's toolchain. */
uint64_t pool_uuid;
/* Converts an offset stored in the persistent structures (ht_off, table_off,
 * next_off) into a pointer usable by the caller.
 * NOTE(review): callers compare the result against NULL to detect the end of
 * a bucket chain; presumably a zero offset maps to NULL — verify against the
 * definition. */
void* clht_ptr_from_off(uint64_t offset);

#ifdef __cplusplus
}
#endif
Expand Down
62 changes: 38 additions & 24 deletions P-CLHT/src/clht_gc.c
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,8 @@ clht_gc_thread_init(clht_t* h, int id)
ht_ts_t* ts = (ht_ts_t*) memalign(CACHE_LINE_SIZE, sizeof(ht_ts_t));
assert(ts != NULL);

ts->version = h->ht->version;
clht_hashtable_t* ht_ptr = clht_ptr_from_off(h->ht_off);
ts->version = ht_ptr->version;
ts->id = id;

do
Expand Down Expand Up @@ -128,7 +129,8 @@ clht_gc_min_version_used(clht_t* h)
{
volatile ht_ts_t* cur = h->version_list;

size_t min = h->ht->version;
clht_hashtable_t* ht_ptr = clht_ptr_from_off(h->ht_off);
size_t min = ht_ptr->version;
while (cur != NULL)
{
if (cur->version < min)
Expand All @@ -149,8 +151,9 @@ clht_gc_min_version_used(clht_t* h)
static int
clht_gc_collect_cond(clht_t* hashtable, int collect_not_referenced_only)
{
clht_hashtable_t* ht_ptr = clht_ptr_from_off(hashtable->ht_off);
/* if version_min >= current version there is nothing to collect! */
if ((hashtable->version_min >= hashtable->ht->version) || TRYLOCK_ACQ(&hashtable->gc_lock))
if ((hashtable->version_min >= ht_ptr->version) || TRYLOCK_ACQ(&hashtable->gc_lock))
{
/* printf("** someone else is performing gc\n"); */
return 0;
Expand All @@ -160,7 +163,7 @@ clht_gc_collect_cond(clht_t* hashtable, int collect_not_referenced_only)

/* printf("[GCOLLE-%02d] LOCK : %zu\n", GET_ID(collect_not_referenced_only), hashtable->version); */

size_t version_min = hashtable->ht->version;
size_t version_min = ht_ptr->version;
if (collect_not_referenced_only)
{
version_min = clht_gc_min_version_used(hashtable);
Expand Down Expand Up @@ -221,20 +224,24 @@ clht_gc_free(clht_hashtable_t* hashtable)
uint64_t bin;
for (bin = 0; bin < num_buckets; bin++)
{
bucket = hashtable->table + bin;
bucket = bucket->next;
bucket = ((bucket_t*)clht_ptr_from_off(hashtable->table_off)) + bin;
bucket = clht_ptr_from_off(bucket->next_off);

while (bucket != NULL)
{
volatile bucket_t* cur = bucket;
bucket = bucket->next;
free((void*) cur);
}
{
volatile bucket_t* cur = bucket;
bucket = clht_ptr_from_off(bucket->next_off);
PMEMoid cur_oid = pmemobj_oid((void*) cur);
pmemobj_free(&cur_oid);
}
}
#endif

free(hashtable->table);
free(hashtable);

PMEMoid table_oid = {pool_uuid, hashtable->table_off};
pmemobj_free(&(table_oid));
PMEMoid ht_oid = pmemobj_oid((void*) hashtable);
pmemobj_free(&ht_oid);

return 1;
}

Expand All @@ -246,12 +253,13 @@ clht_gc_destroy(clht_t* hashtable)
{
#if !defined(CLHT_LINKED)
clht_gc_collect_all(hashtable);
clht_gc_free(hashtable->ht);
free(hashtable);
clht_gc_free(clht_ptr_from_off(hashtable->ht_off));
// PMEMoid ht_oid = pmemobj_oid((void*) hashtable);
// pmemobj_free(&ht_oid);
#endif

// ssmem_alloc_term(clht_alloc);
free(clht_alloc);
// ssmem_alloc_term(clht_alloc);
//free(clht_alloc);
}

/*
Expand All @@ -269,20 +277,26 @@ clht_gc_release(clht_hashtable_t* hashtable)

uint64_t bin;
for (bin = 0; bin < num_buckets; bin++)
{
bucket = hashtable->table + bin;
bucket = bucket->next;
{
bucket = ((bucket_t*)clht_ptr_from_off(hashtable->table_off)) + bin;
bucket = clht_ptr_from_off(bucket->next_off);

while (bucket != NULL)
{
volatile bucket_t* cur = bucket;
bucket = bucket->next;
bucket = clht_ptr_from_off(bucket->next_off);
ssmem_release(clht_alloc, (void*) cur);
// PMEMoid cur_oid = pmemobj_oid((void*) cur);
// pmemobj_free(&cur_oid);
}
}
}
#endif

ssmem_release(clht_alloc, hashtable->table);
ssmem_release(clht_alloc, clht_ptr_from_off(hashtable->table_off));
ssmem_release(clht_alloc, hashtable);
// pmemobj_free(&(hashtable->table));
// PMEMoid ht_oid = pmemobj_oid((void*) hashtable);
// pmemobj_free(&ht_oid);
return 1;
}

Expand Down
Loading