読者です 読者をやめる 読者になる 読者になる

slub: [sg]et_freepointer()めも

linux kernel

slubのkmem_cache_alloc()、kmem_cache_free()時の[sg]et_freepointer()の動きを確かめるというめもですが、チラ裏ですm(__)m

kmem_cache_alloc()を呼ぶとslubではslab_alloc_node()が呼ばれて、slabがすでにある場合はelseの方に入る。ここでget_freepointer_safe()を呼ぶんだけど、slab_alloc_node()の戻り値はnext_objectではなくて、2411行目のc->freelistの方になる。

2411         object = c->freelist;
2412         page = c->page;
2413         if (unlikely(!object || !node_match(page, node))) {
2414                 object = __slab_alloc(s, gfpflags, node, addr, c);
2415                 stat(s, ALLOC_SLOWPATH);
2416         } else {
2417                 void *next_object = get_freepointer_safe(s, object);
2418 
2419                 /*
2420                  * The cmpxchg will only match if there was no additional
2421                  * operation and if we are on the right processor.
2422                  *
2423                  * The cmpxchg does the following atomically (without lock
2424                  * semantics!)
2425                  * 1. Relocate first pointer to the current per cpu area.
2426                  * 2. Verify that tid and freelist have not been changed
2427                  * 3. If they were not changed replace tid and freelist
2428                  *
2429                  * Since this is without lock semantics the protection is only
2430                  * against code executing on this cpu *not* from access by
2431                  * other cpus.
2432                  */
2433                 if (unlikely(!this_cpu_cmpxchg_double(
2434                                 s->cpu_slab->freelist, s->cpu_slab->tid,
2435                                 object, tid,
2436                                 next_object, next_tid(tid)))) {
2437 
2438                         note_cmpxchg_failure("slab_alloc", s, tid);
2439                         goto redo;
2440                 }
2441                 prefetch_freepointer(s, next_object);
2442                 stat(s, ALLOC_FASTPATH);
2443         }

kmem_cache_free()の場合はslab_free()が最初のまともな処理部分。ここで解放時はfreeしようとしているオブジェクト「object」に対してc->freelistのアドレスを書き込む。そしてthis_cpu_cmpxchg_double()でc->freelistとobjectが入れ替わってfreeしたobjectが次のkmem_cache_alloc()時に返るようになる。

2661         if (likely(page == c->page)) {
2662                 set_freepointer(s, object, c->freelist);
2663 
2664                 if (unlikely(!this_cpu_cmpxchg_double(
2665                                 s->cpu_slab->freelist, s->cpu_slab->tid,
2666                                 c->freelist, tid,
2667                                 object, next_tid(tid)))) {
2668 
2669                         note_cmpxchg_failure("slab_free", s, tid);
2670                         goto redo;
2671                 }
2672                 stat(s, FREE_FASTPATH);
2673         } else
2674                 __slab_free(s, page, x, addr);

さて、ここでslubの slab管理方法を思いっきり簡略化して[sg]et_freepointer()の動きを確かめる。

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <assert.h>
#include <sys/mman.h>

/* Size in bytes of one dummy object carved out of the test page. */
enum { TEST_ST_BYTES = 256 };

/*
 * Dummy fixed-size object used to partition the mmap()ed page.
 * While an object sits on the free list, its first pointer-sized
 * bytes are overwritten with the address of the next free object
 * (see get_freepointer()/set_freepointer()).
 */
struct test_st {
    unsigned char s[TEST_ST_BYTES];
};

/*
 * Read the free pointer stored at the very beginning of a free object.
 *
 * object: address of an object whose first pointer-sized bytes hold
 *         the link to the next free object.
 *
 * Returns the stored link (NULL if that is what was stored).
 */
static inline void *get_freepointer(void *object)
{
    void **link = object;

    return *link;
}

/*
 * Store a free pointer at the very beginning of an object and log it.
 *
 * object: address of the object that receives the link; its first
 *         pointer-sized bytes are overwritten.
 * fp:     value to store (address of the next free object, or NULL).
 */
static inline void set_freepointer(void *object, void *fp)
{
    void **link = object;

    printf("set pointer %p to object %p\n", fp, object);
    *link = fp;
}

/*
 * Query the system page size.
 *
 * Returns whatever sysconf(_SC_PAGESIZE) reports, unmodified.
 */
static long get_page_size(void)
{
    const long size = sysconf(_SC_PAGESIZE);

    return size;
}

/*
 * Return an object to the free list (simplified model of the SLUB
 * fast-path free).
 *
 * The current head of the free list is written into the freed object
 * as its free pointer, then the freed object becomes the new head, so
 * it is the next one handed out.
 *
 * x:        object being freed; its first pointer-sized bytes are
 *           overwritten with the old list head.
 * freelist: address of the list head; updated to point at x.
 */
static void slab_free(void *x, void **freelist)
{
    void *object = x;

    printf("free slab object %p\n", object);
    printf("current freelist is %p\n", *freelist);

    /* Link the freed object in front of the current head. */
    set_freepointer(object, *freelist);

    *freelist = object;

    printf("next freelist is %p\n", *freelist);
}

/*
 * Demo driver: carve one anonymous page into fixed-size objects, chain
 * them through their free pointers, "allocate" all but the last one,
 * free objs[1], and show that the freed object is handed out next.
 *
 * Returns 0 on success, EXIT_FAILURE if the page size cannot be
 * determined, the page is too small, or mmap()/malloc() fails.
 */
int main(int argc, char **argv)
{
    long pagesize = get_page_size();
    int objsize = sizeof(struct test_st);
    long count;
    long allocated = 0;
    int i;
    char *p;
    char *start;
    struct test_st **objs;
    void *freelist = NULL;
    struct test_st *tmp;

    (void)argc;
    (void)argv;

    if (pagesize <= 0) {
        fprintf(stderr, "could not determine page size\n");
        return EXIT_FAILURE;
    }

    /* The page holds count + 1 objects; the last one terminates the list. */
    count = (pagesize / objsize) - 1;
    if (count < 2) {
        fprintf(stderr, "page too small for this test\n");
        return EXIT_FAILURE;
    }

    /* mmap() reports failure with MAP_FAILED, not NULL. */
    p = mmap(NULL, (size_t)pagesize, PROT_READ | PROT_WRITE,
             MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
    if (p == MAP_FAILED) {
        perror("mmap");
        return EXIT_FAILURE;
    }

    printf("pagesize is %ld\n", pagesize);
    printf("struct size is %zu\n", sizeof(struct test_st));
    printf("object count = %ld\n", count);
    printf("start address is %p\n", (void *)p);

    /* Build the initial free list: each object points at its successor. */
    freelist = start = p;
    for (i = 0; i <= count; i++, p += objsize) {
        printf("%d: ", i);
        if (i < count)
            set_freepointer(p, p + objsize);
        else
            set_freepointer(p, NULL);
    }

    objs = malloc(sizeof(*objs) * (size_t)count);
    if (objs == NULL) {
        perror("malloc");
        munmap(start, (size_t)pagesize);
        return EXIT_FAILURE;
    }

    /* Pop objects off the head of the free list until one is left. */
    for (i = 0; i < count; i++) {
        tmp = get_freepointer(freelist);
        if (!tmp) {
            freelist = tmp;
            break;
        }
        objs[i] = freelist;
        freelist = tmp;
        allocated = i + 1;
        printf("objs[%d] -> %p : next freelist -> %p\n", i, (void *)objs[i], freelist);
    }

    /* objs[1] is freed below, so it must actually have been allocated. */
    if (allocated < 2) {
        fprintf(stderr, "not enough objects allocated\n");
        free(objs);
        munmap(start, (size_t)pagesize);
        return EXIT_FAILURE;
    }

    slab_free(objs[1], &freelist);

    /* The freed object is now the list head and links to the old head. */
    tmp = get_freepointer(freelist);
    printf("next object %p\n", freelist);
    printf("next freelist %p\n", (void *)tmp);

    freelist = tmp;
    printf("get next object %p\n", get_freepointer(freelist));

    free(objs);

    if (munmap(start, (size_t)pagesize) != 0)
        perror("munmap");

    return 0;
}

本来set_freepointer()・get_freepointer()を直接使うことは無いし、freelistはvoid**なんだけど簡略化のためその辺はスルーで。 ↓が実行結果。

masami@saga:~$ ./a.out
pagesize is 4096
struct size is 256
object count = 15
start address is 0x7f16725a1000
0: set pointer 0x7f16725a1100 to object 0x7f16725a1000
1: set pointer 0x7f16725a1200 to object 0x7f16725a1100
2: set pointer 0x7f16725a1300 to object 0x7f16725a1200
3: set pointer 0x7f16725a1400 to object 0x7f16725a1300
4: set pointer 0x7f16725a1500 to object 0x7f16725a1400
5: set pointer 0x7f16725a1600 to object 0x7f16725a1500
6: set pointer 0x7f16725a1700 to object 0x7f16725a1600
7: set pointer 0x7f16725a1800 to object 0x7f16725a1700
8: set pointer 0x7f16725a1900 to object 0x7f16725a1800
9: set pointer 0x7f16725a1a00 to object 0x7f16725a1900
10: set pointer 0x7f16725a1b00 to object 0x7f16725a1a00
11: set pointer 0x7f16725a1c00 to object 0x7f16725a1b00
12: set pointer 0x7f16725a1d00 to object 0x7f16725a1c00
13: set pointer 0x7f16725a1e00 to object 0x7f16725a1d00
14: set pointer 0x7f16725a1f00 to object 0x7f16725a1e00
15: set pointer (nil) to object 0x7f16725a1f00
objs[0] -> 0x7f16725a1000 : next freelist -> 0x7f16725a1100
objs[1] -> 0x7f16725a1100 : next freelist -> 0x7f16725a1200
objs[2] -> 0x7f16725a1200 : next freelist -> 0x7f16725a1300
objs[3] -> 0x7f16725a1300 : next freelist -> 0x7f16725a1400
objs[4] -> 0x7f16725a1400 : next freelist -> 0x7f16725a1500
objs[5] -> 0x7f16725a1500 : next freelist -> 0x7f16725a1600
objs[6] -> 0x7f16725a1600 : next freelist -> 0x7f16725a1700
objs[7] -> 0x7f16725a1700 : next freelist -> 0x7f16725a1800
objs[8] -> 0x7f16725a1800 : next freelist -> 0x7f16725a1900
objs[9] -> 0x7f16725a1900 : next freelist -> 0x7f16725a1a00
objs[10] -> 0x7f16725a1a00 : next freelist -> 0x7f16725a1b00
objs[11] -> 0x7f16725a1b00 : next freelist -> 0x7f16725a1c00
objs[12] -> 0x7f16725a1c00 : next freelist -> 0x7f16725a1d00
objs[13] -> 0x7f16725a1d00 : next freelist -> 0x7f16725a1e00
objs[14] -> 0x7f16725a1e00 : next freelist -> 0x7f16725a1f00
free slab object 0x7f16725a1100
current freelist is 0x7f16725a1f00
set pointer 0x7f16725a1f00 to object 0x7f16725a1100
next freelist is 0x7f16725a1100
next object 0x7f16725a1100
next freelist 0x7f16725a1f00
get next object (nil)

objs[14] でget_freepointer()したあとはfreelistが0x7f16725a1f00でnullを指すようになっていて、その後objs[1]をfreeしてfreelistがそのアドレスを指すようになって、次にもう一度get_freepointer()すると先ほどfreeしたアドレスが返り、freelistがまた0x7f16725a1f00を指す。この状態でget_freepointer()するとnullが返るという動き。

↓はここ最近調べたことのまとめです。

Slub data structure

ハルロック(1)

ハルロック(1)