Cute trick to mark parts of a C structure read-only
Fri 15 November 2024 — download

Recently, I've been trying to improve the sorry state of PHP's heap implementation, small step by small step, since my free time shrank significantly this year. Anyway, one of the low-hanging fruits is to make parts of the _zend_mm_heap read-only, since it contains function pointers that are often overwritten in public exploits to transform a (limited) read/write primitive into arbitrary code execution.

Changing a memory region's protection is done via mprotect, but it only works at page granularity, on page-aligned addresses. The easiest way that came to mind is to use C11's anonymous struct and union along with the aligned attribute to make the structure fit neatly on pages:

/*
 * Structure whose security-sensitive fields occupy a page of their own, so
 * that this page can be made read-only with mprotect(2).
 *
 * - The anonymous inner struct lets callers reach the sensitive fields
 *   directly (s->my_important_ptr).
 * - The anonymous union pads those fields to PAGE_SIZE bytes, so the
 *   unimportant fields start on the following page.
 * - The aligned attribute makes the type page-aligned. It only affects
 *   objects the compiler places itself (static/automatic storage); heap
 *   instances still need a page-aligned allocator.
 *
 * PAGE_SIZE must be an integer constant expression.
 */
typedef struct {
    union {
        struct {
            void *my_important_ptr;
            size_t my_important_size;
        };
        char padding[PAGE_SIZE];
    };
    void *my_unimportant_ptr;
    size_t my_unimportant_size;
} __attribute__((aligned(PAGE_SIZE))) my_struct;

/*
 * Allocate a my_struct, zero its fields and write-protect its first page.
 *
 * Returns the new structure, or NULL if the allocation or the mprotect(2)
 * call fails.
 *
 * NOTE(review): the first page is read-only on return; presumably it has to
 * be made writable again before the block is handed to free() -- confirm
 * for the allocator in use.
 */
my_struct *init_my_struct(void) {
    /* The protected page must not be the whole structure. */
    assert(sizeof(my_struct) > PAGE_SIZE);

    /*
     * mprotect(2) rejects addresses that are not page-aligned, and malloc()
     * gives no such guarantee, so request the alignment explicitly.
     * aligned_alloc() is C11; posix_memalign(3) works as well.
     * sizeof *s is a multiple of PAGE_SIZE as long as the type is declared
     * with PAGE_SIZE alignment, which aligned_alloc() requires in C11.
     */
    my_struct *s = aligned_alloc(PAGE_SIZE, sizeof *s);
    if (!s) {
        return NULL;
    }

    s->my_unimportant_ptr = NULL;
    s->my_unimportant_size = 0;
    s->my_important_ptr = NULL;
    s->my_important_size = 0;

    /* Everything sensitive is initialised: seal the first page. */
    if (mprotect(s, PAGE_SIZE, PROT_READ) != 0) {
        /* Do not hand out a structure that silently lacks protection. */
        free(s);
        return NULL;
    }
    return s;
}

/*
 * Update the protected my_important_size field of s.
 *
 * The first page of s is made writable for the duration of the store and
 * read-only again afterwards. If the page cannot be made writable the
 * function returns without modifying the field.
 */
void set_size(my_struct *s, size_t size) {
    /*
     * Keep PROT_READ while unlocking: PROT_WRITE alone does not portably
     * imply read access, and other fields on the page may be read meanwhile.
     */
    if (mprotect(s, PAGE_SIZE, PROT_READ | PROT_WRITE) != 0) {
        /* Still read-only: the store below would fault. */
        return;
    }

    s->my_important_size = size;

    /* A void function has no way to report a failure to re-protect. */
    (void)mprotect(s, PAGE_SIZE, PROT_READ);
}

Unfortunately, this isn't really portable, as PAGE_SIZE can't be known at compile time: the page size is a property of the system the program runs on, not of the one it was built on. The recommended way to get its value is to call long sz = sysconf(_SC_PAGESIZE);.

So what I went for, on the good advice of Arnaud Le Blanc, was to go the dynamic route, with two separate structures:

/*
 * Return the system page size in bytes.
 *
 * The value is queried once with sysconf(_SC_PAGESIZE) and cached in a
 * function-local static. If sysconf() reports an error or a non-positive
 * value, 4096 is used instead.
 */
static size_t get_page_size(void) {
    static size_t page_size = 0;
    if (!page_size) {
        /*
         * sysconf() returns a long and signals failure with -1. Check the
         * signed value before converting: stored directly in a size_t, -1
         * becomes SIZE_MAX and would never reach the fallback.
         */
        long sz = sysconf(_SC_PAGESIZE);
        page_size = (sz > 0) ? (size_t)sz : 4096; /* sane-ish default */
    }
    return page_size;
}

/*
 * Return a pointer to the read-only data that lives one page after the
 * my_struct pointed to by s. The result is a ro_data pointer, so the
 * important fields can be accessed through it. The byte offset is computed
 * on a char pointer because the page size is only known at run time.
 */
#define GET_RO(s) ((ro_data *)((char *)(s) + get_page_size()))

/* Freely writable part of the state; stored at the start of the block. */
typedef struct {
    void *my_unimportant_ptr;
    size_t my_unimportant_size;
} my_struct;

/* Sensitive part of the state; accessed through GET_RO(). */
typedef struct {
    void *my_important_ptr;
    size_t my_important_size;
} ro_data;

/*
 * Allocate a two-page block: the first page holds the my_struct, the second
 * page holds the data reached through GET_RO(), which is write-protected
 * before returning.
 *
 * Returns the new structure, or NULL if the allocation or the mprotect(2)
 * call fails.
 *
 * NOTE(review): the second page is read-only on return; presumably it has
 * to be made writable again before the block is handed to free() -- confirm
 * for the allocator in use.
 */
my_struct *init_my_struct(void) {
    const size_t page_size = get_page_size();

    /* Each part has to fit in its own page. */
    assert(sizeof(my_struct) <= page_size);
    assert(sizeof(ro_data) <= page_size);

    /*
     * mprotect(2) rejects addresses that are not page-aligned, and malloc()
     * gives no such guarantee, so request the alignment explicitly.
     * aligned_alloc() is C11; posix_memalign(3) works as well.
     */
    my_struct *s = aligned_alloc(page_size, page_size * 2);
    if (!s) {
        return NULL;
    }

    s->my_unimportant_ptr = NULL;
    s->my_unimportant_size = 0;
    GET_RO(s)->my_important_ptr = NULL;
    GET_RO(s)->my_important_size = 0;

    /* Everything sensitive is initialised: seal the second page. */
    if (mprotect(GET_RO(s), page_size, PROT_READ) != 0) {
        /* Do not hand out a structure that silently lacks protection. */
        free(s);
        return NULL;
    }
    return s;
}

/*
 * Update the protected my_important_size field that belongs to s.
 *
 * The page returned by GET_RO(s) is made writable for the duration of the
 * store and read-only again afterwards. If the page cannot be made writable
 * the function returns without modifying the field.
 */
void set_size(my_struct *s, size_t size) {
    const size_t page_size = get_page_size();

    /*
     * Keep PROT_READ while unlocking: PROT_WRITE alone does not portably
     * imply read access, and the page may be read while it is unlocked.
     */
    if (mprotect(GET_RO(s), page_size, PROT_READ | PROT_WRITE) != 0) {
        /* Still read-only: the store below would fault. */
        return;
    }

    GET_RO(s)->my_important_size = size;

    /* A void function has no way to report a failure to re-protect. */
    (void)mprotect(GET_RO(s), page_size, PROT_READ);
}

The pull request to implement this in PHP has a bit of fluff to integrate it with PHP's memory-management lifecycle, but should still be fairly readable. Unfortunately, it was rejected on the basis of lowering performance by 0.6% on my local benchmark.