B Trees

#programming #data-structures

A self-balancing tree data structure, which is designed for efficient lookups in disk storage and database systems. Unlike binary trees, it's nodes can have more than 2 children.

Core properties

A B tree of order m has the following properties.

Some advantages

B trees general layout in C

unsigned int MAX = 3 // Order m = 4 (Max keys = m-1) 
unsigned int MIN = 1 // Min keys = ceil(m/2) - 1 
struct BTreeNode { 
	int keys[MAX]; // Array of keys 
	struct BTreeNode *child[MAX + 1]; // Pointers to children 
	int count; // Number of keys currently in node 
	bool leaf; // True if leaf node 
};

General operations

We perform a binary search upon the keys in a node, and move to the correct child node if the key isn't present

struct BTreeNode* search(struct BTreeNode* root, int k) {
    int i = 0;
    // Find the first key greater than or equal to k
    while (i < root->count && k > root->keys[i]) {
        i++;
    }

    // If the key is found in this node
    if (i < root->count && root->keys[i] == k) {
        return root;
    }

    // If the key is not found and this is a leaf node
    if (root->leaf) {
        return NULL;
    }

    // Recurse to the appropriate child
    return search(root->child[i], k);
}

Time complexity : O(logmn)


Insertion

Insertion always happens at the leaf level. The following steps are followed

  1. Traverse : Find the leaf node where key should be inserted
  2. If the key has capacity remaining, insert in the key in sorted order
  3. If capacity is over, Split the node.

The Split :

Code : Splitting child

// Function to split the child y of node x
void splitChild(struct BTreeNode *x, int i, struct BTreeNode *y) {
    struct BTreeNode *z = malloc(sizeof(struct BTreeNode));
    z->leaf = y->leaf;
    z->count = MIN;

    // Copy the last MIN keys of y to z
    for (int j = 0; j < MIN; j++)
        z->keys[j] = y->keys[j + MIN + 1];

    // Copy the last MIN+1 children of y to z
    if (!y->leaf) {
        for (int j = 0; j <= MIN; j++)
            z->child[j] = y->child[j + MIN + 1];
    }

    y->count = MIN;

    // Shift children of x to make room for z
    for (int j = x->count; j >= i + 1; j--)
        x->child[j + 1] = x->child[j];

    x->child[i + 1] = z;

    // Shift keys of x to move median key up from y
    for (int j = x->count - 1; j >= i; j--)
        x->keys[j + 1] = x->keys[j];

    x->keys[i] = y->keys[MIN];
    x->count++;
}

Time complexity : O(mlogmn)


Deletion

Deletion is complex because balance is required to be maintained, and a node must never have less than m/21 keys

Case A : Node is leaf

Case B : Node is internal

Code : Deletion (Borrowing from prev)

void borrowFromPrev(struct BTreeNode *node, int idx) {
    struct BTreeNode *child = node->child[idx];
    struct BTreeNode *sibling = node->child[idx - 1];

    // Shift all keys in child one step ahead
    for (int i = child->count - 1; i >= 0; --i)
        child->keys[i + 1] = child->keys[i];

    if (!child->leaf) {
        for (int i = child->count; i >= 0; --i)
            child->child[i + 1] = child->child[i];
    }

    // Setting child's first key equal to the key from the current node
    child->keys[0] = node->keys[idx - 1];

    if (!child->leaf)
        child->child[0] = sibling->child[sibling->count];

    // Moving the last key from sibling to the parent (node)
    node->keys[idx - 1] = sibling->keys[sibling->count - 1];

    child->count += 1;
    sibling->count -= 1;
}

Time complexity : O(mlogmn)


Full C implementation

#include <stdio.h>
#include <stdlib.h>
#include <stdbool.h>

#define M 4
#define MAX_KEYS (M - 1)
#define MIN_KEYS (M / 2 - 1)

struct BTreeNode {
    int keys[MAX_KEYS];
    struct BTreeNode *child[M];
    int count;
    bool leaf;
};

// --- Function Prototypes ---
struct BTreeNode* createNode(bool leaf);
void traverse(struct BTreeNode* root);
struct BTreeNode* search(struct BTreeNode* root, int k);
void insert(struct BTreeNode** root, int k);
void insertNonFull(struct BTreeNode* x, int k);
void splitChild(struct BTreeNode* x, int i, struct BTreeNode* y);
void removeKey(struct BTreeNode* root, int k);
void removeFromLeaf(struct BTreeNode* node, int idx);
void removeFromNonLeaf(struct BTreeNode* node, int idx);
int getPredecessor(struct BTreeNode* node, int idx);
int getSuccessor(struct BTreeNode* node, int idx);
void fill(struct BTreeNode* node, int idx);
void borrowFromPrev(struct BTreeNode* node, int idx);
void borrowFromNext(struct BTreeNode* node, int idx);
void merge(struct BTreeNode* node, int idx);

// --- Core Helper: Create Node ---
struct BTreeNode* createNode(bool leaf) {
    struct BTreeNode* newNode = (struct BTreeNode*)malloc(sizeof(struct BTreeNode));
    newNode->leaf = leaf;
    newNode->count = 0;
    for (int i = 0; i < M; i++) newNode->child[i] = NULL;
    return newNode;
}

// --- INSERTION LOGIC ---
void insert(struct BTreeNode** root, int k) {
    struct BTreeNode* r = *root;
    if (r->count == MAX_KEYS) {
        struct BTreeNode* s = createNode(false);
        *root = s;
        s->child[0] = r;
        splitChild(s, 0, r);
        insertNonFull(s, k);
    } else {
        insertNonFull(r, k);
    }
}

void insertNonFull(struct BTreeNode* x, int k) {
    int i = x->count - 1;
    if (x->leaf) {
        while (i >= 0 && x->keys[i] > k) {
            x->keys[i + 1] = x->keys[i];
            i--;
        }
        x->keys[i + 1] = k;
        x->count++;
    } else {
        while (i >= 0 && x->keys[i] > k) i--;
        if (x->child[i + 1]->count == MAX_KEYS) {
            splitChild(x, i + 1, x->child[i + 1]);
            if (x->keys[i + 1] < k) i++;
        }
        insertNonFull(x->child[i + 1], k);
    }
}

void splitChild(struct BTreeNode* x, int i, struct BTreeNode* y) {
    struct BTreeNode* z = createNode(y->leaf);
    z->count = MIN_KEYS;
    for (int j = 0; j < MIN_KEYS; j++) z->keys[j] = y->keys[j + MIN_KEYS + 1];
    if (!y->leaf) {
        for (int j = 0; j <= MIN_KEYS; j++) z->child[j] = y->child[j + MIN_KEYS + 1];
    }
    y->count = MIN_KEYS;
    for (int j = x->count; j >= i + 1; j--) x->child[j + 1] = x->child[j];
    x->child[i + 1] = z;
    for (int j = x->count - 1; j >= i; j--) x->keys[j + 1] = x->keys[j];
    x->keys[i] = y->keys[MIN_KEYS];
    x->count++;
}

// --- DELETION LOGIC ---
int findKey(struct BTreeNode* node, int k) {
    int idx = 0;
    while (idx < node->count && node->keys[idx] < k) ++idx;
    return idx;
}

void removeKey(struct BTreeNode* node, int k) {
    int idx = findKey(node, k);
    if (idx < node->count && node->keys[idx] == k) {
        if (node->leaf) removeFromLeaf(node, idx);
        else removeFromNonLeaf(node, idx);
    } else {
        if (node->leaf) return;
        bool flag = (idx == node->count);
        if (node->child[idx]->count < M / 2) fill(node, idx);
        if (flag && idx > node->count) removeKey(node->child[idx - 1], k);
        else removeKey(node->child[idx], k);
    }
}

void removeFromLeaf(struct BTreeNode* node, int idx) {
    for (int i = idx + 1; i < node->count; ++i) node->keys[i - 1] = node->keys[i];
    node->count--;
}

void removeFromNonLeaf(struct BTreeNode* node, int idx) {
    int k = node->keys[idx];
    if (node->child[idx]->count >= M / 2) {
        int pred = getPredecessor(node, idx);
        node->keys[idx] = pred;
        removeKey(node->child[idx], pred);
    } else if (node->child[idx + 1]->count >= M / 2) {
        int succ = getSuccessor(node, idx);
        node->keys[idx] = succ;
        removeKey(node->child[idx + 1], succ);
    } else {
        merge(node, idx);
        removeKey(node->child[idx], k);
    }
}

int getPredecessor(struct BTreeNode* node, int idx) {
    struct BTreeNode* cur = node->child[idx];
    while (!cur->leaf) cur = cur->child[cur->count];
    return cur->keys[cur->count - 1];
}

int getSuccessor(struct BTreeNode* node, int idx) {
    struct BTreeNode* cur = node->child[idx + 1];
    while (!cur->leaf) cur = cur->child[0];
    return cur->keys[0];
}

void fill(struct BTreeNode* node, int idx) {
    if (idx != 0 && node->child[idx - 1]->count >= M / 2) borrowFromPrev(node, idx);
    else if (idx != node->count && node->child[idx + 1]->count >= M / 2) borrowFromNext(node, idx);
    else {
        if (idx != node->count) merge(node, idx);
        else merge(node, idx - 1);
    }
}

void borrowFromPrev(struct BTreeNode* node, int idx) {
    struct BTreeNode* child = node->child[idx];
    struct BTreeNode* sibling = node->child[idx - 1];
    for (int i = child->count - 1; i >= 0; --i) child->keys[i + 1] = child->keys[i];
    if (!child->leaf) {
        for (int i = child->count; i >= 0; --i) child->child[i + 1] = child->child[i];
    }
    child->keys[0] = node->keys[idx - 1];
    if (!child->leaf) child->child[0] = sibling->child[sibling->count];
    node->keys[idx - 1] = sibling->keys[sibling->count - 1];
    child->count += 1;
    sibling->count -= 1;
}

void borrowFromNext(struct BTreeNode* node, int idx) {
    struct BTreeNode* child = node->child[idx];
    struct BTreeNode* sibling = node->child[idx + 1];
    child->keys[child->count] = node->keys[idx];
    if (!child->leaf) child->child[child->count + 1] = sibling->child[0];
    node->keys[idx] = sibling->keys[0];
    for (int i = 1; i < sibling->count; ++i) sibling->keys[i - 1] = sibling->keys[i];
    if (!sibling->leaf) {
        for (int i = 1; i <= sibling->count; ++i) sibling->child[i - 1] = sibling->child[i];
    }
    child->count += 1;
    sibling->count -= 1;
}

void merge(struct BTreeNode* node, int idx) {
    struct BTreeNode* child = node->child[idx];
    struct BTreeNode* sibling = node->child[idx + 1];
    child->keys[MIN_KEYS] = node->keys[idx];
    for (int i = 0; i < sibling->count; ++i) child->keys[i + MIN_KEYS + 1] = sibling->keys[i];
    if (!child->leaf) {
        for (int i = 0; i <= sibling->count; ++i) child->child[i + MIN_KEYS + 1] = sibling->child[i];
    }
    for (int i = idx + 1; i < node->count; ++i) node->keys[i - 1] = node->keys[i];
    for (int i = idx + 2; i <= node->count; ++i) node->child[i - 1] = node->child[i];
    child->count += sibling->count + 1;
    node->count--;
    free(sibling);
}

void traverse(struct BTreeNode* root) {
    int i;
    for (i = 0; i < root->count; i++) {
        if (!root->leaf) traverse(root->child[i]);
        printf(" %d", root->keys[i]);
    }
    if (!root->leaf) traverse(root->child[i]);
}

int main() {
    struct BTreeNode* root = createNode(true);
    int vals[] = {10, 20, 5, 6, 12, 30, 7, 17};
    
    for(int i=0; i<8; i++) insert(&root, vals[i]);

    printf("Traversal: ");
    traverse(root);
    printf("\n");

    removeKey(root, 6);
    printf("After deleting 6: ");
    traverse(root);
    printf("\n");

    return 0;
}

Powered by Forestry.md