B Trees
A self-balancing tree data structure, which is designed for efficient lookups in disk storage and database systems. Unlike binary trees, it's nodes can have more than 2 children.
Core properties
A B tree of order
- Atmost
children per node - Atmost
keys per node - Every non root internal node has atleast
children - Every non root internal node has atleast
keys - The root has atleast two children, if it's not a leaf.
- All leaf nodes are at the same level
- Keys in each node are stored in sorted order
- Subtrees between keys contain value in the proper range
- If the pointer to subtree exists after key of value
and before value , then all keys in the subtree follow
- If the pointer to subtree exists after key of value
Some advantages
- Quick search, insert, delete : all of
time. - Optimized for disk access, since each node stores multiple keys, we have fewer overall I/O operations, and fewer read requests to disk, reducing overall latency
B trees general layout in C
unsigned int MAX = 3 // Order m = 4 (Max keys = m-1)
unsigned int MIN = 1 // Min keys = ceil(m/2) - 1
struct BTreeNode {
int keys[MAX]; // Array of keys
struct BTreeNode *child[MAX + 1]; // Pointers to children
int count; // Number of keys currently in node
bool leaf; // True if leaf node
};
General operations
Search
We perform a binary search upon the keys in a node, and move to the correct child node if the key isn't present
struct BTreeNode* search(struct BTreeNode* root, int k) {
int i = 0;
// Find the first key greater than or equal to k
while (i < root->count && k > root->keys[i]) {
i++;
}
// If the key is found in this node
if (i < root->count && root->keys[i] == k) {
return root;
}
// If the key is not found and this is a leaf node
if (root->leaf) {
return NULL;
}
// Recurse to the appropriate child
return search(root->child[i], k);
}
Time complexity :
Insertion
Insertion always happens at the leaf level. The following steps are followed
- Traverse : Find the leaf node where key should be inserted
- If the key has capacity remaining, insert in the key in sorted order
- If capacity is over, Split the node.
The Split :
- Median key is moved up to the parent.
- The remaining keys are divided into two sibling nodes
- If the parent is full, the split propogates upwards.
- If the root is full, a new root is created with split, increasing height of tree by 1
Code : Splitting child
// Function to split the child y of node x
void splitChild(struct BTreeNode *x, int i, struct BTreeNode *y) {
struct BTreeNode *z = malloc(sizeof(struct BTreeNode));
z->leaf = y->leaf;
z->count = MIN;
// Copy the last MIN keys of y to z
for (int j = 0; j < MIN; j++)
z->keys[j] = y->keys[j + MIN + 1];
// Copy the last MIN+1 children of y to z
if (!y->leaf) {
for (int j = 0; j <= MIN; j++)
z->child[j] = y->child[j + MIN + 1];
}
y->count = MIN;
// Shift children of x to make room for z
for (int j = x->count; j >= i + 1; j--)
x->child[j + 1] = x->child[j];
x->child[i + 1] = z;
// Shift keys of x to move median key up from y
for (int j = x->count - 1; j >= i; j--)
x->keys[j + 1] = x->keys[j];
x->keys[i] = y->keys[MIN];
x->count++;
}
Time complexity :
Deletion
Deletion is complex because balance is required to be maintained, and a node must never have less than
Case A : Node is leaf
- If keys > MIN_KEYS, simply delete key and move on
- else, we must borrow from a sibling, or merge with a sibling.
Case B : Node is internal
- Find the In-Order Predecessor or In-Order Successor
- Replace the key with successor/predecessor
- Recursively delete the successor/predecessor from the leaf.
Code : Deletion (Borrowing from prev)
void borrowFromPrev(struct BTreeNode *node, int idx) {
struct BTreeNode *child = node->child[idx];
struct BTreeNode *sibling = node->child[idx - 1];
// Shift all keys in child one step ahead
for (int i = child->count - 1; i >= 0; --i)
child->keys[i + 1] = child->keys[i];
if (!child->leaf) {
for (int i = child->count; i >= 0; --i)
child->child[i + 1] = child->child[i];
}
// Setting child's first key equal to the key from the current node
child->keys[0] = node->keys[idx - 1];
if (!child->leaf)
child->child[0] = sibling->child[sibling->count];
// Moving the last key from sibling to the parent (node)
node->keys[idx - 1] = sibling->keys[sibling->count - 1];
child->count += 1;
sibling->count -= 1;
}
Time complexity :
Full C implementation
#include <stdio.h>
#include <stdlib.h>
#include <stdbool.h>
#define M 4
#define MAX_KEYS (M - 1)
#define MIN_KEYS (M / 2 - 1)
struct BTreeNode {
int keys[MAX_KEYS];
struct BTreeNode *child[M];
int count;
bool leaf;
};
// --- Function Prototypes ---
struct BTreeNode* createNode(bool leaf);
void traverse(struct BTreeNode* root);
struct BTreeNode* search(struct BTreeNode* root, int k);
void insert(struct BTreeNode** root, int k);
void insertNonFull(struct BTreeNode* x, int k);
void splitChild(struct BTreeNode* x, int i, struct BTreeNode* y);
void removeKey(struct BTreeNode* root, int k);
void removeFromLeaf(struct BTreeNode* node, int idx);
void removeFromNonLeaf(struct BTreeNode* node, int idx);
int getPredecessor(struct BTreeNode* node, int idx);
int getSuccessor(struct BTreeNode* node, int idx);
void fill(struct BTreeNode* node, int idx);
void borrowFromPrev(struct BTreeNode* node, int idx);
void borrowFromNext(struct BTreeNode* node, int idx);
void merge(struct BTreeNode* node, int idx);
// --- Core Helper: Create Node ---
struct BTreeNode* createNode(bool leaf) {
struct BTreeNode* newNode = (struct BTreeNode*)malloc(sizeof(struct BTreeNode));
newNode->leaf = leaf;
newNode->count = 0;
for (int i = 0; i < M; i++) newNode->child[i] = NULL;
return newNode;
}
// --- INSERTION LOGIC ---
void insert(struct BTreeNode** root, int k) {
struct BTreeNode* r = *root;
if (r->count == MAX_KEYS) {
struct BTreeNode* s = createNode(false);
*root = s;
s->child[0] = r;
splitChild(s, 0, r);
insertNonFull(s, k);
} else {
insertNonFull(r, k);
}
}
void insertNonFull(struct BTreeNode* x, int k) {
int i = x->count - 1;
if (x->leaf) {
while (i >= 0 && x->keys[i] > k) {
x->keys[i + 1] = x->keys[i];
i--;
}
x->keys[i + 1] = k;
x->count++;
} else {
while (i >= 0 && x->keys[i] > k) i--;
if (x->child[i + 1]->count == MAX_KEYS) {
splitChild(x, i + 1, x->child[i + 1]);
if (x->keys[i + 1] < k) i++;
}
insertNonFull(x->child[i + 1], k);
}
}
void splitChild(struct BTreeNode* x, int i, struct BTreeNode* y) {
struct BTreeNode* z = createNode(y->leaf);
z->count = MIN_KEYS;
for (int j = 0; j < MIN_KEYS; j++) z->keys[j] = y->keys[j + MIN_KEYS + 1];
if (!y->leaf) {
for (int j = 0; j <= MIN_KEYS; j++) z->child[j] = y->child[j + MIN_KEYS + 1];
}
y->count = MIN_KEYS;
for (int j = x->count; j >= i + 1; j--) x->child[j + 1] = x->child[j];
x->child[i + 1] = z;
for (int j = x->count - 1; j >= i; j--) x->keys[j + 1] = x->keys[j];
x->keys[i] = y->keys[MIN_KEYS];
x->count++;
}
// --- DELETION LOGIC ---
int findKey(struct BTreeNode* node, int k) {
int idx = 0;
while (idx < node->count && node->keys[idx] < k) ++idx;
return idx;
}
void removeKey(struct BTreeNode* node, int k) {
int idx = findKey(node, k);
if (idx < node->count && node->keys[idx] == k) {
if (node->leaf) removeFromLeaf(node, idx);
else removeFromNonLeaf(node, idx);
} else {
if (node->leaf) return;
bool flag = (idx == node->count);
if (node->child[idx]->count < M / 2) fill(node, idx);
if (flag && idx > node->count) removeKey(node->child[idx - 1], k);
else removeKey(node->child[idx], k);
}
}
void removeFromLeaf(struct BTreeNode* node, int idx) {
for (int i = idx + 1; i < node->count; ++i) node->keys[i - 1] = node->keys[i];
node->count--;
}
void removeFromNonLeaf(struct BTreeNode* node, int idx) {
int k = node->keys[idx];
if (node->child[idx]->count >= M / 2) {
int pred = getPredecessor(node, idx);
node->keys[idx] = pred;
removeKey(node->child[idx], pred);
} else if (node->child[idx + 1]->count >= M / 2) {
int succ = getSuccessor(node, idx);
node->keys[idx] = succ;
removeKey(node->child[idx + 1], succ);
} else {
merge(node, idx);
removeKey(node->child[idx], k);
}
}
int getPredecessor(struct BTreeNode* node, int idx) {
struct BTreeNode* cur = node->child[idx];
while (!cur->leaf) cur = cur->child[cur->count];
return cur->keys[cur->count - 1];
}
int getSuccessor(struct BTreeNode* node, int idx) {
struct BTreeNode* cur = node->child[idx + 1];
while (!cur->leaf) cur = cur->child[0];
return cur->keys[0];
}
void fill(struct BTreeNode* node, int idx) {
if (idx != 0 && node->child[idx - 1]->count >= M / 2) borrowFromPrev(node, idx);
else if (idx != node->count && node->child[idx + 1]->count >= M / 2) borrowFromNext(node, idx);
else {
if (idx != node->count) merge(node, idx);
else merge(node, idx - 1);
}
}
void borrowFromPrev(struct BTreeNode* node, int idx) {
struct BTreeNode* child = node->child[idx];
struct BTreeNode* sibling = node->child[idx - 1];
for (int i = child->count - 1; i >= 0; --i) child->keys[i + 1] = child->keys[i];
if (!child->leaf) {
for (int i = child->count; i >= 0; --i) child->child[i + 1] = child->child[i];
}
child->keys[0] = node->keys[idx - 1];
if (!child->leaf) child->child[0] = sibling->child[sibling->count];
node->keys[idx - 1] = sibling->keys[sibling->count - 1];
child->count += 1;
sibling->count -= 1;
}
void borrowFromNext(struct BTreeNode* node, int idx) {
struct BTreeNode* child = node->child[idx];
struct BTreeNode* sibling = node->child[idx + 1];
child->keys[child->count] = node->keys[idx];
if (!child->leaf) child->child[child->count + 1] = sibling->child[0];
node->keys[idx] = sibling->keys[0];
for (int i = 1; i < sibling->count; ++i) sibling->keys[i - 1] = sibling->keys[i];
if (!sibling->leaf) {
for (int i = 1; i <= sibling->count; ++i) sibling->child[i - 1] = sibling->child[i];
}
child->count += 1;
sibling->count -= 1;
}
void merge(struct BTreeNode* node, int idx) {
struct BTreeNode* child = node->child[idx];
struct BTreeNode* sibling = node->child[idx + 1];
child->keys[MIN_KEYS] = node->keys[idx];
for (int i = 0; i < sibling->count; ++i) child->keys[i + MIN_KEYS + 1] = sibling->keys[i];
if (!child->leaf) {
for (int i = 0; i <= sibling->count; ++i) child->child[i + MIN_KEYS + 1] = sibling->child[i];
}
for (int i = idx + 1; i < node->count; ++i) node->keys[i - 1] = node->keys[i];
for (int i = idx + 2; i <= node->count; ++i) node->child[i - 1] = node->child[i];
child->count += sibling->count + 1;
node->count--;
free(sibling);
}
void traverse(struct BTreeNode* root) {
int i;
for (i = 0; i < root->count; i++) {
if (!root->leaf) traverse(root->child[i]);
printf(" %d", root->keys[i]);
}
if (!root->leaf) traverse(root->child[i]);
}
int main() {
struct BTreeNode* root = createNode(true);
int vals[] = {10, 20, 5, 6, 12, 30, 7, 17};
for(int i=0; i<8; i++) insert(&root, vals[i]);
printf("Traversal: ");
traverse(root);
printf("\n");
removeKey(root, 6);
printf("After deleting 6: ");
traverse(root);
printf("\n");
return 0;
}