PMTiles vs QBTiles: Index Size Comparison¶
Read the entries from a real PMTiles file, re-serialize them as both a PMTiles directory and a QBTiles index, and compare the resulting index sizes.
The tile data is identical in both formats — only the index encoding differs.
Sample file¶

curl -LO https://raw.githubusercontent.com/vuski/qbtiles/main/examples/sample_adm_korea.pmtiles
1. PMTiles Reader (minimal implementation)¶
In [1]:
Copied!
import sys, os, gzip, io, mmap, time
import numpy as np
sys.path.insert(0, os.path.join("..", "..", "src", "python"))
import qbtiles as qbt
def pm_read_varint(b_io):
    """Decode one unsigned base-128 varint from a binary stream.

    Bytes are consumed from ``b_io`` one at a time. Each byte contributes its
    low 7 bits (least-significant group first); a set high bit (0x80) means
    another byte follows.

    Args:
        b_io: Binary file-like object whose ``read(1)`` returns ``bytes``.

    Returns:
        The decoded non-negative integer.

    Raises:
        EOFError: If the stream ends before a terminating byte (high bit
            clear) has been read.
    """
    result = 0
    shift = 0
    while True:
        raw = b_io.read(1)
        if not raw:
            # Truncated input: say what was being parsed instead of raising
            # a bare, message-less EOFError.
            raise EOFError("unexpected end of stream while reading varint")
        byte = raw[0]
        result |= (byte & 0x7F) << shift
        if not byte & 0x80:
            return result
        shift += 7
class PMEntry:
    """Mutable record for one PMTiles directory entry.

    ``__slots__`` is used so that instances carry no per-instance ``__dict__``.

    Args:
        t: Tile ID, stored as ``tile_id``.
        o: Byte offset, stored as ``offset``.
        l: Byte length, stored as ``length``.
        r: Run length, stored as ``run_length``.
    """

    __slots__ = ("tile_id", "offset", "length", "run_length")

    def __init__(self, t, o, l, r):
        self.tile_id = t
        self.offset = o
        self.length = l
        self.run_length = r

    def __repr__(self):
        # Readable representation for debugging/inspection in the notebook.
        return (
            f"PMEntry(tile_id={self.tile_id!r}, offset={self.offset!r}, "
            f"length={self.length!r}, run_length={self.run_length!r})"
        )
def deserialize_directory(buf):
    """Parse a gzip-compressed PMTiles directory into a list of entries.

    After decompression the payload is read as a sequence of varints: an
    entry count ``n``, then four columns of ``n`` values each, in this order:
    tile-ID deltas, run lengths, lengths, encoded offsets.

    Args:
        buf: Gzip-compressed directory bytes.

    Returns:
        A list of ``PMEntry`` objects in stored order.
    """
    stream = io.BytesIO(gzip.decompress(buf))
    count = pm_read_varint(stream)

    # Column 1: tile IDs are stored as deltas from the previous ID.
    entries = []
    running_id = 0
    for _ in range(count):
        running_id += pm_read_varint(stream)
        entries.append(PMEntry(running_id, 0, 0, 0))

    # Columns 2 and 3: run lengths, then byte lengths.
    for entry in entries:
        entry.run_length = pm_read_varint(stream)
    for entry in entries:
        entry.length = pm_read_varint(stream)

    # Column 4: an encoded 0 (except on the first entry) means "directly
    # after the previous entry"; any other value is the offset plus one.
    previous = None
    for entry in entries:
        encoded = pm_read_varint(stream)
        if encoded == 0 and previous is not None:
            entry.offset = previous.offset + previous.length
        else:
            entry.offset = encoded - 1
        previous = entry
    return entries
def deserialize_header(buf):
    """Extract the directory-related fields from a PMTiles header.

    Each field is read as an 8-byte little-endian unsigned integer at a fixed
    byte position.

    Args:
        buf: Bytes-like object holding the start of the file. It must begin
            with the ASCII magic ``PMTiles`` and be at least 88 bytes long
            (the last field read ends at byte 88).

    Returns:
        A dict with the keys ``root_offset``, ``root_length``,
        ``leaf_directory_offset``, ``leaf_directory_length`` and
        ``tile_entries_count``.

    Raises:
        ValueError: If ``buf`` is too short or does not start with the
            ``PMTiles`` magic. Without this check, slicing a truncated buffer
            would silently decode wrong values.
    """
    min_len = 88  # tile_entries_count occupies bytes 80..87
    if len(buf) < min_len:
        raise ValueError(
            f"PMTiles header too short: got {len(buf)} bytes, "
            f"need at least {min_len}"
        )
    if bytes(buf[:7]) != b"PMTiles":
        raise ValueError("not a PMTiles archive: bad magic number")

    def r64(p):
        return int.from_bytes(buf[p:p + 8], "little")

    return {
        "root_offset": r64(8),
        "root_length": r64(16),
        "leaf_directory_offset": r64(40),
        "leaf_directory_length": r64(48),
        "tile_entries_count": r64(80),
    }
def get_all_leaf_entries(filename):
    """Collect all leaf entries from a PMTiles file.

    The file is memory-mapped read-only. The header is parsed from its first
    127 bytes and the root directory is then walked recursively: an entry
    with ``run_length == 0`` is followed as a pointer to a sub-directory at
    ``leaf_directory_offset + entry.offset``; every other entry is collected.

    Args:
        filename: Path of the PMTiles file to read.

    Returns:
        A ``(entries, header)`` tuple: the collected entries in traversal
        order and the dict returned by ``deserialize_header``.
    """
    tile_entries = []
    # Both the file and the mapping are context-managed so the mapping is
    # released even when header or directory parsing raises.
    with open(filename, "rb") as f, \
            mmap.mmap(f.fileno(), 0, access=mmap.ACCESS_READ) as mm:
        header = deserialize_header(mm[0:127])
        leaf_base = header["leaf_directory_offset"]

        def collect(off, length):
            for e in deserialize_directory(mm[off:off + length]):
                if e.run_length == 0:
                    # Pointer entry: descend into the referenced directory.
                    collect(leaf_base + e.offset, e.length)
                else:
                    tile_entries.append(e)

        collect(header["root_offset"], header["root_length"])
    return tile_entries, header
# Confirmation that the reader definitions in this cell were executed.
print("PMTiles reader loaded.")
import sys, os, gzip, io, mmap, time
import numpy as np
sys.path.insert(0, os.path.join("..", "..", "src", "python"))
import qbtiles as qbt
def pm_read_varint(b_io):
    """Decode one unsigned base-128 varint from a binary stream.

    Bytes are consumed from ``b_io`` one at a time. Each byte contributes its
    low 7 bits (least-significant group first); a set high bit (0x80) means
    another byte follows.

    Args:
        b_io: Binary file-like object whose ``read(1)`` returns ``bytes``.

    Returns:
        The decoded non-negative integer.

    Raises:
        EOFError: If the stream ends before a terminating byte (high bit
            clear) has been read.
    """
    result = 0
    shift = 0
    while True:
        raw = b_io.read(1)
        if not raw:
            # Truncated input: say what was being parsed instead of raising
            # a bare, message-less EOFError.
            raise EOFError("unexpected end of stream while reading varint")
        byte = raw[0]
        result |= (byte & 0x7F) << shift
        if not byte & 0x80:
            return result
        shift += 7
class PMEntry:
    """Mutable record for one PMTiles directory entry.

    ``__slots__`` is used so that instances carry no per-instance ``__dict__``.

    Args:
        t: Tile ID, stored as ``tile_id``.
        o: Byte offset, stored as ``offset``.
        l: Byte length, stored as ``length``.
        r: Run length, stored as ``run_length``.
    """

    __slots__ = ("tile_id", "offset", "length", "run_length")

    def __init__(self, t, o, l, r):
        self.tile_id = t
        self.offset = o
        self.length = l
        self.run_length = r

    def __repr__(self):
        # Readable representation for debugging/inspection in the notebook.
        return (
            f"PMEntry(tile_id={self.tile_id!r}, offset={self.offset!r}, "
            f"length={self.length!r}, run_length={self.run_length!r})"
        )
def deserialize_directory(buf):
    """Parse a gzip-compressed PMTiles directory into a list of entries.

    After decompression the payload is read as a sequence of varints: an
    entry count ``n``, then four columns of ``n`` values each, in this order:
    tile-ID deltas, run lengths, lengths, encoded offsets.

    Args:
        buf: Gzip-compressed directory bytes.

    Returns:
        A list of ``PMEntry`` objects in stored order.
    """
    stream = io.BytesIO(gzip.decompress(buf))
    count = pm_read_varint(stream)

    # Column 1: tile IDs are stored as deltas from the previous ID.
    entries = []
    running_id = 0
    for _ in range(count):
        running_id += pm_read_varint(stream)
        entries.append(PMEntry(running_id, 0, 0, 0))

    # Columns 2 and 3: run lengths, then byte lengths.
    for entry in entries:
        entry.run_length = pm_read_varint(stream)
    for entry in entries:
        entry.length = pm_read_varint(stream)

    # Column 4: an encoded 0 (except on the first entry) means "directly
    # after the previous entry"; any other value is the offset plus one.
    previous = None
    for entry in entries:
        encoded = pm_read_varint(stream)
        if encoded == 0 and previous is not None:
            entry.offset = previous.offset + previous.length
        else:
            entry.offset = encoded - 1
        previous = entry
    return entries
def deserialize_header(buf):
    """Extract the directory-related fields from a PMTiles header.

    Each field is read as an 8-byte little-endian unsigned integer at a fixed
    byte position.

    Args:
        buf: Bytes-like object holding the start of the file. It must begin
            with the ASCII magic ``PMTiles`` and be at least 88 bytes long
            (the last field read ends at byte 88).

    Returns:
        A dict with the keys ``root_offset``, ``root_length``,
        ``leaf_directory_offset``, ``leaf_directory_length`` and
        ``tile_entries_count``.

    Raises:
        ValueError: If ``buf`` is too short or does not start with the
            ``PMTiles`` magic. Without this check, slicing a truncated buffer
            would silently decode wrong values.
    """
    min_len = 88  # tile_entries_count occupies bytes 80..87
    if len(buf) < min_len:
        raise ValueError(
            f"PMTiles header too short: got {len(buf)} bytes, "
            f"need at least {min_len}"
        )
    if bytes(buf[:7]) != b"PMTiles":
        raise ValueError("not a PMTiles archive: bad magic number")

    def r64(p):
        return int.from_bytes(buf[p:p + 8], "little")

    return {
        "root_offset": r64(8),
        "root_length": r64(16),
        "leaf_directory_offset": r64(40),
        "leaf_directory_length": r64(48),
        "tile_entries_count": r64(80),
    }
def get_all_leaf_entries(filename):
    """Collect all leaf entries from a PMTiles file.

    The file is memory-mapped read-only. The header is parsed from its first
    127 bytes and the root directory is then walked recursively: an entry
    with ``run_length == 0`` is followed as a pointer to a sub-directory at
    ``leaf_directory_offset + entry.offset``; every other entry is collected.

    Args:
        filename: Path of the PMTiles file to read.

    Returns:
        A ``(entries, header)`` tuple: the collected entries in traversal
        order and the dict returned by ``deserialize_header``.
    """
    tile_entries = []
    # Both the file and the mapping are context-managed so the mapping is
    # released even when header or directory parsing raises.
    with open(filename, "rb") as f, \
            mmap.mmap(f.fileno(), 0, access=mmap.ACCESS_READ) as mm:
        header = deserialize_header(mm[0:127])
        leaf_base = header["leaf_directory_offset"]

        def collect(off, length):
            for e in deserialize_directory(mm[off:off + length]):
                if e.run_length == 0:
                    # Pointer entry: descend into the referenced directory.
                    collect(leaf_base + e.offset, e.length)
                else:
                    tile_entries.append(e)

        collect(header["root_offset"], header["root_length"])
    return tile_entries, header
# Confirmation that the reader definitions in this cell were executed.
print("PMTiles reader loaded.")
PMTiles reader loaded.
2. Read PMTiles file¶
In [2]:
Copied!
import urllib.request
# Locate the sample archive: prefer the copy in the repository's examples
# directory, fall back to the working directory, and download it there only
# when neither copy exists.
pmtiles_path = os.path.join("..", "..", "examples", "sample_adm_korea.pmtiles")
if not os.path.exists(pmtiles_path):
    pmtiles_path = "sample_adm_korea.pmtiles"
    if not os.path.exists(pmtiles_path):
        url = "https://raw.githubusercontent.com/vuski/qbtiles/main/examples/sample_adm_korea.pmtiles"
        print(f"Downloading {url} ...")
        urllib.request.urlretrieve(url, pmtiles_path)

# Report basic facts about the file.
file_size = os.path.getsize(pmtiles_path)
print(f"File: {os.path.basename(pmtiles_path)}")
print(f"Size: {file_size / 1024**2:.1f} MB")

# Walk every directory in the archive and time the whole read.
t0 = time.time()
entries, header = get_all_leaf_entries(pmtiles_path)
print(f"Entries: {len(entries):,}")
print(f"Read time: {time.time()-t0:.2f}s")
import urllib.request
# Locate the sample archive: prefer the copy in the repository's examples
# directory, fall back to the working directory, and download it there only
# when neither copy exists.
pmtiles_path = os.path.join("..", "..", "examples", "sample_adm_korea.pmtiles")
if not os.path.exists(pmtiles_path):
    pmtiles_path = "sample_adm_korea.pmtiles"
    if not os.path.exists(pmtiles_path):
        url = "https://raw.githubusercontent.com/vuski/qbtiles/main/examples/sample_adm_korea.pmtiles"
        print(f"Downloading {url} ...")
        urllib.request.urlretrieve(url, pmtiles_path)

# Report basic facts about the file.
file_size = os.path.getsize(pmtiles_path)
print(f"File: {os.path.basename(pmtiles_path)}")
print(f"Size: {file_size / 1024**2:.1f} MB")

# Walk every directory in the archive and time the whole read.
t0 = time.time()
entries, header = get_all_leaf_entries(pmtiles_path)
print(f"Entries: {len(entries):,}")
print(f"Read time: {time.time()-t0:.2f}s")
File: sample_adm_korea.pmtiles
Size: 28.6 MB
Entries: 36,149
Read time: 0.04s
3. PMTiles index size¶
The actual directory size is taken from the file header (root + leaf directories). The entries are also re-serialized as a single flat directory for a fair comparison.
In [3]:
Copied!
# Directory size as recorded in the archive header: root + leaf directories.
pm_actual = header["root_length"] + header["leaf_directory_length"]
print(f"PMTiles actual directory: {pm_actual:,} bytes")

# Rebuild the same entries as qbt.Entry objects and serialize them as one
# flat directory; the result is gzip data, so the raw size is measured by
# decompressing it.
pm_entries = [
    qbt.Entry(src.tile_id, src.offset, src.length, src.run_length)
    for src in entries
]
pm_gz = qbt.serialize_directory(pm_entries)
pm_raw_size = len(gzip.decompress(pm_gz))
pm_gz_size = len(pm_gz)
print(f"PMTiles re-serialized: {pm_gz_size:,} bytes (gzip), {pm_raw_size:,} bytes (raw)")
# Directory size as recorded in the archive header: root + leaf directories.
pm_actual = header["root_length"] + header["leaf_directory_length"]
print(f"PMTiles actual directory: {pm_actual:,} bytes")

# Rebuild the same entries as qbt.Entry objects and serialize them as one
# flat directory; the result is gzip data, so the raw size is measured by
# decompressing it.
pm_entries = [
    qbt.Entry(src.tile_id, src.offset, src.length, src.run_length)
    for src in entries
]
pm_gz = qbt.serialize_directory(pm_entries)
pm_raw_size = len(gzip.decompress(pm_gz))
pm_gz_size = len(pm_gz)
print(f"PMTiles re-serialized: {pm_gz_size:,} bytes (gzip), {pm_raw_size:,} bytes (raw)")
PMTiles actual directory: 82,406 bytes
PMTiles re-serialized: 80,891 bytes (gzip), 203,746 bytes (raw)
4. QBTiles index size¶
Convert Hilbert tile IDs → quadkeys, sort, build quadtree, serialize.
In [4]:
Copied!
# Convert tile_id → quadkey_int64
# Use the optional `tileid_encoder` module from the examples directory when
# it can be imported (reported as "C++" below); otherwise fall back to the
# pure-Python converter in qbt.
sys.path.insert(0, os.path.join("..", "..", "examples"))
try:
    import tileid_encoder
    use_cpp = True
except ImportError:
    use_cpp = False

t0 = time.time()
if use_cpp:
    tile_ids = np.array([e.tile_id for e in entries], dtype=np.uint64)
    quadkeys = tileid_encoder.encode_array(tile_ids)
    label = "C++"
else:
    quadkeys = np.array([qbt.tileid_to_quadkey_int64(e.tile_id) for e in entries], dtype=np.int64)
    label = "Python"
lengths = np.array([e.length for e in entries], dtype=np.int64)
run_lengths = np.array([e.run_length for e in entries], dtype=np.int64)
print(f"Quadkey conversion ({label}): {time.time()-t0:.2f}s")

# Sort by quadkey, applying the same permutation to the parallel arrays
sort_idx = np.argsort(quadkeys)
quadkeys = quadkeys[sort_idx]
lengths = lengths[sort_idx]
run_lengths = run_lengths[sort_idx]

# Recalculate offsets: each entry starts where the previous one ends in the
# new (quadkey) order, i.e. the exclusive running sum of the lengths.
offsets = np.concatenate([[0], np.cumsum(lengths[:-1])])

# Build quadkey_info: (quadkey, "", offset, length, run_length) tuples
qk_info = list(zip(
    quadkeys.tolist(),
    [""] * len(entries),
    offsets.tolist(),
    lengths.tolist(),
    run_lengths.tolist(),
))

# Build quadtree and serialize
t0 = time.time()
root = qbt.build_quadtree(qk_info)
print(f"Tree build: {time.time()-t0:.2f}s")

t0 = time.time()
tmp_path = "_tmp_compare.gz"
try:
    qbt.write_tree_bitmask_to_single_file(root, tmp_path)
    qb_gz_size = os.path.getsize(tmp_path)
    with gzip.open(tmp_path, "rb") as f:
        qb_raw_size = len(f.read())
finally:
    # Always clean up the scratch file, even when serialization or the
    # read-back fails, so a failed run does not leave it in the working dir.
    if os.path.exists(tmp_path):
        os.remove(tmp_path)
print(f"Serialize: {time.time()-t0:.2f}s")
print(f"\nQBTiles index: {qb_gz_size:,} bytes (gzip), {qb_raw_size:,} bytes (raw)")
# Convert tile_id → quadkey_int64
# Use the optional `tileid_encoder` module from the examples directory when
# it can be imported (reported as "C++" below); otherwise fall back to the
# pure-Python converter in qbt.
sys.path.insert(0, os.path.join("..", "..", "examples"))
try:
    import tileid_encoder
    use_cpp = True
except ImportError:
    use_cpp = False

t0 = time.time()
if use_cpp:
    tile_ids = np.array([e.tile_id for e in entries], dtype=np.uint64)
    quadkeys = tileid_encoder.encode_array(tile_ids)
    label = "C++"
else:
    quadkeys = np.array([qbt.tileid_to_quadkey_int64(e.tile_id) for e in entries], dtype=np.int64)
    label = "Python"
lengths = np.array([e.length for e in entries], dtype=np.int64)
run_lengths = np.array([e.run_length for e in entries], dtype=np.int64)
print(f"Quadkey conversion ({label}): {time.time()-t0:.2f}s")

# Sort by quadkey, applying the same permutation to the parallel arrays
sort_idx = np.argsort(quadkeys)
quadkeys = quadkeys[sort_idx]
lengths = lengths[sort_idx]
run_lengths = run_lengths[sort_idx]

# Recalculate offsets: each entry starts where the previous one ends in the
# new (quadkey) order, i.e. the exclusive running sum of the lengths.
offsets = np.concatenate([[0], np.cumsum(lengths[:-1])])

# Build quadkey_info: (quadkey, "", offset, length, run_length) tuples
qk_info = list(zip(
    quadkeys.tolist(),
    [""] * len(entries),
    offsets.tolist(),
    lengths.tolist(),
    run_lengths.tolist(),
))

# Build quadtree and serialize
t0 = time.time()
root = qbt.build_quadtree(qk_info)
print(f"Tree build: {time.time()-t0:.2f}s")

t0 = time.time()
tmp_path = "_tmp_compare.gz"
try:
    qbt.write_tree_bitmask_to_single_file(root, tmp_path)
    qb_gz_size = os.path.getsize(tmp_path)
    with gzip.open(tmp_path, "rb") as f:
        qb_raw_size = len(f.read())
finally:
    # Always clean up the scratch file, even when serialization or the
    # read-back fails, so a failed run does not leave it in the working dir.
    if os.path.exists(tmp_path):
        os.remove(tmp_path)
print(f"Serialize: {time.time()-t0:.2f}s")
print(f"\nQBTiles index: {qb_gz_size:,} bytes (gzip), {qb_raw_size:,} bytes (raw)")
Quadkey conversion (C++): 0.01s
Tree build: 0.12s
Serialize: 0.06s

QBTiles index: 61,251 bytes (gzip), 150,534 bytes (raw)
5. Comparison¶
In [5]:
Copied!
def fmt(n):
    """Format a byte count using 1024-based units.

    Values of at least 1024*1024 are shown in MB and values of at least 1024
    in KB, both with one decimal place; smaller values are shown unscaled
    with a ``B`` suffix.
    """
    # Largest unit first, so the first threshold reached decides the unit.
    for suffix, divisor in (("MB", 1024 * 1024), ("KB", 1024)):
        if n >= divisor:
            return f"{n / divisor:.1f} {suffix}"
    return f"{n} B"
# Summary header.
print(f"File: {os.path.basename(pmtiles_path)}")
print(f"Entries: {len(entries):,}")
print()

# Size table: one row per format, gzip and raw columns.
rule = "-" * 48
print(f"{'':20} {'Gzip':>12} {'Raw':>12}")
print(rule)
print(f"{'PMTiles':20} {fmt(pm_gz_size):>12} {fmt(pm_raw_size):>12}")
print(f"{'QBTiles':20} {fmt(qb_gz_size):>12} {fmt(qb_raw_size):>12}")
print(rule)

# Relative size of the QBTiles index versus PMTiles, as a signed percentage
# (negative means QBTiles is smaller).
raw_ratio = (qb_raw_size / pm_raw_size - 1) * 100
gz_ratio = (qb_gz_size / pm_gz_size - 1) * 100
print(f"{'QBTiles / PMTiles':20} {gz_ratio:>+11.1f}% {raw_ratio:>+11.1f}%")
print()

# Average gzip-compressed index bytes per entry.
n_entries = len(entries)
bpe_pm = pm_gz_size / n_entries
bpe_qb = qb_gz_size / n_entries
print(f"Bytes per entry (gzip): PMTiles {bpe_pm:.2f} B, QBTiles {bpe_qb:.2f} B")
def fmt(n):
    """Format a byte count using 1024-based units.

    Values of at least 1024*1024 are shown in MB and values of at least 1024
    in KB, both with one decimal place; smaller values are shown unscaled
    with a ``B`` suffix.
    """
    # Largest unit first, so the first threshold reached decides the unit.
    for suffix, divisor in (("MB", 1024 * 1024), ("KB", 1024)):
        if n >= divisor:
            return f"{n / divisor:.1f} {suffix}"
    return f"{n} B"
# Summary header.
print(f"File: {os.path.basename(pmtiles_path)}")
print(f"Entries: {len(entries):,}")
print()

# Size table: one row per format, gzip and raw columns.
rule = "-" * 48
print(f"{'':20} {'Gzip':>12} {'Raw':>12}")
print(rule)
print(f"{'PMTiles':20} {fmt(pm_gz_size):>12} {fmt(pm_raw_size):>12}")
print(f"{'QBTiles':20} {fmt(qb_gz_size):>12} {fmt(qb_raw_size):>12}")
print(rule)

# Relative size of the QBTiles index versus PMTiles, as a signed percentage
# (negative means QBTiles is smaller).
raw_ratio = (qb_raw_size / pm_raw_size - 1) * 100
gz_ratio = (qb_gz_size / pm_gz_size - 1) * 100
print(f"{'QBTiles / PMTiles':20} {gz_ratio:>+11.1f}% {raw_ratio:>+11.1f}%")
print()

# Average gzip-compressed index bytes per entry.
n_entries = len(entries)
bpe_pm = pm_gz_size / n_entries
bpe_qb = qb_gz_size / n_entries
print(f"Bytes per entry (gzip): PMTiles {bpe_pm:.2f} B, QBTiles {bpe_qb:.2f} B")
File: sample_adm_korea.pmtiles
Entries: 36,149
Gzip Raw
------------------------------------------------
PMTiles 79.0 KB 199.0 KB
QBTiles 59.8 KB 147.0 KB
------------------------------------------------
QBTiles / PMTiles -24.3% -26.1%
Bytes per entry (gzip): PMTiles 2.24 B, QBTiles 1.69 B