Significantly improved performance; more compact data representation; and the ability to work with very large data sets while avoiding nasty GC pauses[1].

Suitable Example - Trade Data

Traditional Java Approach

/**
 * Benchmark of the conventional on-heap layout: an array of references to
 * individually allocated {@link JavaMemoryTrade} objects.
 *
 * <p>Each run allocates and populates {@code NUM_RECORDS} trades, then scans
 * them accumulating the buy and sell cost, and prints the heap usage and the
 * elapsed time.
 */
public class TestJavaMemoryLayout {
    private static final int NUM_RECORDS = 50 * 1000 * 1000;

    private static JavaMemoryTrade[] trades;

    /**
     * Executes five benchmark runs, requesting a GC before each one.
     *
     * @param args ignored
     */
    public static void main(final String[] args) {
        for (int i = 0; i < 5; i++) {
            System.gc();
            perfRun(i);
        }
    }

    /**
     * Performs one timed run: populate the records, then sum price * quantity
     * per side.
     *
     * @param runNum run index, used only to label the output
     */
    private static void perfRun(final int runNum) {
        // nanoTime is monotonic, so the measured interval is immune to wall-clock adjustments.
        final long start = System.nanoTime();
        init();

        System.out.format("Memory %,d total, %,d free\n",
                          Runtime.getRuntime().totalMemory(),
                          Runtime.getRuntime().freeMemory());

        // NOTE: with 50M records where price == quantity == index, these totals
        // exceed Long.MAX_VALUE and silently wrap around.
        long buyCost = 0;
        long sellCost = 0;

        for (int i = 0; i < NUM_RECORDS; i++) {
            final JavaMemoryTrade trade = get(i);

            if (trade.getSide() == 'B') {
                buyCost += (trade.getPrice() * trade.getQuantity());
            } else {
                sellCost += (trade.getPrice() * trade.getQuantity());
            }
        }

        final long duration = (System.nanoTime() - start) / 1000000L;
        System.out.println(runNum + " - duration " + duration + "ms");
        System.out.println("buyCost = " + buyCost + " sellCost = " + sellCost);
    }

    /** Returns the trade stored at {@code index}. */
    private static JavaMemoryTrade get(final int index) {
        return trades[index];
    }

    /**
     * Allocates a fresh array of {@code NUM_RECORDS} trades and fills every
     * field; even indices are buys ('B'), odd indices are sells ('S').
     */
    public static void init() {
        trades = new JavaMemoryTrade[NUM_RECORDS];

        final byte[] londonStockExchange = {'X', 'L', 'O', 'N'};
        final int venueCode = pack(londonStockExchange);

        final byte[] billiton = {'B', 'H', 'P'};
        final int instrumentCode = pack(billiton);

        for (int i = 0; i < NUM_RECORDS; i++) {
            final JavaMemoryTrade trade = new JavaMemoryTrade();
            trades[i] = trade;

            trade.setTradeId(i);
            trade.setClientId(1);
            trade.setVenueCode(venueCode);
            trade.setInstrumentCode(instrumentCode);

            trade.setPrice(i);
            trade.setQuantity(i);

            trade.setSide((i & 1) == 0 ? 'B' : 'S');
        }
    }

    /**
     * Packs 1 to 4 bytes into an int, first byte in the most significant
     * position; unused low-order bytes are left as zero.
     *
     * @param value the bytes to pack, length 1 to 4
     * @return the packed code
     * @throws IllegalArgumentException if the length is not between 1 and 4
     */
    private static int pack(final byte[] value) {
        int result = 0;

        // Each case deliberately falls through to pack the remaining leading bytes.
        // Masking with 0xFF stops bytes >= 0x80 from sign-extending over higher bits.
        switch (value.length) {
            case 4:
                result |= (value[3] & 0xFF);
            case 3:
                result |= (value[2] & 0xFF) << 8;
            case 2:
                result |= (value[1] & 0xFF) << 16;
            case 1:
                result |= (value[0] & 0xFF) << 24;
                break;

            default:
                throw new IllegalArgumentException("Invalid array size");
        }

        return result;
    }

    /** Plain mutable on-heap representation of a single trade. */
    private static class JavaMemoryTrade {
        private long tradeId;
        private long clientId;
        private int venueCode;
        private int instrumentCode;
        private long price;
        private long quantity;
        private char side;

        public long getTradeId() {
            return tradeId;
        }

        public void setTradeId(final long tradeId) {
            this.tradeId = tradeId;
        }

        public long getClientId() {
            return clientId;
        }

        public void setClientId(final long clientId) {
            this.clientId = clientId;
        }

        public int getVenueCode() {
            return venueCode;
        }

        public void setVenueCode(final int venueCode) {
            this.venueCode = venueCode;
        }

        public int getInstrumentCode() {
            return instrumentCode;
        }

        public void setInstrumentCode(final int instrumentCode) {
            this.instrumentCode = instrumentCode;
        }

        public long getPrice() {
            return price;
        }

        public void setPrice(final long price) {
            this.price = price;
        }

        public long getQuantity() {
            return quantity;
        }

        public void setQuantity(final long quantity) {
            this.quantity = quantity;
        }

        public char getSide() {
            return side;
        }

        public void setSide(final char side) {
            this.side = side;
        }
    }
}

Compact Off-Heap Structures

import sun.misc.Unsafe;

import java.lang.reflect.Field;

/**
 * Benchmark of a compact off-heap layout: all trade records live in one
 * block of native memory obtained from {@code sun.misc.Unsafe} and are read
 * and written through a single reusable {@link DirectMemoryTrade} flyweight.
 *
 * <p>Each run allocates and populates {@code NUM_RECORDS} records, scans
 * them accumulating the buy and sell cost, prints the heap usage and the
 * elapsed time, and finally frees the native block.
 */
public class TestDirectMemoryLayout {
    private static final Unsafe unsafe;

    static {
        // Unsafe has no public accessor usable here; read its singleton field reflectively.
        try {
            Field field = Unsafe.class.getDeclaredField("theUnsafe");
            field.setAccessible(true);
            unsafe = (Unsafe) field.get(null);
        } catch (Exception e) {
            throw new RuntimeException(e);
        }
    }

    private static final int NUM_RECORDS = 50 * 1000 * 1000;

    /** Base address of the native block holding the records; 0 when none is allocated. */
    private static long address;

    private static final DirectMemoryTrade flyweight = new DirectMemoryTrade();

    /**
     * Executes five benchmark runs, requesting a GC before each one.
     *
     * @param args ignored
     */
    public static void main(final String[] args) {
        for (int i = 0; i < 5; i++) {
            System.gc();
            perfRun(i);
        }
    }

    /**
     * Performs one timed run: populate the records, then sum price * quantity
     * per side. The native block is released even if the scan fails.
     *
     * @param runNum run index, used only to label the output
     */
    private static void perfRun(final int runNum) {
        // nanoTime is monotonic, so the measured interval is immune to wall-clock adjustments.
        final long start = System.nanoTime();
        init();

        try {
            System.out.format("Memory %,d total, %,d free\n",
                              Runtime.getRuntime().totalMemory(),
                              Runtime.getRuntime().freeMemory());

            // NOTE: with 50M records where price == quantity == index, these totals
            // exceed Long.MAX_VALUE and silently wrap around.
            long buyCost = 0;
            long sellCost = 0;

            for (int i = 0; i < NUM_RECORDS; i++) {
                final DirectMemoryTrade trade = get(i);

                if (trade.getSide() == 'B') {
                    buyCost += (trade.getPrice() * trade.getQuantity());
                } else {
                    sellCost += (trade.getPrice() * trade.getQuantity());
                }
            }

            final long duration = (System.nanoTime() - start) / 1000000L;
            System.out.println(runNum + " - duration " + duration + "ms");
            System.out.println("buyCost = " + buyCost + " sellCost = " + sellCost);
        } finally {
            destroy();
        }
    }

    /**
     * Repositions the shared flyweight onto record {@code index} and returns it.
     * The same instance is returned on every call, so a reference obtained from
     * an earlier call refers to the new record afterwards.
     */
    private static DirectMemoryTrade get(final int index) {
        // getObjectSize() is a long, so the multiplication is done in 64-bit arithmetic.
        final long offset = address + (index * DirectMemoryTrade.getObjectSize());
        flyweight.setObjectOffset(offset);
        return flyweight;
    }

    /**
     * Allocates native memory for {@code NUM_RECORDS} records and fills every
     * field; even indices are buys ('B'), odd indices are sells ('S').
     */
    public static void init() {
        final long requiredHeap = NUM_RECORDS * DirectMemoryTrade.getObjectSize();
        address = unsafe.allocateMemory(requiredHeap);

        final byte[] londonStockExchange = {'X', 'L', 'O', 'N'};
        final int venueCode = pack(londonStockExchange);

        final byte[] billiton = {'B', 'H', 'P'};
        final int instrumentCode = pack(billiton);

        for (int i = 0; i < NUM_RECORDS; i++) {
            final DirectMemoryTrade trade = get(i);

            trade.setTradeId(i);
            trade.setClientId(1);
            trade.setVenueCode(venueCode);
            trade.setInstrumentCode(instrumentCode);

            trade.setPrice(i);
            trade.setQuantity(i);

            trade.setSide((i & 1) == 0 ? 'B' : 'S');
        }
    }

    /** Frees the native block and clears {@code address} so it cannot be freed twice. */
    private static void destroy() {
        unsafe.freeMemory(address);
        address = 0;
    }

    /**
     * Packs 1 to 4 bytes into an int, first byte in the most significant
     * position; unused low-order bytes are left as zero.
     *
     * @param value the bytes to pack, length 1 to 4
     * @return the packed code
     * @throws IllegalArgumentException if the length is not between 1 and 4
     */
    private static int pack(final byte[] value) {
        int result = 0;

        // Each case deliberately falls through to pack the remaining leading bytes.
        // Masking with 0xFF stops bytes >= 0x80 from sign-extending over higher bits.
        switch (value.length) {
            case 4:
                result |= (value[3] & 0xFF);
            case 3:
                result |= (value[2] & 0xFF) << 8;
            case 2:
                result |= (value[1] & 0xFF) << 16;
            case 1:
                result |= (value[0] & 0xFF) << 24;
                break;

            default:
                throw new IllegalArgumentException("Invalid array size");
        }

        return result;
    }

    /**
     * Flyweight view over one fixed-layout trade record in native memory.
     * Every accessor reads or writes at {@code objectOffset} plus the field's
     * offset within the record.
     */
    private static class DirectMemoryTrade {
        // Field offsets within a record: each is the previous offset plus the previous field's width.
        private static final long tradeIdOffset = 0;
        private static final long clientIdOffset = tradeIdOffset + 8;         // after long tradeId
        private static final long venueCodeOffset = clientIdOffset + 8;       // after long clientId
        private static final long instrumentCodeOffset = venueCodeOffset + 4; // after int venueCode
        private static final long priceOffset = instrumentCodeOffset + 4;     // after int instrumentCode
        private static final long quantityOffset = priceOffset + 8;           // after long price
        private static final long sideOffset = quantityOffset + 8;            // after long quantity
        // 42 bytes per record: not a multiple of 8, so the long fields of most
        // records sit at addresses that are not 8-byte aligned.
        private static final long objectSize = sideOffset + 2;                // after char side

        /** Absolute address of the record this flyweight currently views. */
        private long objectOffset;

        public static long getObjectSize() {
            return objectSize;
        }

        void setObjectOffset(final long objectOffset) {
            this.objectOffset = objectOffset;
        }

        public long getTradeId() {
            return unsafe.getLong(objectOffset + tradeIdOffset);
        }

        public void setTradeId(final long tradeId) {
            unsafe.putLong(objectOffset + tradeIdOffset, tradeId);
        }

        public long getClientId() {
            return unsafe.getLong(objectOffset + clientIdOffset);
        }

        public void setClientId(final long clientId) {
            unsafe.putLong(objectOffset + clientIdOffset, clientId);
        }

        public int getVenueCode() {
            return unsafe.getInt(objectOffset + venueCodeOffset);
        }

        public void setVenueCode(final int venueCode) {
            unsafe.putInt(objectOffset + venueCodeOffset, venueCode);
        }

        public int getInstrumentCode() {
            return unsafe.getInt(objectOffset + instrumentCodeOffset);
        }

        public void setInstrumentCode(final int instrumentCode) {
            unsafe.putInt(objectOffset + instrumentCodeOffset, instrumentCode);
        }

        public long getPrice() {
            return unsafe.getLong(objectOffset + priceOffset);
        }

        public void setPrice(final long price) {
            unsafe.putLong(objectOffset + priceOffset, price);
        }

        public long getQuantity() {
            return unsafe.getLong(objectOffset + quantityOffset);
        }

        public void setQuantity(final long quantity) {
            unsafe.putLong(objectOffset + quantityOffset, quantity);
        }

        public char getSide() {
            return unsafe.getChar(objectOffset + sideOffset);
        }

        public void setSide(final char side) {
            unsafe.putChar(objectOffset + sideOffset, side);
        }
    }
}

Results

Intel i7-860 @ 2.8GHz, 8GB RAM DDR3 1333MHz, Windows 7 64-bit, Java 1.7.0_07 ============================================= java -server -Xms4g -Xmx4g TestJavaMemoryLayout Memory 4,116,054,016 total, 1,108,901,104 free 0 - duration 19334ms Memory 4,116,054,016 total, 1,109,964,752 free 1 - duration 14295ms Memory 4,116,054,016 total, 1,108,455,504 free 2 - duration 14272ms Memory 3,817,799,680 total, 815,308,600 free 3 - duration 28358ms Memory 3,817,799,680 total, 810,552,816 free 4 - duration 32487ms java -server TestDirectMemoryLayout Memory 128,647,168 total, 126,391,384 free 0 - duration 983ms Memory 128,647,168 total, 126,992,160 free 1 - duration 958ms Memory 128,647,168 total, 127,663,408 free 2 - duration 873ms Memory 128,647,168 total, 127,663,408 free 3 - duration 886ms Memory 128,647,168 total, 127,663,408 free 4 - duration 884ms Intel i7-2760QM @ 2.40GHz, 8GB RAM DDR3 1600MHz, Linux 3.4.11 kernel 64-bit, Java 1.7.0_07 ================================================= java -server -Xms4g -Xmx4g TestJavaMemoryLayout Memory 4,116,054,016 total, 1,108,912,960 free 0 - duration 12262ms Memory 4,116,054,016 total, 1,109,962,832 free 1 - duration 9822ms Memory 4,116,054,016 total, 1,108,458,720 free 2 - duration 10239ms Memory 3,817,799,680 total, 815,307,640 free 3 - duration 21558ms Memory 3,817,799,680 total, 810,551,856 free 4 - duration 23074ms java -server TestDirectMemoryLayout Memory 123,994,112 total, 121,818,528 free 0 - duration 634ms Memory 123,994,112 total, 122,455,944 free 1 - duration 619ms Memory 123,994,112 total, 123,103,320 free 2 - duration 546ms Memory 123,994,112 total, 123,103,320 free 3 - duration 547ms Memory 123,994,112 total, 123,103,320 free 4 - duration 534ms

Analysis

memory required = total memory - free memory - base JVM needs

2,883,248,712 = 3,817,799,680 - 810,551,856 - 123,999,112

System.gc()

-XX:+PrintGC -XX:+PrintGCDetails -XX:+PrintGCDateStamps -XX:+PrintTenuringDistribution -XX:+PrintHeapAtGC -XX:+PrintGCApplicationConcurrentTime -XX:+PrintGCApplicationStoppedTime -XX:+PrintSafepointStatistics





With System.gc() before each run ================================ Total time for which application threads were stopped: 0.0085280 seconds Total time for which application threads were stopped: 0.7280530 seconds Total time for which application threads were stopped: 8.1703460 seconds Total time for which application threads were stopped: 5.6112210 seconds Total time for which application threads were stopped: 1.2531370 seconds Total time for which application threads were stopped: 7.6392250 seconds Total time for which application threads were stopped: 5.7847050 seconds Total time for which application threads were stopped: 1.3070470 seconds Total time for which application threads were stopped: 8.2520880 seconds Total time for which application threads were stopped: 6.0949910 seconds Total time for which application threads were stopped: 1.3988480 seconds Total time for which application threads were stopped: 8.1793240 seconds Total time for which application threads were stopped: 6.4138720 seconds Total time for which application threads were stopped: 4.4991670 seconds Total time for which application threads were stopped: 4.5612290 seconds Total time for which application threads were stopped: 0.3598490 seconds Total time for which application threads were stopped: 0.7111000 seconds Total time for which application threads were stopped: 1.4426750 seconds Total time for which application threads were stopped: 1.5931500 seconds Total time for which application threads were stopped: 10.9484920 seconds Total time for which application threads were stopped: 7.0707230 seconds Without System.gc() before each run =================================== Test run times 0 - duration 12120ms 1 - duration 9439ms 2 - duration 9844ms 3 - duration 20933ms 4 - duration 23041ms Total time for which application threads were stopped: 0.0170860 seconds Total time for which application threads were stopped: 0.7915350 seconds Total time for which application threads were stopped: 
10.7153320 seconds Total time for which application threads were stopped: 5.6234650 seconds Total time for which application threads were stopped: 1.2689950 seconds Total time for which application threads were stopped: 7.6238170 seconds Total time for which application threads were stopped: 6.0114540 seconds Total time for which application threads were stopped: 1.2990070 seconds Total time for which application threads were stopped: 7.9918480 seconds Total time for which application threads were stopped: 5.9997920 seconds Total time for which application threads were stopped: 1.3430040 seconds Total time for which application threads were stopped: 8.0759940 seconds Total time for which application threads were stopped: 6.3980610 seconds Total time for which application threads were stopped: 4.5572100 seconds Total time for which application threads were stopped: 4.6193830 seconds Total time for which application threads were stopped: 0.3877930 seconds Total time for which application threads were stopped: 0.7429270 seconds Total time for which application threads were stopped: 1.5248070 seconds Total time for which application threads were stopped: 1.5312130 seconds Total time for which application threads were stopped: 10.9120250 seconds Total time for which application threads were stopped: 7.3528590 seconds

Side Note On Serialization

Conclusion