Market Making with Alpha - Basis

Note: This example is for educational purposes only and demonstrates effective strategies for high-frequency market-making schemes. All backtests are based on a 0.005% rebate, the highest market maker rebate available on Binance Futures. See Binance Upgrades USDⓢ-Margined Futures Liquidity Provider Program for more details.

[1]:
import datetime
import os

import numpy as np

from numba import njit, uint64
from numba.typed import Dict

from hftbacktest import (
    BacktestAsset,
    ROIVectorMarketDepthBacktest,
    GTX,
    LIMIT,
    BUY,
    SELL,
    BUY_EVENT,
    SELL_EVENT,
    Recorder
)
from hftbacktest.stats import LinearAssetRecord

import polars as pl

Download L1 (book ticker) data to calculate the basis between spot and futures.

[3]:
# Backtest period; the loops in this notebook treat both ends as inclusive (`date <= end_date`).
start_date = datetime.datetime.strptime('20240901', '%Y%m%d')
end_date = datetime.datetime.strptime('20241031', '%Y%m%d')
# Token placed in the "Authorization: Bearer ..." header of the download requests; blank here.
tardis_token = ""
[4]:
def download_from_tardis(exchange, stream, symbol, start_date, end_date, output_path, token):
    date = start_date
    while date <= end_date:
        yyyymmdd = date.strftime('%Y%m%d')
        year = yyyymmdd[:4]
        month = yyyymmdd[4:6]
        day = yyyymmdd[6:]
        output_file = os.path.join(output_path, f'{symbol}_{yyyymmdd}.csv.gz')
        header = f'"Authorization: Bearer {token}"'
        !wget --header={header} https://datasets.tardis.dev/v1/{exchange}/{stream}/{year}/{month}/{day}/{symbol}.csv.gz -O {output_file}
        date += datetime.timedelta(days=1)
[5]:
# Fetch the BTCUSDT book ticker for the spot market ('binance') and the futures market
# ('binance-futures') over the same period; each goes to its own directory.
download_from_tardis('binance', 'book_ticker', 'BTCUSDT', start_date, end_date, 'spot/book_ticker/BTCUSDT', tardis_token)
download_from_tardis('binance-futures', 'book_ticker', 'BTCUSDT', start_date, end_date, 'usdm/book_ticker/BTCUSDT', tardis_token)

Precompute the basis for faster backtesting.

[7]:
def load_bookticker(file):
    """Read a book-ticker CSV and return its mid-price series.

    Args:
        file: Path of a CSV file with the columns listed in the schema below.

    Returns:
        A polars DataFrame with two columns: ``local_timestamp`` (cast to
        ``pl.Datetime``) and ``mid_price`` (the average of the best bid and
        the best ask).
    """
    column_types = {
        'exchange': pl.String,
        'symbol': pl.String,
        'timestamp': pl.Int64,
        'local_timestamp': pl.Int64,
        'ask_amount': pl.Float64,
        'ask_price': pl.Float64,
        'bid_price': pl.Float64,
        'bid_amount': pl.Float64
    }
    quotes = pl.read_csv(file, schema=column_types)

    # NOTE(review): assumes the integer timestamps are in the unit that
    # pl.Datetime uses by default -- confirm against the data source.
    received_at = pl.col('local_timestamp').cast(pl.Datetime)
    mid = (.5 * (pl.col('bid_price') + pl.col('ask_price'))).alias('mid_price')

    return quotes.with_columns(received_at, mid).select(['local_timestamp', 'mid_price'])

def prepare_px_basis(spot_file, futures_file, sampling_interval, rolling_window):
    """Build a regularly sampled table of the spot price and the rolling mean basis.

    Both book-ticker files are resampled onto the same time grid, joined on
    the grid timestamp, and the basis (futures mid - spot mid) is averaged
    over a rolling window.

    Args:
        spot_file: Book-ticker CSV of the spot market.
        futures_file: Book-ticker CSV of the futures market.
        sampling_interval: polars duration string for the grid, e.g. ``'100ms'``.
        rolling_window: Number of grid samples in the rolling mean.

    Returns:
        A polars DataFrame sorted by time with the columns
        ``local_timestamp`` (integer nanoseconds), ``spot`` (forward-filled
        spot mid price) and ``basis`` (rolling mean of the basis).
    """
    def resample(quotes):
        # Takes the last mid price of every sampling window and fills empty windows.
        # label='right' stamps each window with its END, so the row at time t only
        # contains quotes received before t. The default (left label) would stamp
        # the window [t, t + interval) with t and leak up to one interval of future
        # quotes into a backtest that reads the row at time t.
        return quotes.group_by_dynamic(
            index_column='local_timestamp',
            every=sampling_interval,
            label='right'
        ).agg(
            pl.col('mid_price').last()
        ).upsample(
            time_column='local_timestamp',
            every=sampling_interval
        ).select(pl.all().forward_fill())

    spot_rs = resample(load_bookticker(spot_file))
    futures_rs = resample(load_bookticker(futures_file))

    return spot_rs.join(
        futures_rs,
        on='local_timestamp',
        how='full',
        # Merge both key columns into one; otherwise rows that exist only on the
        # futures side end up with a null timestamp.
        coalesce=True
    ).sort(
        # A full join gives no ordering guarantee, while the forward fill and the
        # rolling mean below are only meaningful in time order.
        'local_timestamp'
    ).with_columns(
        spot_px=pl.col('mid_price').forward_fill(),
        futures_px=pl.col('mid_price_right').forward_fill(),
    ).select(
        local_timestamp=pl.col('local_timestamp').dt.timestamp('ns'),
        spot=pl.col('spot_px'),
        # Basis = futures - spot, averaged over the given window.
        basis=(pl.col('futures_px') - pl.col('spot_px')).rolling_mean(window_size=rolling_window)
    )
[8]:
# Compute the basis table day by day over the inclusive period, then stack
# the daily tables row-wise into a single array.
data = []
date = start_date
one_day = datetime.timedelta(days=1)
while date <= end_date:
    spot_path = f'spot/book_ticker/BTCUSDT/BTCUSDT_{date.strftime("%Y%m%d")}.csv.gz'
    futures_path = f'usdm/book_ticker/BTCUSDT/BTCUSDT_{date.strftime("%Y%m%d")}.csv.gz'
    # 3000 samples of 100ms each form the 5-minute rolling window.
    daily_basis = prepare_px_basis(spot_path, futures_path, '100ms', 3000)
    data.append(daily_basis.to_numpy())
    date += one_day
precompute_data = np.concatenate(data, axis=0)
[9]:
# Cache the precomputed table under the key 'data'; NumPy appends the '.npz' extension to the name.
np.savez_compressed('px_basis_BTCUSDT_5m', data=precompute_data)
[10]:
# Read the cached table back from the archive and close the file right away.
with np.load('px_basis_BTCUSDT_5m.npz') as archive:
    precompute_data = archive['data']

A market-making model based on the basis. Since the basis is often considered stationary, various time-series techniques, such as MA, AR, and ARMA, can be applied. Here, the simplest model, the moving average, is used for demonstration. This approach assumes that the basis will revert to its average over a given past period.

[12]:
@njit
def basis_mm(
    hbt,
    stat,
    half_spread,
    skew,
    precompute_data,
    interval,
    order_qty_dollar,
    max_position_dollar,
    grid_num,
    grid_interval,
    roi_lb,
    roi_ub
):
    """Quote a bid/ask grid around a fair price of spot + rolling mean basis.

    On every step the fair price is rebuilt from the latest row of
    ``precompute_data`` whose timestamp is not after the backtest clock. The
    quotes are skewed by the current position, working orders that are not
    on the new grid are cancelled, and missing grid orders are submitted.

    Args:
        hbt: Backtest object (asset 0 is traded).
        stat: Recorder whose ``record(hbt)`` is called once per step.
        half_spread: Half spread as a ratio of the fair price.
        skew: Quote shift, as a ratio of the fair price, per unit of
            normalized position (position / order quantity).
        precompute_data: 2-D array with rows of (timestamp, spot price,
            rolling mean basis); the timestamp is compared against
            ``hbt.current_timestamp``.
        interval: Step length passed to ``hbt.elapse``.
        order_qty_dollar: Notional value of each order.
        max_position_dollar: Notional position limit; the side that would
            grow the position past it is not quoted.
        grid_num: Number of price levels quoted on each side.
        grid_interval: Price distance between adjacent grid levels.
        roi_lb: Unused; kept so existing callers keep working.
        roi_ub: Unused; kept so existing callers keep working.
    """
    asset_no = 0

    tick_size = hbt.depth(asset_no).tick_size
    lot_size = hbt.depth(asset_no).lot_size

    n_rows = len(precompute_data)
    data_i = 0
    last_spot = np.nan
    last_basis = np.nan

    while hbt.elapse(interval) == 0:
        hbt.clear_inactive_orders(asset_no)

        depth = hbt.depth(asset_no)
        position = hbt.position(asset_no)
        orders = hbt.orders(asset_no)

        best_bid = depth.best_bid
        best_ask = depth.best_ask

        mid_price = (best_bid + best_ask) / 2.0

        #--------------------------------------------------------
        # Computes bid price and ask price.

        # Order quantity in lots, never below one lot.
        order_qty = max(round((order_qty_dollar / mid_price) / lot_size) * lot_size, lot_size)

        normalized_position = position / order_qty

        # A long position pushes the bid further away and pulls the ask closer; a short one does the opposite.
        relative_bid_depth = half_spread + skew * normalized_position
        relative_ask_depth = half_spread - skew * normalized_position

        # Reads the latest observable spot price and basis from the precomputed data.
        # Rows are skipped unless their timestamp is strictly later than the clock
        # (written as `not (x > now)` so rows with a NaN timestamp are skipped too).
        while data_i < n_rows and not (precompute_data[data_i, 0] > hbt.current_timestamp):
            data_i += 1
        # data_i - 1 is the last row that is not in the future. This also covers the
        # final row once the table is exhausted, which was previously never read.
        if data_i > 0:
            last_spot = precompute_data[data_i - 1, 1]
            last_basis = precompute_data[data_i - 1, 2]
        # Our fair price is calculated as the spot price + the rolling average of the basis
        fair_px = last_spot + last_basis

        # Never quote through the book: the bid is capped at the best bid and the ask floored at the best ask.
        # fair_px goes first so that a NaN fair price propagates and is caught by np.isfinite below.
        bid_price = min(fair_px * (1.0 - relative_bid_depth), best_bid)
        ask_price = max(fair_px * (1.0 + relative_ask_depth), best_ask)

        # Snap to the tick grid, away from the fair price.
        bid_price = np.floor(bid_price / tick_size) * tick_size
        ask_price = np.ceil(ask_price / tick_size) * tick_size

        #--------------------------------------------------------
        # Updates quotes.

        # Creates a new grid for buy orders.
        new_bid_orders = Dict.empty(np.uint64, np.float64)
        if position * mid_price < max_position_dollar and np.isfinite(bid_price):
            for _ in range(grid_num):
                bid_price_tick = round(bid_price / tick_size)

                # order price in tick is used as order id.
                new_bid_orders[uint64(bid_price_tick)] = bid_price

                bid_price -= grid_interval

        # Creates a new grid for sell orders.
        new_ask_orders = Dict.empty(np.uint64, np.float64)
        if position * mid_price > -max_position_dollar and np.isfinite(ask_price):
            for _ in range(grid_num):
                ask_price_tick = round(ask_price / tick_size)

                # order price in tick is used as order id.
                new_ask_orders[uint64(ask_price_tick)] = ask_price

                ask_price += grid_interval

        order_values = orders.values()
        while order_values.has_next():
            order = order_values.get()
            # Cancels if a working order is not in the new grid.
            if order.cancellable:
                if (
                    (order.side == BUY and order.order_id not in new_bid_orders)
                    or (order.side == SELL and order.order_id not in new_ask_orders)
                ):
                    # NOTE(review): the trailing False is presumably the "wait for response" flag -- confirm.
                    hbt.cancel(asset_no, order.order_id, False)

        for order_id, order_price in new_bid_orders.items():
            # Posts a new buy order if there is no working order at the price on the new grid.
            if order_id not in orders:
                hbt.submit_buy_order(asset_no, order_id, order_price, order_qty, GTX, LIMIT, False)

        for order_id, order_price in new_ask_orders.items():
            # Posts a new sell order if there is no working order at the price on the new grid.
            if order_id not in orders:
                hbt.submit_sell_order(asset_no, order_id, order_price, order_qty, GTX, LIMIT, False)

        # Records the current state for stat calculation.
        stat.record(hbt)
[13]:
%%time

# Price range of interest, handed both to the asset and to the strategy.
roi_lb = 10000
roi_ub = 90000

# Strategy parameters.
half_spread = 0.0003 # a ratio relative to the fair price
skew = half_spread / 20
interval = 100_000_000 # in nanoseconds. 100ms
order_qty_dollar = 50_000
max_position_dollar = order_qty_dollar * 20
grid_num = 1

# One order-latency file and one market-data file per day of the inclusive period.
latency_data = []
data = []
date = start_date
while date <= end_date:
    yyyymmdd = date.strftime('%Y%m%d')
    latency_data.append('latency/order_latency_{}.npz'.format(yyyymmdd))
    data.append('data2/btcusdt_{}.npz'.format(yyyymmdd))
    date += datetime.timedelta(days=1)

# The first fee argument (-0.00005) is the 0.005% rebate;
# NOTE(review): the second one is presumably the taker fee -- confirm.
asset = (
    BacktestAsset()
        .data(data)
        .initial_snapshot('data2/btcusdt_20240831_eod.npz')
        .linear_asset(1.0)
        .intp_order_latency(latency_data)
        .power_prob_queue_model(3)
        .no_partial_fill_exchange()
        .trading_value_fee_model(-0.00005, 0.0007)
        .tick_size(0.1)
        .lot_size(0.001)
        .roi_lb(roi_lb)
        .roi_ub(roi_ub)
)

hbt = ROIVectorMarketDepthBacktest([asset])

recorder = Recorder(1, 60_000_000)

# Grid levels are spaced one tick apart.
grid_interval = hbt.depth(0).tick_size

basis_mm(
    hbt,
    recorder.recorder,
    half_spread,
    skew,
    precompute_data,
    interval,
    order_qty_dollar,
    max_position_dollar,
    grid_num,
    grid_interval,
    roi_lb,
    roi_ub
)

hbt.close()

recorder.to_npz('stats/underlying_btcusdt_basis_5m.npz')
CPU times: user 1h 2min 41s, sys: 1min 45s, total: 1h 4min 27s
Wall time: 40min 22s
[14]:
# Load the records stored under key '0' of the recorder output.
with np.load('stats/underlying_btcusdt_basis_5m.npz') as archive:
    data = archive['0']

# Resample the records to 5-minute bars and compute the statistics for a book size of 1,000,000.
record = LinearAssetRecord(data)
stats = record.resample('5m').stats(book_size=1_000_000)
stats.summary()
[14]:
shape: (1, 11)
| start | end | SR | Sortino | Return | MaxDrawdown | DailyNumberOfTrades | DailyTurnover | ReturnOverMDD | ReturnOverTrade | MaxPositionValue |
| datetime[μs] | datetime[μs] | f64 | f64 | f64 | f64 | f64 | f64 | f64 | f64 | f64 |
| 2024-09-01 00:00:00 | 2024-10-31 23:55:00 | 3.280936 | 4.380048 | 0.05166 | 0.024406 | 537.702738 | 26.885072 | 2.116701 | 0.000032 | 1.0409e6 |
[15]:
# Draw the plots offered by the stats object of the BTCUSDT-based run.
stats.plot()
../_images/tutorials_Market_Making_with_Alpha_-_Basis_15_0.png

On Binance, the BTCFDUSD spot market has a higher trading volume than the BTCUSDT spot market. BTCFDUSD records a daily trading volume of \$3 billion, while BTCUSDT has \$2.5 billion. The backtest is therefore repeated below with BTCFDUSD as the spot leg of the basis. Alternatively, you may consider using the exact index price rather than the price of a single spot market.

You can find the weights composing the index using the API. https://developers.binance.com/docs/derivatives/usds-margined-futures/market-data/rest-api/Composite-Index-Symbol-Information

[17]:
# Fetch the BTCFDUSD spot book ticker for the same period; the futures data downloaded earlier is reused.
download_from_tardis('binance', 'book_ticker', 'BTCFDUSD', start_date, end_date, 'spot/book_ticker/BTCFDUSD', tardis_token)
[18]:
# Compute the basis of BTCUSDT futures against BTCFDUSD spot day by day over
# the inclusive period, then stack the daily tables row-wise into a single array.
data = []
date = start_date
one_day = datetime.timedelta(days=1)
while date <= end_date:
    spot_path = f'spot/book_ticker/BTCFDUSD/BTCFDUSD_{date.strftime("%Y%m%d")}.csv.gz'
    futures_path = f'usdm/book_ticker/BTCUSDT/BTCUSDT_{date.strftime("%Y%m%d")}.csv.gz'
    # 3000 samples of 100ms each form the 5-minute rolling window.
    daily_basis = prepare_px_basis(spot_path, futures_path, '100ms', 3000)
    data.append(daily_basis.to_numpy())
    date += one_day
precompute_data = np.concatenate(data, axis=0)
[19]:
# Cache the BTCFDUSD-based table under the key 'data'; NumPy appends the '.npz' extension to the name.
np.savez_compressed('px_basis_BTCFDUSD_5m', data=precompute_data)
[20]:
# Read the cached table back from the archive and close the file right away.
with np.load('px_basis_BTCFDUSD_5m.npz') as archive:
    precompute_data = archive['data']
[21]:
%%time

# Price range of interest, handed both to the asset and to the strategy.
roi_lb = 10000
roi_ub = 90000

# Strategy parameters.
half_spread = 0.0003 # a ratio relative to the fair price
skew = half_spread / 20
interval = 100_000_000 # in nanoseconds. 100ms
order_qty_dollar = 50_000
max_position_dollar = order_qty_dollar * 20
grid_num = 1

# One order-latency file and one market-data file per day of the inclusive period.
latency_data = []
data = []
date = start_date
while date <= end_date:
    yyyymmdd = date.strftime('%Y%m%d')
    latency_data.append('latency/order_latency_{}.npz'.format(yyyymmdd))
    data.append('data2/btcusdt_{}.npz'.format(yyyymmdd))
    date += datetime.timedelta(days=1)

# The first fee argument (-0.00005) is the 0.005% rebate;
# NOTE(review): the second one is presumably the taker fee -- confirm.
asset = (
    BacktestAsset()
        .data(data)
        .initial_snapshot('data2/btcusdt_20240831_eod.npz')
        .linear_asset(1.0)
        .intp_order_latency(latency_data)
        .power_prob_queue_model(3)
        .no_partial_fill_exchange()
        .trading_value_fee_model(-0.00005, 0.0007)
        .tick_size(0.1)
        .lot_size(0.001)
        .roi_lb(roi_lb)
        .roi_ub(roi_ub)
)

hbt = ROIVectorMarketDepthBacktest([asset])

recorder = Recorder(1, 60_000_000)

# Grid levels are spaced one tick apart.
grid_interval = hbt.depth(0).tick_size

basis_mm(
    hbt,
    recorder.recorder,
    half_spread,
    skew,
    precompute_data,
    interval,
    order_qty_dollar,
    max_position_dollar,
    grid_num,
    grid_interval,
    roi_lb,
    roi_ub
)

hbt.close()

recorder.to_npz('stats/underlying_btcfdusd_basis_5m.npz')
CPU times: user 1h 5min 24s, sys: 1min 50s, total: 1h 7min 14s
Wall time: 42min 59s
[22]:
# Load the records stored under key '0' of the recorder output.
with np.load('stats/underlying_btcfdusd_basis_5m.npz') as archive:
    data = archive['0']

# Resample the records to 5-minute bars and compute the statistics for a book size of 1,000,000.
record = LinearAssetRecord(data)
stats = record.resample('5m').stats(book_size=1_000_000)
stats.summary()
[22]:
shape: (1, 11)
| start | end | SR | Sortino | Return | MaxDrawdown | DailyNumberOfTrades | DailyTurnover | ReturnOverMDD | ReturnOverTrade | MaxPositionValue |
| datetime[μs] | datetime[μs] | f64 | f64 | f64 | f64 | f64 | f64 | f64 | f64 | f64 |
| 2024-09-01 00:00:00 | 2024-10-31 23:55:00 | 2.069684 | 2.647596 | 0.045228 | 0.047641 | 479.043661 | 23.952189 | 0.949337 | 0.000031 | 1.0376e6 |
[23]:
# Draw the plots offered by the stats object of the BTCFDUSD-based run.
stats.plot()
../_images/tutorials_Market_Making_with_Alpha_-_Basis_23_0.png