/*
 * %CopyrightBegin%
 *
 * Copyright Ericsson AB 2014-2024. All Rights Reserved.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 * %CopyrightEnd%
 *
 * hashmaps are an adaptation of Rich Hickey's Persistent HashMaps,
 *   which were an adaptation of Phil Bagwell's Hash Array Mapped Tries
 *
 * Author: Björn-Egil Dahlberg
 */

#ifdef HAVE_CONFIG_H
#  include "config.h"
#endif

#include "sys.h"
#include "erl_vm.h"
#include "global.h"
#include "erl_process.h"
#include "error.h"
#include "bif.h"
#include "erl_binary.h"
#include "erl_global_literals.h"

#include "erl_map.h"

/* BIFs
 *
 * DONE:
 * - erlang:is_map/1
 * - erlang:is_map_key/2
 * - erlang:map_size/1
 * - erlang:map_get/2
 *
 * - maps:find/2
 * - maps:from_list/1
 * - maps:get/2
 * - maps:is_key/2
 * - maps:keys/1
 * - maps:merge/2
 * - maps:new/0
 * - maps:put/3
 * - maps:remove/2
 * - maps:take/2
 * - maps:to_list/1
 * - maps:update/3
 * - maps:values/1
 *
 * TODO:
 * - maps:foldl/3
 * - maps:foldr/3
 * - maps:map/3
 * - maps:size/1
 * - maps:without/2
 *
 * DEBUG: for sharing calculation
 * - erts_internal:map_to_tuple_keys/1
 */

#ifndef DECL_AM
#define DECL_AM(S) Eterm AM_ ## S = am_atom_put(#S, sizeof(#S) - 1)
#endif

/* for hashmap_from_list/1
 *
 * Temporary per-entry record used while a hashmap is built from an array
 * of key/value pairs.
 */
typedef struct {
    /* Swizzled hash of the key; the value compared by hxnodecmp(). */
    erts_ihash_t hx;
    /* Number of array slots this element spans. Set to the number of
     * colliders when entries are folded into a collision node, else 1. */
    Uint skip;
    /* Position of the entry in the input. When duplicate keys are
     * compacted, the entry with the highest position is kept. */
    Uint i;
    /* Cons cell [Key|Value], or a collision node term once entries with
     * equal hash have been folded together. */
    Eterm  val;
} hxnode_t;

/* Reverses the path element/slot order of `hash` */
static ERTS_INLINE erts_ihash_t swizzle_map_hash(erts_ihash_t hash);

static Eterm flatmap_merge(Process *p, Eterm nodeA, Eterm nodeB);
static BIF_RETTYPE map_merge_mixed(Process *p, Eterm flat, Eterm tree, int swap_args);
struct HashmapMergeContext_;
static BIF_RETTYPE hashmap_merge(Process *p, Eterm nodeA, Eterm nodeB, int swap_args,
                                 struct HashmapMergeContext_*);
static Export hashmap_merge_trap_export;
static BIF_RETTYPE maps_merge_trap_1(BIF_ALIST_1);
static Uint hashmap_subtree_size(Eterm node);
static Eterm hashmap_delete(Process *p, erts_ihash_t hx, Eterm key, Eterm node, Eterm *value);
static Eterm flatmap_from_validated_list(Process *p, Eterm list, Eterm fill_value, Uint size);
static Eterm hashmap_from_unsorted_array(ErtsHeapFactory*, hxnode_t *hxns, Uint n, int reject_dupkeys, ErtsAlcType_t temp_memory_allocator);
static Eterm hashmap_from_sorted_unique_array(ErtsHeapFactory*, hxnode_t *hxns, Uint n, ErtsAlcType_t temp_memory_allocator);
static Eterm hashmap_from_chunked_array(ErtsHeapFactory*, hxnode_t *hxns, Uint n, Uint size, ErtsAlcType_t temp_memory_allocator);
static Eterm hashmap_info(Process *p, Eterm node);
static Eterm hashmap_bld_tuple_uint(Uint **hpp, Uint *szp, Uint n, Uint nums[]);
static int hxnodecmp(const void* a, const void* b);
static int hxnodecmpkey(const void* a, const void* b);
/* Number of leading 4-bit path elements that V1 and V2 have in common
 * (assumes hashmap_clz() counts leading zero bits -- TODO confirm). */
#define cdepth(V1,V2)     (hashmap_clz((V1) ^ (V2)) >> 2)
/* 4-bit slot of V for level L. Level 0 selects the nibble at bit offset
 * (HAMT_MAX_LEVEL - 1) * 4; each higher level selects the next lower nibble. */
#define maskval(V,L)      (((V) >> (((HAMT_MAX_LEVEL - 1) - (L)) * 4)) & 0xF)
/* Debug print hook; currently expands to nothing. */
#define DBG_PRINT(X)
/*erts_printf X*/
/* Passed as the last argument to erts_produce_heap() when inner nodes and
 * collision nodes are allocated. */
#define HALLOC_EXTRA 200

/* *******************************
 * ** Yielding C Fun (YCF) Note **
 * *******************************
 *
 * Yielding versions of some of the functions in this file are
 * generated by YCF. These generated functions are placed in the file
 * "erl_map.ycf.h" by the ERTS build system. The generation of
 * "erl_map.ycf.h" is defined in "$ERL_TOP/erts/emulator/Makefile.in".
 *
 * See "$ERL_TOP/erts/emulator/internal_doc/AutomaticYieldingOfCCode.md"
 * and "$ERL_TOP/erts/lib_src/yielding_c_fun/README.md" for more
 * information about YCF and the limitation that YCF imposes on the
 * code that it transforms.
 *
 */
#include "erl_map.ycf.h"
#define NOT_YCF_YIELDING_VERSION 1
#define YCF_CONSUME_REDS(X) while(0){}

/*
 * Sets up hashmap_merge_trap_export as the export entry for
 * maps:merge_trap/1, implemented by maps_merge_trap_1().
 */
void erts_init_map(void) {
    erts_init_trap_export(&hashmap_merge_trap_export, am_maps, am_merge_trap,
                          1, &maps_merge_trap_1);
}


/* erlang:map_size/1
 * the corresponding instruction is implemented in:
 *     beam/erl_bif_guard.c
 */

BIF_RETTYPE map_size_1(BIF_ALIST_1) {
    /* NOTE: The JIT has its own implementation of this BIF. */
    Sint n_entries;

    n_entries = erts_map_size(BIF_ARG_1);

    if (n_entries >= 0) {
        /*
         * With 28 bits available for the integer part of a small (on a
         * 32-bit machine), no map can be large enough for its size not
         * to fit in a small. The assertion guards against a future
         * reduction of the number of bits in a small.
         */
        ASSERT(IS_USMALL(0, n_entries));
        BIF_RET(make_small(n_entries));
    }

    /* A negative size means that the argument is not a map. */
    BIF_P->fvalue = BIF_ARG_1;
    BIF_ERROR(BIF_P, BADMAP);
}

/*
 * Returns the number of entries in `map`, or -1 when `map` is neither a
 * flatmap nor a hashmap.
 */
Sint
erts_map_size(Eterm map)
{
    Eterm *root;
    flatmap_t *flat;

    if (is_flatmap(map)) {
        flat = (flatmap_t*)flatmap_val(map);
        return (Sint) flatmap_get_size(flat);
    }

    if (!is_hashmap(map)) {
        return -1;
    }

    /* The word after the hashmap head header holds the size. */
    root = hashmap_val(map);
    return (Sint) root[1];
}

/* maps:find/2
 * return value if key *matches* a key in the map
 */

/*
 * Looks up `key` in `map`, which must be a flatmap or a hashmap.
 *
 * Returns a pointer to the stored value, or NULL when no key in the map
 * matches.
 */
const Eterm *
erts_maps_get(Eterm key, Eterm map)
{
    erts_ihash_t hash;
    flatmap_t *flat;
    Eterm *keys;
    Eterm *values;
    Uint count;
    Uint pos;

    if (!is_flatmap(map)) {
        ASSERT(is_hashmap(map));
        hash = hashmap_make_hash(key);
        return erts_hashmap_get(hash, key, map);
    }

    flat  = (flatmap_t *)flatmap_val(map);
    count = flatmap_get_size(flat);
    if (count == 0) {
        return NULL;
    }

    keys   = (Eterm *)tuple_val(flat->keys) + 1;
    values = flatmap_get_values(flat);

    if (is_immed(key)) {
        /* An immediate key can be matched by comparing the words. */
        for (pos = 0; pos < count; pos++) {
            if (keys[pos] == key) {
                return &values[pos];
            }
        }
        return NULL;
    }

    for (pos = 0; pos < count; pos++) {
        if (EQ(keys[pos], key)) {
            return &values[pos];
        }
    }
    return NULL;
}

BIF_RETTYPE maps_find_2(BIF_ALIST_2) {
    if (is_map(BIF_ARG_2)) {
        Eterm *hp, res;
        const Eterm *value;

        value = erts_maps_get(BIF_ARG_1, BIF_ARG_2);
	if (value) {
	    hp    = HAlloc(BIF_P, 3);
	    res   = make_tuple(hp);
	    *hp++ = make_arityval(2);
	    *hp++ = am_ok;
            *hp++ = *value;
	    BIF_RET(res);
	}
	BIF_RET(am_error);
    }
    BIF_P->fvalue = BIF_ARG_2;
    BIF_ERROR(BIF_P, BADMAP);
}

/* maps:get/2 and erlang:map_get/2
 * return value if key *matches* a key in the map
 * exception badkey if none matches
 */

BIF_RETTYPE maps_get_2(BIF_ALIST_2) {
    if (is_map(BIF_ARG_2)) {
        const Eterm *value;

        value = erts_maps_get(BIF_ARG_1, BIF_ARG_2);
        if (value) {
            BIF_RET(*value);
	}

	BIF_P->fvalue = BIF_ARG_1;
	BIF_ERROR(BIF_P, BADKEY);
    }
    BIF_P->fvalue = BIF_ARG_2;
    BIF_ERROR(BIF_P, BADMAP);
}

/* erlang:map_get/2; forwards its arguments unchanged to maps_get_2(). */
BIF_RETTYPE map_get_2(BIF_ALIST_2) {
    /* NOTE: The JIT has its own implementation of this BIF. */
    BIF_RETTYPE result;

    result = maps_get_2(BIF_CALL_ARGS);
    BIF_RET(result);
}

/* **Important Note**
 *
 * A yielding version of this function is generated with YCF. This
 * means that the code has to follow some restrictions. See note about
 * YCF near the top of the file for more information.
 */

#ifdef INCLUDE_YCF_TRANSFORMED_ONLY_FUNCTIONS
/*
 * Worker for maps:from_keys/2. bif_args[0] is the list of keys and
 * bif_args[1] the value that every key is associated with.
 *
 * The list is walked once to count its elements and to check that it is a
 * proper list. A hashmap is built when the element count exceeds
 * MAP_SMALL_MAP_LIMIT, otherwise a flatmap is built. Fails with BADARG when
 * the first argument is not a proper list.
 */
static BIF_RETTYPE maps_from_keys_2_helper(Process* p, Eterm* bif_args) {
    Eterm list = bif_args[0];
    Eterm value = bif_args[1];
    Eterm item = list;
    Eterm res;
    Uint  size = 0;
    if (is_list(item) || is_nil(item)) {
        /* Calculate size and check validity */
        while(is_list(item)) {
            YCF_CONSUME_REDS(1);
            size++;
            item = CDR(list_val(item));
        }

        /* The walk must have ended on [], else the list is improper */
        if (is_not_nil(item))
            goto error;

        if (size > MAP_SMALL_MAP_LIMIT) {
            /* Cannot put call in return statement because
               YCF cannot handle that */
            res = hashmap_from_validated_list(p, list, value, size);
            return res;
        } else {
            /* We don't yield while constructing flatmap because
               MAP_SMALL_MAP_LIMIT is small */
	    return flatmap_from_validated_list(p, list, value, size);
        }
    }

error:

    BIF_ERROR(p, BADARG);
}
#endif /* INCLUDE_YCF_TRANSFORMED_ONLY_FUNCTIONS */

/* **Important Note**
 *
 * A yielding version of this function is generated with YCF. This
 * means that the code has to follow some restrictions. See note about
 * YCF near the top of the file for more information.
 */
#ifdef INCLUDE_YCF_TRANSFORMED_ONLY_FUNCTIONS
/*
 * Worker for maps:from_list/1. bif_args[0] is the list to convert.
 *
 * The list is walked once to count its elements and to check that it is a
 * proper list in which every element is a 2-tuple. A hashmap is built when
 * the element count exceeds MAP_SMALL_MAP_LIMIT, otherwise a flatmap is
 * built. THE_NON_VALUE is passed as fill value so that the builders read
 * both key and value from each tuple. Fails with BADARG on invalid input.
 */
static BIF_RETTYPE maps_from_list_1_helper(Process* p, Eterm* bif_args) {
    Eterm list = bif_args[0];
    Eterm res;
    Eterm *kv;
    Uint  size = 0;
    Eterm item = list;
    if (is_list(item) || is_nil(item)) {

	/* Calculate size and check validity */

	while(is_list(item)) {
            YCF_CONSUME_REDS(1);
	    res = CAR(list_val(item));
	    if (is_not_tuple(res))
		goto error;

	    /* The tuple header must state an arity of exactly 2 */
	    kv = tuple_val(res);
	    if (*kv != make_arityval(2))
		goto error;

	    size++;
	    item = CDR(list_val(item));
	}

	/* The walk must have ended on [], else the list is improper */
	if (is_not_nil(item))
	    goto error;

	if (size > MAP_SMALL_MAP_LIMIT) {
            /* Cannot put call in return statement because
               YCF cannot handle that */
            res = hashmap_from_validated_list(p, list, THE_NON_VALUE, size);
            return res;
	} else {
            /* We don't yield while constructing flatmap because
               MAP_SMALL_MAP_LIMIT is small */
	    return flatmap_from_validated_list(p, list, THE_NON_VALUE, size);
	}
    }

error:

    BIF_ERROR(p, BADARG);
}
#endif /* INCLUDE_YCF_TRANSFORMED_ONLY_FUNCTIONS */


/*
 * Builds a flatmap on the heap of `p` from an already validated proper
 * list with `size` elements.
 *
 * When `fill_value` is a value, each list element is a key that is mapped
 * to `fill_value`; otherwise each element is a {Key, Value} tuple. Entries
 * are insertion sorted on the key using erts_cmp_flatmap_keys(). A key that
 * compares equal to an earlier one overwrites that entry, and the heap
 * words left over because of such duplicates are filled/released at the end.
 */
static Eterm flatmap_from_validated_list(Process *p, Eterm list, Eterm fill_value, Uint size) {
    Eterm *heap, *arity_slot, *key_arr, *val_arr, *pair;
    Eterm cursor = list;
    Eterm new_key, new_val, key_tuple, map_term;
    flatmap_t *flat;
    Uint dropped = 0;
    Uint shift;
    Sint order = 0;
    Sint pos = 0;

    /* Layout: [arity word + keys (absent when empty)] [map header] [values] */
    heap       = HAlloc(p, 3 + (size == 0 ? 0 : 1) + (2 * size));
    arity_slot = heap;
    if (size != 0) {
        key_tuple = make_tuple(heap);
        *heap++   = make_arityval(size);
    } else {
        key_tuple = ERTS_GLOBAL_LIT_EMPTY_TUPLE;
    }
    key_arr  = heap;
    flat     = (flatmap_t*)(heap + size);
    map_term = make_flatmap(flat);
    val_arr  = heap + size + MAP_HEADER_FLATMAP_SZ;

    flat->thing_word = MAP_HEADER_FLATMAP;
    flat->size = size; /* corrected below if duplicate keys are found */
    flat->keys = key_tuple;

    if (size == 0) {
        return map_term;
    }

    /* The first element goes straight into slot 0. */
    if (is_value(fill_value)) {
        key_arr[0] = CAR(list_val(cursor));
        val_arr[0] = fill_value;
    } else {
        pair       = tuple_val(CAR(list_val(cursor)));
        key_arr[0] = pair[1];
        val_arr[0] = pair[2];
    }
    size = 1;

    /* Insertion sort of the remaining elements. */
    for (cursor = CDR(list_val(cursor));
         is_list(cursor);
         cursor = CDR(list_val(cursor))) {

        if (is_value(fill_value)) {
            new_key = CAR(list_val(cursor));
            new_val = fill_value;
        } else {
            pair    = tuple_val(CAR(list_val(cursor)));
            new_key = pair[1];
            new_val = pair[2];
        }

        /*
         * Scan the keys backwards for the insert position (the "hole").
         * Nothing is moved during the scan because the key may turn out
         * to be equal to an existing one, in which case no hole is needed.
         */
        pos = size;
        while (pos > 0) {
            order = erts_cmp_flatmap_keys(new_key, key_arr[pos-1]);
            if (order >= 0) {
                break;
            }
            pos--;
        }

        if (order == 0) {
            /* Equal to the key just compared: overwrite that entry and
             * remember that one reserved slot will remain unused. */
            key_arr[pos-1] = new_key;
            val_arr[pos-1] = new_val;
            dropped++;
            continue;
        }

        /* Open the hole at `pos` and store the new entry there. */
        for (shift = size; shift > pos; shift--) {
            key_arr[shift] = key_arr[shift-1];
            val_arr[shift] = val_arr[shift-1];
        }
        key_arr[pos] = new_key;
        val_arr[pos] = new_val;
        size++;
    }

    if (dropped) {
        /* The key tuple is embedded in the heap, so its unused tail is
         * overwritten with a heap filler. The unused values are at the top
         * of the heap and are released as normal. */
        erts_write_heap_filler(key_arr + size, dropped);
        HRelease(p, val_arr + size + dropped, val_arr + size);
    }

    *arity_slot = make_arityval(size);
    flat->size  = size;
    return map_term;
}

/* **Important Note**
 *
 * A yielding version of this function is generated with YCF. This
 * means that the code has to follow some restrictions. See note about
 * YCF near the top of the file for more information.
 */
#ifdef INCLUDE_YCF_TRANSFORMED_ONLY_FUNCTIONS
/*
 * Builds a map from an already validated, non-empty proper list with
 * `size` elements.
 *
 * When `fill_value` is a value, each list element is a key that is mapped
 * to `fill_value`; otherwise each element is read as a {Key, Value} tuple.
 * One [Key|Value] cons cell per element is allocated on the heap of `p`
 * and passed, together with the swizzled hash of the key, to
 * hashmap_from_unsorted_array().
 *
 * If the resulting hashmap holds at most MAP_SMALL_MAP_LIMIT entries
 * (presumably because duplicate keys were collapsed), its entries are
 * copied into a flatmap, which is returned instead.
 */
static Eterm hashmap_from_validated_list(Process *p,
                                         Eterm list,
                                         Eterm fill_value,
                                         Uint size) {
    Eterm item = list;
    Eterm *hp;
    Eterm *kv;
    Eterm res;
    Eterm key;
    Eterm value;
    erts_ihash_t sw;
    erts_ihash_t hx;
    Uint ix = 0;
    hxnode_t *hxns;
    ErtsHeapFactory *factory;
#ifdef NOT_YCF_YIELDING_VERSION
    /* Macro to make YCF ignore declarations */
#define YCF_IGNORE(X) X
    YCF_IGNORE(ErtsHeapFactory factory_instance;)
#undef YCF_IGNORE
    factory = &factory_instance;
#else
    factory = YCF_STACK_ALLOC(sizeof(ErtsHeapFactory));
#endif
    ASSERT(size > 0);

    /* Two words (one cons cell) per list element */
    hp = HAlloc(p, (2 * size));

    /* create tmp hx values and leaf ptrs */
    hxns = (hxnode_t *)erts_alloc(ERTS_ALC_T_MAP_TRAP, size * sizeof(hxnode_t));

    while(is_list(item)) {
        YCF_CONSUME_REDS(1);
	res = CAR(list_val(item));
	if(is_value(fill_value)) {
	    key = res;
	    value = fill_value;
	} else {
	    kv = tuple_val(res);
	    key = kv[1];
	    value = kv[2];
	}
	hx  = hashmap_restore_hash(0,key);
	sw = swizzle_map_hash(hx);
	hxns[ix].hx   = sw;
	hxns[ix].val  = CONS(hp, key, value); hp += 2;
	hxns[ix].skip = 1; /* will be reassigned in from_array */
	hxns[ix].i    = ix;
	ix++;
	item = CDR(list_val(item));
    }

    erts_factory_proc_init(factory, p);
    res = hashmap_from_unsorted_array(factory, hxns, size, 0, ERTS_ALC_T_MAP_TRAP);
    erts_factory_close(factory);

    YCF_SPECIAL_CODE_START(ON_DESTROY_STATE);
    {
        if (hxns != NULL) {
            /* Execution of this function got destroyed while yielding in
               the loop above */
            erts_free(ERTS_ALC_T_MAP_TRAP, (void *) hxns);
        }
    }
    YCF_SPECIAL_CODE_END();
    erts_free(ERTS_ALC_T_MAP_TRAP, (void *) hxns);
    /* Memory management depends on the line below */
    hxns = NULL;
    ERTS_VERIFY_UNUSED_TEMP_ALLOC(p);
    
   /* No yielding in the loops below as the number of loop
      iterations must be small */

    if (hashmap_size(res) <= MAP_SMALL_MAP_LIMIT) {
        DECLARE_WSTACK(wstack);
	Eterm *kv;
        Eterm *ks;
        Eterm *vs;
	flatmap_t *mp;
	Eterm keys;
        Uint n = hashmap_size(res);
        ASSERT(n > 0);
	/* build flat structure */
	hp    = HAlloc(p, 3 + 1 + (2 * n));
	keys  = make_tuple(hp);
	*hp++ = make_arityval(n);
	ks    = hp;
	hp   += n;
	mp    = (flatmap_t*)hp;
	hp   += MAP_HEADER_FLATMAP_SZ;
	vs    = hp;

	mp->thing_word = MAP_HEADER_FLATMAP;
	mp->size = n;
	mp->keys = keys;

	/* Copy every key and value of the hashmap into the flat arrays */
	hashmap_iterator_init(&wstack, res, 0);

	while ((kv=hashmap_iterator_next(&wstack)) != NULL) {
	    *ks++ = CAR(kv);
	    *vs++ = CDR(kv);
	}

	/* it cannot have multiple keys */
	erts_validate_and_sort_flatmap(mp);

	DESTROY_WSTACK(wstack);
	return make_flatmap(mp);
    }

    return res;
}
#endif /* INCLUDE_YCF_TRANSFORMED_ONLY_FUNCTIONS */

/* maps:from_list/1
 * List may be unsorted [{K,V}]
 *
 * See note about YCF near the top of the file for more information
 * about the generated functions
 * maps_from_list_1_helper_ycf_gen_continue and
 * maps_from_list_1_helper_ycf_gen_yielding.
 *
 */
BIF_RETTYPE maps_from_list_1(BIF_ALIST_1) {
    const size_t iterations_per_red = 40;
    /* 2*sizeof(void*) is added because YCF_STACK_ALLOC may pad both
       allocations */
    const size_t ycf_stack_size =
        sizeof(ErtsHeapFactory) + 2*sizeof(Eterm) + 2*sizeof(void*);
    BIF_RETTYPE result;

    result = erts_ycf_trap_driver(BIF_P,
                                  BIF__ARGS,
                                  1,
                                  iterations_per_red,
                                  ERTS_ALC_T_MAP_TRAP,
                                  ycf_stack_size,
                                  BIF_maps_from_list_1,
                                  maps_from_list_1_helper_ycf_gen_continue,
                                  maps_from_list_1_helper_ycf_gen_destroy,
                                  maps_from_list_1_helper_ycf_gen_yielding);
    return result;
}

/* maps:from_keys/2
 * List may be unsorted
 *
 * See note about YCF near the top of the file for more information
 * about the generated functions
 * maps_from_keys_2_helper_ycf_gen_continue and
 * maps_from_keys_2_helper_ycf_gen_yielding.
 *
 */
BIF_RETTYPE maps_from_keys_2(BIF_ALIST_2) {
    const size_t iterations_per_red = 40;
    /* 2*sizeof(void*) is added because YCF_STACK_ALLOC may pad both
       allocations */
    const size_t ycf_stack_size =
        sizeof(ErtsHeapFactory) + 2*sizeof(Eterm) + 2*sizeof(void*);
    BIF_RETTYPE result;

    result = erts_ycf_trap_driver(BIF_P,
                                  BIF__ARGS,
                                  2,
                                  iterations_per_red,
                                  ERTS_ALC_T_MAP_TRAP,
                                  ycf_stack_size,
                                  BIF_maps_from_keys_2,
                                  maps_from_keys_2_helper_ycf_gen_continue,
                                  maps_from_keys_2_helper_ycf_gen_destroy,
                                  maps_from_keys_2_helper_ycf_gen_yielding);
    return result;
}

/*
 * Builds a hashmap from `n` leafs stored back to back in `leafs`, two
 * words per leaf with the key in the first word. Each leaf is used in
 * place as the cons cell of its entry.
 *
 * `reject_dupkeys` is passed on to hashmap_from_unsorted_array(), whose
 * result is returned.
 */
Eterm erts_hashmap_from_array(ErtsHeapFactory* factory, Eterm *leafs, Uint n,
                              int reject_dupkeys) {
    hxnode_t *nodes;
    hxnode_t *node;
    Eterm *leaf;
    Eterm map;
    erts_ihash_t hash;
    Uint pos;

    /* Temporary array with one swizzled hash and leaf term per entry. */
    nodes = (hxnode_t *)erts_alloc(ERTS_ALC_T_TMP, n * sizeof(hxnode_t));

    leaf = leafs;
    for (pos = 0; pos < n; pos++) {
        node       = &nodes[pos];
        hash       = hashmap_make_hash(*leaf);
        node->hx   = swizzle_map_hash(hash);
        node->val  = make_list(leaf);
        node->skip = 1;
        node->i    = pos;
        leaf += 2;
    }

    map = hashmap_from_unsorted_array(factory, nodes, n, reject_dupkeys, ERTS_ALC_T_TMP);

    erts_free(ERTS_ALC_T_TMP, (void *) nodes);

    return map;
}

/*
 * Builds a map from the `n` keys in `ks` and the `n` values in `vs`.
 *
 * For n <= MAP_SMALL_MAP_LIMIT an unvalidated, unsorted flatmap is laid out
 * on the factory heap; *fmpp is set to point at it and THE_NON_VALUE is
 * returned. For larger n, *fmpp is set to NULL and the result of
 * erts_hashmap_from_ks_and_vs() is returned.
 */
static ERTS_INLINE Eterm
from_ks_and_vs(ErtsHeapFactory *factory, Eterm *ks, Eterm *vs,
               Uint n, flatmap_t **fmpp)
{
    Eterm *heap;
    Eterm key_tuple;
    flatmap_t *flat;

    if (n > MAP_SMALL_MAP_LIMIT) {
        *fmpp = NULL;
        return erts_hashmap_from_ks_and_vs(factory, ks, vs, n);
    }

    if (n != 0) {
        /* Key tuple (arity word + keys), followed by map header + values */
        heap = erts_produce_heap(factory, 1 + MAP_HEADER_FLATMAP_SZ + 2*n, 0);
        key_tuple = make_tuple(heap);
        heap[0] = make_arityval(n);
        sys_memcpy((void *) (heap + 1),
                   (void *) ks,
                   n * sizeof(Eterm));
        heap += 1 + n;
    } else {
        /* The empty map refers to the global empty tuple literal */
        key_tuple = ERTS_GLOBAL_LIT_EMPTY_TUPLE;
        heap = erts_produce_heap(factory, MAP_HEADER_FLATMAP_SZ + n, 0);
    }

    flat = (flatmap_t*)heap;
    flat->thing_word = MAP_HEADER_FLATMAP;
    flat->size = n;
    flat->keys = key_tuple;

    sys_memcpy((void *) (heap + MAP_HEADER_FLATMAP_SZ),
               (void *) vs,
               n * sizeof(Eterm));

    *fmpp = flat;
    return THE_NON_VALUE;
}

/*
 * Builds a map from the `n` keys in `ks` and the `n` values in `vs`.
 *
 * When a flatmap was laid out, it is validated and sorted; THE_NON_VALUE is
 * returned if erts_validate_and_sort_flatmap() reports failure. Otherwise
 * the result of the hashmap construction is returned as is.
 */
Eterm erts_map_from_ks_and_vs(ErtsHeapFactory *factory, Eterm *ks, Eterm *vs, Uint n)
{
    flatmap_t *flat;
    Eterm built;

    built = from_ks_and_vs(factory, ks, vs, n, &flat);
    if (flat == NULL) {
        return built;
    }

    if (!erts_validate_and_sort_flatmap(flat)) {
        return THE_NON_VALUE;
    }
    return make_flatmap(flat);
}

/*
 * Builds a hashmap from the `n` keys in `ks` and the `n` values in `vs`,
 * plus the additional pair `key`/`value` unless `key` is THE_NON_VALUE.
 *
 * One [Key|Value] cons cell per entry is allocated from `factory`. The
 * total number of entries must exceed MAP_SMALL_MAP_LIMIT.
 */
Eterm erts_hashmap_from_ks_and_vs_extra(ErtsHeapFactory *factory,
                                        Eterm *ks, Eterm *vs, Uint n,
					Eterm key, Eterm value) {
    const int has_extra = (key != THE_NON_VALUE);
    hxnode_t *nodes;
    hxnode_t *node;
    Eterm *heap;
    Eterm map;
    erts_ihash_t hash;
    Uint total;
    Uint pos;

    total = has_extra ? (n + 1) : n;
    ASSERT(total > MAP_SMALL_MAP_LIMIT);
    heap = erts_produce_heap(factory, 2 * total, 0);

    /* Temporary array with one swizzled hash and leaf term per entry. */
    nodes = (hxnode_t *)erts_alloc(ERTS_ALC_T_TMP, total * sizeof(hxnode_t));

    for (pos = 0; pos < n; pos++) {
        node       = &nodes[pos];
        hash       = hashmap_make_hash(ks[pos]);
        node->hx   = swizzle_map_hash(hash);
        node->val  = CONS(heap, ks[pos], vs[pos]);
        node->skip = 1; /* will be reassigned in from_array */
        node->i    = pos;
        heap += 2;
    }

    if (has_extra) {
        /* The extra pair goes last, at index n. */
        node       = &nodes[n];
        hash       = hashmap_make_hash(key);
        node->hx   = swizzle_map_hash(hash);
        node->val  = CONS(heap, key, value);
        node->skip = 1;
        node->i    = n;
        heap += 2;
    }

    map = hashmap_from_unsorted_array(factory, nodes, total, 0, ERTS_ALC_T_TMP);

    erts_free(ERTS_ALC_T_TMP, (void *) nodes);

    return map;
}

/* **Important Note**
 *
 * A yielding version of this function is generated with YCF. This
 * means that the code has to follow some restrictions. See note about
 * YCF near the top of the file for more information.
 */
/*
 * Builds a hashmap from the `n` (> 0) entries of `hxns`, given in any order.
 *
 * The array is sorted with hxnodecmp(). Every run of equal hash values is
 * then re-sorted by key with hxnodecmpkey() so that duplicate keys become
 * adjacent. Of a group of duplicates only the entry with the highest `i`
 * is kept; if `reject_dupkeys` is set, THE_NON_VALUE is returned as soon as
 * a duplicate key is seen. Surviving entries are compacted in place to the
 * front of `hxns` (cx counts them).
 *
 * With more than one surviving entry the tree is built by
 * hashmap_from_sorted_unique_array(); a single entry becomes a head node
 * with one slot. `hxns` is not freed here.
 */
static Eterm hashmap_from_unsorted_array(ErtsHeapFactory* factory,
                                         hxnode_t *hxns, Uint n,
                                         int reject_dupkeys,
                                         ErtsAlcType_t temp_memory_allocator) {
    Uint jx = 0;
    Uint ix = 0;
    Uint lx;
    Uint cx;
    Eterm res;

    ASSERT(n > 0);

    /* sort and compact array (remove non-unique entries) */
    erts_qsort(hxns, n, sizeof(hxnode_t), hxnodecmp);

    /* ix is the read position, cx the write position of the compaction */
    ix = 0, cx = 0;
    while(ix < n - 1) {
	if (hxns[ix].hx == hxns[ix+1].hx) {

	    /* find region of equal hash values */
	    jx = ix + 2;
	    while(jx < n && hxns[ix].hx == hxns[jx].hx) { jx++; }
	    /* find all correct keys from region
	     * (last in list but now hash sorted so we check highest id instead) */

	    /* resort with keys instead of hash value within region */

	    erts_qsort(&hxns[ix], jx - ix, sizeof(hxnode_t), hxnodecmpkey);

	    while(ix < jx) {
		/* lx tracks the entry to keep for the current key */
		lx = ix;
		while(++ix < jx && EQ(CAR(list_val(hxns[ix].val)),
				      CAR(list_val(hxns[lx].val)))) {
                    if (reject_dupkeys)
                        return THE_NON_VALUE;

                    if (hxns[ix].i > hxns[lx].i) {
			lx = ix;
		    }
		}
		hxns[cx].hx  = hxns[lx].hx;
		hxns[cx].val = hxns[lx].val;
		cx++;
	    }
	    ix = jx;
	    continue;
	}
	/* Unique hash: move the entry down only if a gap has opened */
	if (ix > cx) {
	    hxns[cx].hx  = hxns[ix].hx;
	    hxns[cx].val = hxns[ix].val;
	}
	cx++;
	ix++;
    }

    /* The last entry is still unprocessed unless it ended an equal-hash run */
    if (ix < n) {
	hxns[cx].hx  = hxns[ix].hx;
	hxns[cx].val = hxns[ix].val;
	cx++;
    }

    if (cx > 1) {
        /* recursive decompose array */
        res = hashmap_from_sorted_unique_array(factory, hxns, cx,
                                               temp_memory_allocator);
    } else {
        Eterm slot;
        Eterm *hp;

        /* We only have one item, either because n was 1 or because we have
         * multiples of the same key.
         *
         * As the hash value has been swizzled, we need to drag it down to get
         * the correct slot. */
        slot = hxns[0].hx >> ((HAMT_MAX_LEVEL - 1) * 4);
        ASSERT(slot < 16);

        hp    = erts_produce_heap(factory, HAMT_HEAD_BITMAP_SZ(1), 0);
        hp[0] = MAP_HEADER_HAMT_HEAD_BITMAP(1 << slot);
        hp[1] = 1;
        hp[2] = hxns[0].val;
        res   = make_hashmap(hp);
    }

    return res;
}

/* **Important Note**
 *
 * A yielding version of this function is generated with YCF. This
 * means that the code has to follow some restrictions. See note about
 * YCF near the top of the file for more information.
 */
/*
 * Builds a hashmap from `n` entries of `hxns` that are sorted on hash and
 * have unique keys.
 *
 * Each run of entries with equal hash is folded into one collision node:
 * the node is stored in the `val` of the first entry of the run and the
 * `skip` of that entry is set to the length of the run. All other entries
 * get skip = 1. `elems` counts the resulting elements and is passed to
 * hashmap_from_chunked_array() together with `n` as the map size.
 *
 * NOTE(review): `tmp` is never assigned after its NULL initialization in
 * this function, so the ON_DESTROY_STATE block below never frees anything
 * as the code stands.
 */
static Eterm hashmap_from_sorted_unique_array(ErtsHeapFactory* factory,
                                              hxnode_t *hxns, Uint n,
                                              ErtsAlcType_t temp_memory_allocator) {
    Eterm res = NIL;
    Uint ix;
    Uint elems;
    hxnode_t *tmp = NULL;

    ix = 0;
    elems = 1;
    while (ix < n - 1) {
	if (hxns[ix].hx == hxns[ix+1].hx) {
            Uint n_colliders;
            Eterm* hp;
            Eterm collision_node;
            Uint jx = ix + 2;
            Uint i;

	    /* Find the end (exclusive) of the run of equal hash values */
	    while (jx < n && hxns[ix].hx == hxns[jx].hx)
                jx++;

            n_colliders = jx - ix;
            hp = erts_produce_heap(factory, HAMT_COLLISION_NODE_SZ(n_colliders),
                                   HALLOC_EXTRA);
            collision_node = make_tuple(hp);

            *hp++ = MAP_HEADER_HAMT_COLLISION_NODE(n_colliders);
            for (i = 0; i < n_colliders; i++) {
                *hp++ = hxns[ix + i].val;
                /* Colliders are expected in strictly ascending key order */
                ASSERT(i == 0
                       || CMP_TERM(CAR(list_val(hxns[ix+i-1].val)),
                                   CAR(list_val(hxns[ix+i].val))) < 0);
            }

            hxns[ix].val  = collision_node;
            hxns[ix].skip = n_colliders;
            ix = jx;

            /* Count the element that follows the run, if there is one */
            if (ix < n) { elems++; }
            continue;
	}
        hxns[ix].skip = 1;
        elems++;
        ix++;
    }
    YCF_SPECIAL_CODE_START(ON_DESTROY_STATE);
    {
        if (tmp != NULL) {
            /* Execution of this function got destroyed while yielding in
               the loop above */
            erts_free(temp_memory_allocator, (void *) tmp);
        }
    }
    YCF_SPECIAL_CODE_END();
    res = hashmap_from_chunked_array(factory, hxns, elems, n, temp_memory_allocator);

    ERTS_FACTORY_HOLE_CHECK(factory);

    return res;
}

/* **Important Note**
 *
 * A yielding version of this function is generated with YCF. This
 * means that the code has to follow some restrictions. See note about
 * YCF near the top of the file for more information.
 */
/*
 * Builds the hashmap tree for the `n` (>= 1) elements of `hxns` prepared by
 * hashmap_from_sorted_unique_array().
 *
 * An element starts at some index ix and the following element starts at
 * ix + hxns[ix].skip. An element with skip > 1 carries a collision node in
 * `val`, which is wrapped in single-slot nodes down from level
 * HAMT_MAX_LEVEL - 1. The hash values of consecutive elements are expected
 * to be strictly decreasing (see the assertions).
 *
 * `size` is written as the size word of the head node. Headers (slot
 * bitmaps) and finished subtrees of nodes under construction are kept on
 * an explicit stack. Returns the hashmap term.
 */
static Eterm hashmap_from_chunked_array(ErtsHeapFactory *factory, hxnode_t *hxns, Uint n,
                                        Uint size,
                                        ErtsAlcType_t temp_memory_allocator) {
    Uint ix;
    Uint d;
    Uint dn;
    Uint dc;
    Uint slot;
    Uint elems;
    erts_ihash_t v;
    erts_ihash_t vp;
    erts_ihash_t vn;
    Uint32 hdr;
    Uint bp;
    Uint sz;
    Eterm res = NIL;
    Eterm *hp = NULL;
    Eterm *nhp = NULL;
    Eterm stack_default_estack[16];
#if DEF_ESTACK_SIZE != (16)
    #error "The macro DEF_ESTACK_SIZE has changed from 16 (need to change constant above)"
    /* We cannot use "Eterm stack_default_estack[DEF_ESTACK_SIZE];"
       because macros are not expanded before the code is passed to
       YCF, and YCF needs to know the size of the array */
#endif
    ErtsEStack stack;
    stack = ESTACK_DEFAULT_VALUE(stack_default_estack, temp_memory_allocator);
    YCF_SPECIAL_CODE_START(ON_SAVE_YIELD_STATE);
    {
        ENSURE_ESTACK_HEAP_STACK_ARRAY(stack, stack_default_estack);
    }
    YCF_SPECIAL_CODE_END();
    YCF_SPECIAL_CODE_START(ON_DESTROY_STATE);
    {
        DESTROY_ESTACK_EXPLICIT_DEFAULT_ARRAY(stack, stack_default_estack);
    }
    YCF_SPECIAL_CODE_END();
    /* if we get here with only one element then
     * we have eight levels of collisions
     */

    if (n == 1) {
	/* Wrap the element in single-slot nodes from the deepest level
	 * up to level 1, then put the result in a single-slot head node */
	res = hxns[0].val;
	v   = hxns[0].hx;
	for (d = HAMT_MAX_LEVEL-1; d > 0; d--) {
	    slot  = maskval(v,d);
	    hp    = erts_produce_heap(factory, HAMT_NODE_BITMAP_SZ(1), HALLOC_EXTRA);
	    hp[0] = MAP_HEADER_HAMT_NODE_BITMAP(1 << slot);
	    hp[1] = res;
	    res   = make_hashmap(hp);
	}

	slot  = maskval(v,0);
	hp    = erts_produce_heap(factory, 3, 0);

        hp[0] = MAP_HEADER_HAMT_HEAD_BITMAP(1 << slot);
        hp[1] = size;
        hp[2] = res;
	return make_hashmap(hp);
    }

    /* push initial nodes on the stack,
     * this is the starting depth */

    /* vp is the hash of the first element, v the hash of the second */
    ix = 0;
    d  = 0;
    vp = hxns[ix].hx;
    v  = hxns[ix + hxns[ix].skip].hx;

    ASSERT(vp > v);
    slot = maskval(vp,d);

    /* One header per level on which the first two elements share a slot */
    while(slot == maskval(v,d)) {
	ESTACK_PUSH(stack, 1 << slot);
	d++;
	slot = maskval(vp,d);
    }

    res = hxns[ix].val;

    if (hxns[ix].skip > 1) {
	dc = HAMT_MAX_LEVEL - 1;
	/* build collision nodes */
	while (dc > d) {
	    hp    = erts_produce_heap(factory, HAMT_NODE_BITMAP_SZ(1), HALLOC_EXTRA);
	    hp[0] = MAP_HEADER_HAMT_NODE_BITMAP(1 << maskval(vp,dc));
	    hp[1] = res;
	    res   = make_hashmap(hp);
	    dc--;
	}
    }

    ESTACK_PUSH2(stack,res,1 << slot);

    /* all of the other nodes .. */
    elems = n - 2; /* remove first and last elements */
    while(elems--) {
	hdr = ESTACK_POP(stack);
	ix  = ix + hxns[ix].skip;

	/* determine if node or subtree should be built by looking
	 * at the next value. */

	vn = hxns[ix + hxns[ix].skip].hx;
	dn = cdepth(v,vn);
	ASSERT(v > vn);

	res = hxns[ix].val;

	if (hxns[ix].skip > 1) {
	    int wat = (d > dn) ? d : dn;
	    dc = HAMT_MAX_LEVEL - 1;
	    /* build collision nodes */
	    while (dc > wat) {
		hp    = erts_produce_heap(factory, HAMT_NODE_BITMAP_SZ(1), HALLOC_EXTRA);
		hp[0] = MAP_HEADER_HAMT_NODE_BITMAP(1 << maskval(v,dc));
		hp[1] = res;
		res   = make_hashmap(hp);
		dc--;
	    }
	}

	/* next depth is higher (implies collision) */
	if (d < dn) {
	    /* hdr is the popped one initially */
	    while(d < dn) {
		slot = maskval(v, d);
		bp   = 1 << slot;
		ESTACK_PUSH(stack, hdr | bp);
		d++;
		hdr = 0; /* clear hdr for all other collisions */
	    }

	    slot = maskval(v, d);
	    bp   = 1 << slot;
	    /* no more collisions */
            ESTACK_PUSH2(stack,res,bp);
	} else if (d == dn) {
	    /* no collisions at all */
	    slot = maskval(v, d);
	    bp   = 1 << slot;
            ESTACK_PUSH2(stack,res,hdr | bp);
	} else {
	    /* dn < d, we have a drop and we are done
	     * build nodes and subtree */
	    while (dn != d) {
		slot  = maskval(v, d);
		bp    = 1 << slot;
		/* OR bitposition before sz calculation to handle
		 * redundant collisions */
		hdr  |= bp;
		sz    = hashmap_bitcount(hdr);
		hp    = erts_produce_heap(factory, HAMT_NODE_BITMAP_SZ(sz), HALLOC_EXTRA);
		nhp   = hp;
		*hp++ = MAP_HEADER_HAMT_NODE_BITMAP(hdr);
		*hp++ = res; sz--;
		while (sz--) { *hp++ = ESTACK_POP(stack); }
		ASSERT((hp - nhp) < 18);
		res = make_hashmap(nhp);

		/* we need to pop the next hdr and push if we don't need it */

		hdr = ESTACK_POP(stack);
		d--;
	    }
            ESTACK_PUSH2(stack,res,hdr);
	}

	/* Advance the (previous, current) hash pair and the current depth */
	vp = v;
	v  = vn;
	d  = dn;
	ERTS_FACTORY_HOLE_CHECK(factory);
    }

    /* v and vp are reused from above */
    /* Last element: ix is stepped to it and it goes into the open node */
    dn  = cdepth(vp,v);
    ix  = ix + hxns[ix].skip;
    res = hxns[ix].val;

    if (hxns[ix].skip > 1) {
	dc = HAMT_MAX_LEVEL - 1;
	/* build collision nodes */
	while (dc > dn) {
	    hp    = erts_produce_heap(factory, HAMT_NODE_BITMAP_SZ(1), HALLOC_EXTRA);
	    hp[0] = MAP_HEADER_HAMT_NODE_BITMAP(1 << maskval(v,dc));
	    hp[1] = res;
	    res   = make_hashmap(hp);
	    dc--;
	}
    }

    hdr = ESTACK_POP(stack);
    /* pop remaining subtree if any */
    while (dn) {
	slot  = maskval(v, dn);
	bp    = 1 << slot;
	/* OR bitposition before sz calculation to handle
	 * redundant collisions */
	hdr  |= bp;
	sz    = hashmap_bitcount(hdr);
	hp    = erts_produce_heap(factory, HAMT_NODE_BITMAP_SZ(sz), HALLOC_EXTRA);
	nhp   = hp;
	*hp++ = MAP_HEADER_HAMT_NODE_BITMAP(hdr);
	*hp++ = res; sz--;

        ASSERT(ESTACK_COUNT(stack) > sz);

	while (sz--) { *hp++ = ESTACK_POP(stack); }
	res = make_hashmap(nhp);
	hdr = ESTACK_POP(stack);
	dn--;
    }

    /* and finally the root .. */

    slot  = maskval(v, dn);
    bp    = 1 << slot;
    hdr  |= bp;
    sz    = hashmap_bitcount(hdr);
    hp    = erts_produce_heap(factory, sz + /* hdr + item */ 2, 0);
    nhp   = hp;

    /* A head with all 16 slots in use gets the array header instead of
     * a bitmap header */
    *hp++ = (hdr == 0xffff) ? MAP_HEADER_HAMT_HEAD_ARRAY : MAP_HEADER_HAMT_HEAD_BITMAP(hdr);
    *hp++ = size;

    *hp++ = res; sz--;
    while (sz--) { *hp++ = ESTACK_POP(stack); }

    res = make_hashmap(nhp);

    ASSERT(ESTACK_COUNT(stack) == 0);
    DESTROY_ESTACK_EXPLICIT_DEFAULT_ARRAY(stack, stack_default_estack);
    ERTS_FACTORY_HOLE_CHECK(factory);
    return res;
}

/*
 * Sort callback that orders two hxnode_t entries by their keys (the head
 * of each entry's cons cell) using CMP_TERM().
 */
static int hxnodecmpkey(const void *va, const void *vb) {
    const hxnode_t *lhs = (const hxnode_t*) va;
    const hxnode_t *rhs = (const hxnode_t*) vb;
    Sint order = CMP_TERM(CAR(list_val(lhs->val)), CAR(list_val(rhs->val)));
#if ERTS_SIZEOF_ETERM <= SIZEOF_INT
    return order;
#else
    /* Sint is wider than int here, so only the sign is passed on. */
    if (order > 0) {
        return 1;
    }
    if (order < 0) {
        return -1;
    }
    return 0;
#endif
}

static int hxnodecmp(const void *va, const void *vb) {
    const hxnode_t *a = (const hxnode_t*) va;
    const hxnode_t *b = (const hxnode_t*) vb;

    if (a->hx < b->hx)
	return 1;
    else if (a->hx == b->hx)
	return 0;
    else
	return -1;
}

/* maps:is_key/2 and erlang:is_map_key/2 */

/*
 * maps:is_key(Key, Map).
 *
 * Fails with BADMAP (fvalue set to the second argument) when the second
 * argument is not a map; otherwise returns am_true when erts_maps_get()
 * finds the key and am_false when it does not.
 */
BIF_RETTYPE maps_is_key_2(BIF_ALIST_2) {
    if (!is_map(BIF_ARG_2)) {
        BIF_P->fvalue = BIF_ARG_2;
        BIF_ERROR(BIF_P, BADMAP);
    }

    if (erts_maps_get(BIF_ARG_1, BIF_ARG_2)) {
        BIF_RET(am_true);
    }
    BIF_RET(am_false);
}

/* erlang:is_map_key(Key, Map): same behaviour as maps:is_key/2. */
BIF_RETTYPE is_map_key_2(BIF_ALIST_2) {
    /* NOTE: The JIT has its own implementation of this BIF. */
    BIF_RETTYPE is_key_result = maps_is_key_2(BIF_CALL_ARGS);

    BIF_RET(is_key_result);
}

/* maps:keys/1 */

/* **Important Note**
 *
 * A yielding version of this function is generated with YCF. This
 * means that the code has to follow some restrictions. See note about
 * YCF near the top of the file for more information.
 */
#ifdef INCLUDE_YCF_TRANSFORMED_ONLY_FUNCTIONS
/*
 * Worker for maps:keys/1 (the source of the YCF-generated yielding
 * variant, so the code shape is kept deliberately simple).
 *
 * p        - calling process; the result list is built on its heap
 * bif_args - BIF argument vector; bif_args[0] is the map
 *
 * Returns the list of keys.  For a flatmap the list is in the same order
 * as the key tuple; for a hashmap the work is done by hashmap_keys().
 * If the argument is not a map, sets p->fvalue/p->freason to signal
 * BADMAP and returns THE_NON_VALUE.
 */
static BIF_RETTYPE maps_keys_1_helper(Process* p, Eterm* bif_args) {
    Eterm map = bif_args[0];
    if (is_flatmap(map)) {
	Eterm *hp;
        Eterm *ks;
        Eterm res = NIL;
	flatmap_t *mp;
	Uint n;

	mp  = (flatmap_t*)flatmap_val(map);
	n   = flatmap_get_size(mp);

	if (n == 0)
	    BIF_RET(res);

	/* one cons cell (2 words) per key */
	hp  = HAlloc(p, (2 * n));
	ks  = flatmap_get_keys(mp);

	/* cons from the last key towards the first so that the finished
	 * list starts with ks[0] */
	while(n--) {
	    res = CONS(hp, ks[n], res); hp += 2;
	}

	return res;
    } else if (is_hashmap(map)) {
        /* YCF cannot handle function calls as return expression */
        BIF_RETTYPE res = hashmap_keys(p, map);
	return res;
    }
    p->fvalue = map;
    (p)->freason = BADMAP;
    return THE_NON_VALUE;
}
#endif /* INCLUDE_YCF_TRANSFORMED_ONLY_FUNCTIONS */

/*
 * BIF entry point for maps:keys/1.
 *
 * Delegates to erts_ycf_trap_driver(), passing the continue/destroy/
 * yielding variants generated from maps_keys_1_helper.
 */
BIF_RETTYPE maps_keys_1(BIF_ALIST_1) {
    const size_t iters_per_reduction = 15;

    return erts_ycf_trap_driver(BIF_P, BIF__ARGS, 1, iters_per_reduction,
                                ERTS_ALC_T_MAP_TRAP, 0, BIF_maps_keys_1,
                                maps_keys_1_helper_ycf_gen_continue,
                                maps_keys_1_helper_ycf_gen_destroy,
                                maps_keys_1_helper_ycf_gen_yielding);
}

/* maps:merge/2 */

BIF_RETTYPE maps_merge_2(BIF_ALIST_2) {
    if (BIF_ARG_1 == BIF_ARG_2) {
	/* Merging upon itself always returns itself */
	if (is_map(BIF_ARG_1)) {
	    return BIF_ARG_1;
	}
	BIF_P->fvalue = BIF_ARG_1;
    } else if (is_flatmap(BIF_ARG_1)) {
	if (is_flatmap(BIF_ARG_2)) {
	    BIF_RET(flatmap_merge(BIF_P, BIF_ARG_1, BIF_ARG_2));
	} else if (is_hashmap(BIF_ARG_2)) {
	    /* Will always become a tree */
            return map_merge_mixed(BIF_P, BIF_ARG_1, BIF_ARG_2, 0);
	}
	BIF_P->fvalue = BIF_ARG_2;
    } else if (is_hashmap(BIF_ARG_1)) {
	if (is_hashmap(BIF_ARG_2)) {
	    return hashmap_merge(BIF_P, BIF_ARG_1, BIF_ARG_2, 0, NULL);
	} else if (is_flatmap(BIF_ARG_2)) {
	    /* Will always become a tree */
	    return map_merge_mixed(BIF_P, BIF_ARG_2, BIF_ARG_1, 1);
	}
	BIF_P->fvalue = BIF_ARG_2;
    } else {
	BIF_P->fvalue = BIF_ARG_1;
    }
    BIF_ERROR(BIF_P, BADMAP);
}

/*
 * Merge two flatmaps.
 *
 * p    - process whose heap receives the result
 * map1 - left-hand flatmap
 * map2 - right-hand flatmap; its value wins when a key is in both maps
 *
 * Both key arrays are walked in parallel in erts_cmp_flatmap_keys()
 * order.  Space for the worst case (no shared keys) is allocated up
 * front and the unused part is released or overwritten with filler
 * afterwards.
 *
 * Returns map1 or map2 unchanged when the other one is empty, map2 when
 * every key of map1 is also present in map2, a new flatmap otherwise,
 * or a hashmap if the merged size exceeds MAP_SMALL_MAP_LIMIT.
 */
static Eterm flatmap_merge(Process *p, Eterm map1, Eterm map2) {
    Eterm *hp,*thp;
    Eterm *ks,*vs,*ks1,*vs1,*ks2,*vs2;
    flatmap_t *mp1,*mp2,*mp_new;
    Uint n,n1,n2,i1,i2,need,unused_size=0;
    Sint c = 0;

    mp1  = (flatmap_t*)flatmap_val(map1);
    mp2  = (flatmap_t*)flatmap_val(map2);
    n1   = flatmap_get_size(mp1);
    n2   = flatmap_get_size(mp2);

    if (n1 == 0) return map2;
    if (n2 == 0) return map1;

    /* flatmap header + (n1+n2) values + key tuple header + (n1+n2) keys */
    need = MAP_HEADER_FLATMAP_SZ + 1 + 2 * (n1 + n2);

    /* Carve the single allocation up in the order listed above:
     * mp_new -> flatmap header, vs -> values, thp -> tuple header,
     * ks -> keys; hp ends up pointing at the end of the allocation. */
    hp     = HAlloc(p, need);
    mp_new = (flatmap_t*)hp; hp += MAP_HEADER_FLATMAP_SZ;
    vs     = hp; hp += n1 + n2;
    thp    = hp;
    ks     = hp + 1; hp += 1 + n1 + n2;

    mp_new->thing_word = MAP_HEADER_FLATMAP;
    mp_new->keys = make_tuple(thp);

    i1  = 0; i2 = 0;
    ks1 = flatmap_get_keys(mp1);
    vs1 = flatmap_get_values(mp1);
    ks2 = flatmap_get_keys(mp2);
    vs2 = flatmap_get_values(mp2);

    while(i1 < n1 && i2 < n2) {
	/* identical terms are equal; skip the full comparison for them */
	c = (ks1[i1] == ks2[i2]) ? 0 : erts_cmp_flatmap_keys(ks1[i1],ks2[i2]);
	if (c == 0) {
	    /* use righthand side arguments map value,
	     * but advance both maps */
	    *ks++ = ks2[i2];
	    *vs++ = vs2[i2];
	    i1++, i2++, unused_size++;
	} else if (c < 0) {
	    *ks++ = ks1[i1];
	    *vs++ = vs1[i1];
	    i1++;
	} else {
	    *ks++ = ks2[i2];
	    *vs++ = vs2[i2];
	    i2++;
	}
    }

    /* copy remaining */
    while (i1 < n1) {
	*ks++ = ks1[i1];
	*vs++ = vs1[i1];
	i1++;
    }

    while (i2 < n2) {
	*ks++ = ks2[i2];
	*vs++ = vs2[i2];
	i2++;
    }

    /* unused_size counts the keys present in both maps, i.e. the number
     * of value slots and key slots that were allocated but not filled */
    n = n1 + n2 - unused_size;
    mp_new->size = n;
    *thp = make_arityval(n);

    if (unused_size ) {
        Eterm* hp_release;

        if (n == n2) {
            /* Reuse entire map2 */
            /* (n == n2 means all n1 keys of map1 were shared, so the
             * merge result has exactly map2's keys and values) */
            if (n == n1
                &&  erts_is_literal(mp1->keys, boxed_val(mp1->keys))
                && !erts_is_literal(mp2->keys, boxed_val(mp2->keys))) {
                /*
                 * We want map2, but map1 has a nice literal key tuple.
                 * Solution: MUTATE HEAP to get both.
                 */
                ASSERT(eq(mp1->keys, mp2->keys));
                mp2->keys = mp1->keys;
            }
            /* give back the whole allocation */
            HRelease(p, hp, (Eterm *)mp_new);
            return map2;
        }
        else if (n == n1) {
            /* Reuse key tuple of map1 */
            /* (n == n1 means all n2 keys of map2 were shared, so the
             * key set is the same as map1's) */
            mp_new->keys = mp1->keys;
            /* Release key tuple and unused values */
            hp_release = thp - unused_size;
        }
        else {
            /* Unused values are embedded in the heap, write filler to clear them */
            erts_write_heap_filler(vs, unused_size);
            /* Release unused keys */
            hp_release = ks;
        }
	HRelease(p, hp, hp_release);
    }

    /* Reshape map to a hashmap if the map exceeds the limit */

    if (n > MAP_SMALL_MAP_LIMIT) {
	erts_ihash_t hx,sw;
	Uint i;
	Eterm res;
	hxnode_t *hxns;
        ErtsHeapFactory factory;

	ks = flatmap_get_keys(mp_new);
	vs = flatmap_get_values(mp_new);

	/* one [Key|Value] cons cell per association */
	hp = HAlloc(p, 2 * n);

	hxns = (hxnode_t *)erts_alloc(ERTS_ALC_T_TMP,n * sizeof(hxnode_t));

	for (i = 0; i < n; i++) {
	    hx = hashmap_make_hash(ks[i]);
	    sw = swizzle_map_hash(hx);
	    hxns[i].hx   = sw;
	    hxns[i].val  = CONS(hp, ks[i], vs[i]); hp += 2;
	    hxns[i].skip = 1;
	    hxns[i].i    = i;
	}

        erts_factory_proc_init(&factory, p);
	res = hashmap_from_unsorted_array(&factory, hxns, n, 0, ERTS_ALC_T_TMP);
	erts_factory_close(&factory);

	erts_free(ERTS_ALC_T_TMP, (void *) hxns);
	ERTS_VERIFY_UNUSED_TEMP_ALLOC(p);

	return res;
    }

    return make_flatmap(mp_new);
}

/*
 * Merge a flatmap with a hashmap.
 *
 * p         - process whose heap receives the result
 * flat      - the flatmap argument
 * tree      - the hashmap argument
 * swap_args - passed on to hashmap_merge(); tells it that the converted
 *             flatmap, given as first map, is really the right-hand side
 *
 * An empty flatmap yields `tree` unchanged.  Otherwise the flatmap is
 * first converted to a hashmap (one [Key|Value] cell per association,
 * built through hashmap_from_unsorted_array()) and the two trees are
 * then merged with hashmap_merge().
 */
static Eterm map_merge_mixed(Process *p, Eterm flat, Eterm tree, int swap_args) {
    flatmap_t *flat_mp;
    Eterm *keys, *values, *cell;
    Eterm flat_as_tree;
    hxnode_t *nodes;
    Uint count, pos;
    ErtsHeapFactory factory;

    ASSERT(is_flatmap(flat));
    ASSERT(is_hashmap(tree));

    flat_mp = (flatmap_t*)flatmap_val(flat);
    count   = flatmap_get_size(flat_mp);
    if (count == 0) {
        return tree;
    }

    keys   = flatmap_get_keys(flat_mp);
    values = flatmap_get_values(flat_mp);

    /* two heap words per cons cell */
    cell  = HAlloc(p, 2 * count);
    nodes = (hxnode_t *)erts_alloc(ERTS_ALC_T_TMP, count * sizeof(hxnode_t));

    for (pos = 0; pos < count; pos++, cell += 2) {
        hxnode_t *node = &nodes[pos];
        erts_ihash_t hash = hashmap_make_hash(keys[pos]);

        node->hx   = swizzle_map_hash(hash);
        node->val  = CONS(cell, keys[pos], values[pos]);
        node->skip = 1;
        node->i    = pos;
    }

    erts_factory_proc_init(&factory, p);
    flat_as_tree = hashmap_from_unsorted_array(&factory, nodes, count, 0,
                                               ERTS_ALC_T_TMP);
    erts_factory_close(&factory);

    erts_free(ERTS_ALC_T_TMP, (void *) nodes);
    ERTS_VERIFY_UNUSED_TEMP_ALLOC(p);

    return hashmap_merge(p, flat_as_tree, tree, swap_args, NULL);
}

#define PSTACK_TYPE struct HashmapMergePStackType
/*
 * One stack frame of hashmap_merge(): the state needed to merge one pair
 * of nodes (or leaves), one from each tree.
 */
struct HashmapMergePStackType {
    Eterm nodeA, nodeB; /* the pair being merged (read from *srcA / *srcB) */
    Eterm *srcA, *srcB; /* cursors: first at the slot holding the node,
                         * later walking the node's child slots */
    Uint32 abm, bbm, rbm; /* node bitmaps */
                          /* abm/bbm: bitmap of nodeA/nodeB,
                           * rbm: bits of (abm | bbm) not yet processed */
    int mix;       /* &1: there are unique A stuff in node
                    * &2: there are unique B stuff in node */
    int ix;        /* number of entries written to array[] so far */
    Eterm array[16];   /* temp node construction area */
};

/*
 * State of a hashmap_merge() operation that must survive a trap.
 * Lives on the C stack during the first call and is copied into a magic
 * binary when the merge has to yield.
 */
typedef struct HashmapMergeContext_ {
    Uint size;  /* total key-value counter */
                /* (starts as size(A) + size(B) and is decremented for
                 * every key found in both maps) */
    unsigned int lvl;   /* current tree depth; 0 is the root */
    Eterm trap_bin;     /* magic ref to the binary holding this context */
    ErtsPStack pstack;  /* saved merge stack (PSTACK_SAVE/PSTACK_RESTORE) */
#ifdef DEBUG
    Eterm dbg_map_A, dbg_map_B; /* the original arguments, debug builds only */
#endif
} HashmapMergeContext;

/*
 * Destructor of the magic binary that carries a HashmapMergeContext:
 * releases the saved merge stack.  Always returns 1.
 */
static int hashmap_merge_ctx_destructor(Binary* ctx_bin)
{
    HashmapMergeContext* merge_ctx;

    ASSERT(ERTS_MAGIC_BIN_DESTRUCTOR(ctx_bin) == hashmap_merge_ctx_destructor);

    merge_ctx = (HashmapMergeContext*) ERTS_MAGIC_BIN_DATA(ctx_bin);
    PSTACK_DESTROY_SAVED(&merge_ctx->pstack);

    return 1;
}

BIF_RETTYPE maps_merge_trap_1(BIF_ALIST_1) {
    Binary* ctx_bin = erts_magic_ref2bin(BIF_ARG_1);

    ASSERT(ERTS_MAGIC_BIN_DESTRUCTOR(ctx_bin) == hashmap_merge_ctx_destructor);

    return hashmap_merge(BIF_P, NIL, NIL, 0,
                         (HashmapMergeContext*) ERTS_MAGIC_BIN_DATA(ctx_bin));
}

#define MAP_MERGE_LOOP_FACTOR 8

/*
 * Build a collision node out of two arrays of [Key|Value] leaves.
 *
 * p         - process whose heap receives the node
 * srcA/szA  - leaves from the A side and their count (>= 1)
 * srcB/szB  - leaves from the B side and their count (>= 1)
 * map_sizep - total map size; decremented once per key found on both sides
 *
 * This is an ordered merge on CMP_TERM() of the keys, so it presumes
 * that both inputs are already in that order.  For a key present on
 * both sides only the B leaf is kept.  Space is allocated for szA + szB
 * leaves and the part not needed is released again.
 *
 * Returns the node as a tuple term.
 */
static Eterm merge_collision_node(Process* p,
                                  Eterm* srcA, Uint szA,
                                  Eterm* srcB, Uint szB,
                                  Uint* map_sizep)
{
    Eterm *node_start;
    Eterm *out;
    Eterm *alloc_end;
    Uint n_leafs;

    ERTS_ASSERT(szA >= 1 && szB >= 1);
    n_leafs    = szA + szB;
    node_start = HAlloc(p, HAMT_COLLISION_NODE_SZ(n_leafs));
    alloc_end  = node_start + HAMT_COLLISION_NODE_SZ(n_leafs);
    out        = node_start + 1;    /* the header word is written last */

    while (szA > 0 && szB > 0) {
        Eterm key_a = CAR(list_val(*srcA));
        Eterm key_b = CAR(list_val(*srcB));
        const Sint order = CMP_TERM(key_a, key_b);

        if (order < 0) {
            *out++ = *srcA++;
            szA--;
            continue;
        }

        *out++ = *srcB++;
        szB--;
        if (order == 0) {
            /* same key on both sides: drop the A leaf */
            srcA++;
            szA--;
            n_leafs--;
            (*map_sizep)--;
        }
    }
    ASSERT(n_leafs >= 2);

    while (szA > 0) {
        *out++ = *srcA++;
        szA--;
    }
    while (szB > 0) {
        *out++ = *srcB++;
        szB--;
    }

    HRelease(p, alloc_end, out);
    *node_start = make_arityval(n_leafs);
    return make_tuple(node_start);
}


/*
 * Merge two hashmaps, yielding (trapping) when the reduction budget
 * runs out.
 *
 * p         - calling process; new nodes are allocated on its heap
 * map_A     - first hashmap  (only read on the first call)
 * map_B     - second hashmap (only read on the first call)
 * swap_args - non-zero: walk map_B as the "A" side and map_A as the "B"
 *             side (only read on the first call)
 * ctx       - NULL on the first call; on re-entry after a trap, the
 *             context stored in the magic binary
 *
 * When the same key is found on both sides the leaf from the B side is
 * used.  Sub-trees found only on one side, and nodes whose content
 * turns out to come from one side only, are reused instead of copied.
 *
 * Returns the merged hashmap, or the value prepared by
 * ERTS_BIF_PREP_TRAP1() when the operation yields.  Garbage collection
 * is disabled for the process from the first trap until the merge
 * completes.
 */
static BIF_RETTYPE hashmap_merge(Process *p, Eterm map_A, Eterm map_B,
                                 int swap_args, HashmapMergeContext* ctx) {
#define PSTACK_TYPE struct HashmapMergePStackType
    PSTACK_DECLARE(s, 4);
    HashmapMergeContext local_ctx;
    struct HashmapMergePStackType* sp;
    erts_ihash_t hx;
    Eterm res = THE_NON_VALUE;
    Eterm hdrA, hdrB;
    Eterm *hp, *nhp;
    Eterm trap_ret;
    /* The loop budget is MAP_MERGE_LOOP_FACTOR iterations per reduction */
    Sint initial_reds = (Sint) (ERTS_BIF_REDS_LEFT(p) * MAP_MERGE_LOOP_FACTOR);
    Sint reds =  initial_reds;
    Uint coll_szA = 0, coll_szB = 0;

    /*
     * Strategy: Do depth-first traversal of both trees (at the same time)
     * and merge each pair of nodes.
     */

    PSTACK_CHANGE_ALLOCATOR(s, ERTS_ALC_T_SAVED_ESTACK);

    if (ctx == NULL) { /* first call */
        hashmap_head_t* a = (hashmap_head_t*) hashmap_val(map_A);
        hashmap_head_t* b = (hashmap_head_t*) hashmap_val(map_B);

        /* The root frame reads its nodes from the argument variables */
        sp = PSTACK_PUSH(s);
        sp->srcA = swap_args ? &map_B : &map_A;
        sp->srcB = swap_args ? &map_A : &map_B;
        sp->mix = 0;
        /* Upper bound; decremented for every key present in both maps */
        local_ctx.size = a->size + b->size;
        local_ctx.lvl = 0;
    #ifdef DEBUG
        local_ctx.dbg_map_A = map_A;
        local_ctx.dbg_map_B = map_B;
        local_ctx.trap_bin = THE_NON_VALUE;
    #endif
        ctx = &local_ctx;
    }
    else {
        PSTACK_RESTORE(s, &ctx->pstack);
        sp = PSTACK_TOP(s);
        goto resume_from_trap;
    }

recurse:

    /* Entered with sp->srcA/srcB pointing at the slots that hold the
     * pair of nodes to merge */
    sp->nodeA = *sp->srcA;
    sp->nodeB = *sp->srcB;

    if (sp->nodeA == sp->nodeB) {
        /* Same term on both sides: reuse it and remove the keys that
         * were counted twice */
        res = sp->nodeA;
        ctx->size -= is_list(sp->nodeB) ? 1 : hashmap_subtree_size(sp->nodeB);
        ASSERT(is_value(res));
    }
    else {
        if (is_list(sp->nodeA)) { /* A is LEAF */
            Eterm keyA = CAR(list_val(sp->nodeA));

            if (is_list(sp->nodeB)) { /* LEAF + LEAF */
                Eterm keyB = CAR(list_val(sp->nodeB));

                if (EQ(keyA, keyB)) {
                    --ctx->size;
                    res = sp->nodeB;
                    sp->mix = 2;   /* We assume values differ.
                                      + Don't spend time comparing big values.
                                      - Might waste some heap space for internal
                                        nodes that could otherwise be reused. */
                    ASSERT(is_value(res));
                    goto merge_nodes;
                }
            }
            if (ctx->lvl < HAMT_MAX_LEVEL) {
                /* Treat the leaf as a node with a single occupied slot */
                hx = hashmap_restore_hash(ctx->lvl, keyA);
                sp->abm = 1 << hashmap_index(hx);
            }
            else {
                coll_szA = 1;
            }
            /* keep srcA pointing at the leaf */
        }
        else { /* A is NODE */
            sp->srcA = boxed_val(sp->nodeA);
            hdrA = *sp->srcA++;
            ASSERT(is_header(hdrA));
            switch (hdrA & _HEADER_MAP_SUBTAG_MASK) {
            case HAMT_SUBTAG_HEAD_ARRAY: {
                ASSERT(ctx->lvl < HAMT_MAX_LEVEL);
                sp->srcA++;     /* skip the word after the head header */
                sp->abm = 0xffff;
                break;
            }
            case HAMT_SUBTAG_HEAD_BITMAP: sp->srcA++;
                /* fall through: head only differs by the extra word */
            case HAMT_SUBTAG_NODE_BITMAP: {
                ASSERT(ctx->lvl < HAMT_MAX_LEVEL);
                sp->abm = MAP_HEADER_VAL(hdrA);
                break;
            }
            default: /* collision node */
                ERTS_ASSERT(is_arity_value(hdrA));
                ASSERT(ctx->lvl == HAMT_MAX_LEVEL);
                coll_szA = arityval(hdrA);
                ASSERT(coll_szA >= 2);
            }
        }

        if (is_list(sp->nodeB)) { /* B is LEAF */
            Eterm keyB = CAR(list_val(sp->nodeB));

            if (ctx->lvl < HAMT_MAX_LEVEL) {
                hx = hashmap_restore_hash(ctx->lvl, keyB);
                sp->bbm = 1 << hashmap_index(hx);
            }
            else {
                coll_szB = 1;
            }
            /* keep srcB pointing at the leaf */
        }
        else { /* B is NODE */
            sp->srcB = boxed_val(sp->nodeB);
            hdrB = *sp->srcB++;
            ASSERT(is_header(hdrB));
            switch (hdrB & _HEADER_MAP_SUBTAG_MASK) {
            case HAMT_SUBTAG_HEAD_ARRAY: {
                ASSERT(ctx->lvl < HAMT_MAX_LEVEL);
                sp->srcB++;     /* skip the word after the head header */
                sp->bbm = 0xffff;
                break;
            }
            case HAMT_SUBTAG_HEAD_BITMAP: sp->srcB++;
                /* fall through: head only differs by the extra word */
            case HAMT_SUBTAG_NODE_BITMAP: {
                ASSERT(ctx->lvl < HAMT_MAX_LEVEL);
                sp->bbm = MAP_HEADER_VAL(hdrB);
                break;
            }
            default: /* collision node */
                ERTS_ASSERT(is_arity_value(hdrB));
                ASSERT(ctx->lvl == HAMT_MAX_LEVEL);
                coll_szB = arityval(hdrB);
                ASSERT(coll_szB >= 2);
            }
        }
    }

merge_nodes:

    for (;;) {
	if (is_value(res)) { /* We have a complete (sub-)tree or leaf */
            int child_mix;
	    if (ctx->lvl == 0)
		break;

	    /* Pop from stack and continue build parent node */
	    ctx->lvl--;
            child_mix = sp->mix;
	    sp = PSTACK_POP(s);
	    sp->array[sp->ix++] = res;
            sp->mix |= child_mix;
	    res = THE_NON_VALUE;
	    /* Step past the two children that were just merged */
	    if (sp->rbm) {
		sp->srcA++;
		sp->srcB++;
	    }
        }
        else if (ctx->lvl < HAMT_MAX_LEVEL) { /* Start build a node */
	    sp->ix = 0;
	    sp->rbm = sp->abm | sp->bbm;
	    ASSERT(!(sp->rbm == 0 && ctx->lvl > 0));
	}
        else {
            /* Bottom level: both sides are leaves and/or collision nodes */
            res = merge_collision_node(p, sp->srcA, coll_szA,
                                       sp->srcB, coll_szB, &ctx->size);
            sp->mix = 3;
            coll_szA = coll_szB = 0;
            continue;
        }

        if (--reds <= 0) {
            ASSERT(!coll_szA && !coll_szB);
            goto trap;
        }
resume_from_trap:

	/* Process the remaining slots of the current node, lowest bit first */
	while (sp->rbm) {
	    Uint32 next = sp->rbm & (sp->rbm-1);
	    Uint32 bit = sp->rbm ^ next;
	    sp->rbm = next;
	    if (sp->abm & bit) {
		if (sp->bbm & bit) {
		    /* Bit clash. Push and resolve by recursive merge */
		    Eterm* srcA = sp->srcA;
		    Eterm* srcB = sp->srcB;
		    ctx->lvl++;
		    sp = PSTACK_PUSH(s);
                    sp->srcA = srcA;
                    sp->srcB = srcB;
                    sp->mix = 0;
		    goto recurse;
		} else {
		    sp->array[sp->ix++] = *sp->srcA++;
                    sp->mix |= 1;
		}
	    } else {
		ASSERT(sp->bbm & bit);
		sp->array[sp->ix++] = *sp->srcB++;
                sp->mix |=  2;
	    }
	}

        /* All slots done: decide whether an existing node can be reused */
        switch (sp->mix) {
        case 0: /* Nodes A and B contain the *EXACT* same sub-trees
                   => fall through and reuse nodeA */

        case 1: /* Only unique A stuff => reuse nodeA */
            res = sp->nodeA;
            break;

        case 2: /* Only unique B stuff => reuse nodeB */
            res = sp->nodeB;
            break;

        case 3: /* We have a mix => must build new node */
            ASSERT(sp->ix == hashmap_bitcount(sp->abm | sp->bbm));
            if (ctx->lvl == 0) {
                /* Root: head node, which also stores the map size */
                nhp = HAllocX(p, HAMT_HEAD_BITMAP_SZ(sp->ix), HALLOC_EXTRA);
                hp = nhp;
                *hp++ = (sp->ix == 16 ? MAP_HEADER_HAMT_HEAD_ARRAY
                         : MAP_HEADER_HAMT_HEAD_BITMAP(sp->abm | sp->bbm));
                *hp++ = ctx->size;
            } else {
                nhp = HAllocX(p, HAMT_NODE_BITMAP_SZ(sp->ix), HALLOC_EXTRA);
                hp = nhp;
                *hp++ = MAP_HEADER_HAMT_NODE_BITMAP(sp->abm | sp->bbm);
            }
            sys_memcpy(hp, sp->array, sp->ix * sizeof(Eterm));
            res = make_boxed(nhp);
            break;
        default:
            erts_exit(ERTS_ABORT_EXIT, "strange mix %d\r\n", sp->mix);
        }
    }

    /* Done */

#ifdef DEBUG
    {
        Eterm *head = hashmap_val(res);
        Uint size = head[1];
        Uint real_size = hashmap_subtree_size(res);
        ASSERT(size == real_size);
    }
#endif

    if (ctx != &local_ctx) {
        /* We have trapped at least once; GC was disabled at the first trap */
        ASSERT(ctx->trap_bin != THE_NON_VALUE);
        ASSERT(p->flags & F_DISABLE_GC);
        erts_set_gc_state(p, 1);
    }
    else {
        ASSERT(ctx->trap_bin == THE_NON_VALUE);
        ASSERT(!(p->flags & F_DISABLE_GC));
    }
    PSTACK_DESTROY(s);
    UnUseTmpHeap(2,p);
    BUMP_REDS(p, (initial_reds - reds) / MAP_MERGE_LOOP_FACTOR);
    return res;

trap:  /* Yield */

    if (ctx == &local_ctx) {
        /* First trap: move the context from the C stack into a magic
         * binary so that it survives until maps_merge_trap_1 resumes */
        Binary* ctx_b = erts_create_magic_binary(sizeof(HashmapMergeContext),
                                                 hashmap_merge_ctx_destructor);
        ctx = ERTS_MAGIC_BIN_DATA(ctx_b);
        sys_memcpy(ctx, &local_ctx, sizeof(HashmapMergeContext));
        hp = HAlloc(p, ERTS_MAGIC_REF_THING_SIZE);
        ASSERT(ctx->trap_bin == THE_NON_VALUE);
        ctx->trap_bin = erts_mk_magic_ref(&hp, &MSO(p), ctx_b);

        erts_set_gc_state(p, 0);
    }
    else {
        ASSERT(ctx->trap_bin != THE_NON_VALUE);
        ASSERT(p->flags & F_DISABLE_GC);
    }

    PSTACK_SAVE(s, &ctx->pstack);

    BUMP_ALL_REDS(p);
    ERTS_BIF_PREP_TRAP1(trap_ret, &hashmap_merge_trap_export,
                        p, ctx->trap_bin);
    UnUseTmpHeap(2,p);
    return trap_ret;
}

/*
 * Count the key-value leaves below `node` by walking the whole sub-tree
 * with the hashmap iterator.
 */
static Uint hashmap_subtree_size(Eterm node) {
    DECLARE_WSTACK(stack);
    Uint n_leafs;

    hashmap_iterator_init(&stack, node, 0);
    for (n_leafs = 0; hashmap_iterator_next(&stack) != NULL; n_leafs++) {
        /* counting only */
    }
    DESTROY_WSTACK(stack);

    return n_leafs;
}

/*
 * Compare two hash values four bits at a time, starting with the least
 * significant four bits, for HAMT_MAX_LEVEL groups.
 *
 * Returns the difference of the first pair of 4-bit groups that differ
 * (negative when ha's group is the smaller one), or 0 when all
 * HAMT_MAX_LEVEL groups are equal.
 */
static int hash_cmp(erts_ihash_t ha, erts_ihash_t hb)
{
    int level = 0;

    while (level < HAMT_MAX_LEVEL) {
        const int slot_a = (int)(ha & 0xF);
        const int slot_b = (int)(hb & 0xF);

        if (slot_a != slot_b) {
            return slot_a - slot_b;
        }

        ha >>= 4;
        hb >>= 4;
        level++;
    }

    return 0;
}

/*
 * Order two [Key|Value] cells by the hash of their keys, using hash_cmp().
 *
 * ap, bp - pointers to the cons cells; at most one of them may be NULL
 *
 * When one pointer is NULL the non-NULL side sorts first: returns -1 if
 * only `ap` is given and 1 if only `bp` is given.  With both cells
 * present the keys are expected to differ.
 */
int hashmap_key_hash_cmp(Eterm* ap, Eterm* bp)
{
    erts_ihash_t hash_a, hash_b;

    if (!ap || !bp) {
        ASSERT(ap || bp);
        return ap ? -1 : 1;
    }

    ASSERT(CMP_TERM(CAR(ap), CAR(bp)) != 0);

    hash_a = hashmap_make_hash(CAR(ap));
    hash_b = hashmap_make_hash(CAR(bp));

    return hash_cmp(hash_a, hash_b);
}

/* maps:put/3 */

/*
 * maps:put(Key, Value, Map).
 *
 * Fails with BADMAP (fvalue set to the third argument) when it is not a
 * map; otherwise returns the result of erts_maps_put().
 */
BIF_RETTYPE maps_put_3(BIF_ALIST_3) {
    if (!is_map(BIF_ARG_3)) {
        BIF_P->fvalue = BIF_ARG_3;
        BIF_ERROR(BIF_P, BADMAP);
    }

    BIF_RET(erts_maps_put(BIF_P, BIF_ARG_1, BIF_ARG_2, BIF_ARG_3));
}

/* maps:take/2 */

/*
 * maps:take(Key, Map).
 *
 * Returns the 2-tuple {Value, MapWithoutKey} when the key is present and
 * the atom `error` when it is not.  Fails with BADMAP (fvalue set to the
 * second argument) when it is not a map.
 */
BIF_RETTYPE maps_take_2(BIF_ALIST_2) {
    Eterm remaining, taken;
    Eterm *tuple;

    if (!is_map(BIF_ARG_2)) {
        BIF_P->fvalue = BIF_ARG_2;
        BIF_ERROR(BIF_P, BADMAP);
    }

    if (!erts_maps_take(BIF_P, BIF_ARG_1, BIF_ARG_2, &remaining, &taken)) {
        BIF_RET(am_error);
    }

    /* header word + two elements */
    tuple    = HAlloc(BIF_P, 3);
    tuple[0] = make_arityval(2);
    tuple[1] = taken;
    tuple[2] = remaining;
    BIF_RET(make_tuple(tuple));
}

/* maps:remove/2 */

/*
 * maps:remove(Key, Map).
 *
 * Returns the map without the key; erts_maps_take() hands back the
 * input map itself when the key is absent, so whether the key was found
 * is irrelevant here.  Fails with BADMAP (fvalue set to the second
 * argument) when it is not a map.
 */
BIF_RETTYPE maps_remove_2(BIF_ALIST_2) {
    Eterm without_key;

    if (!is_map(BIF_ARG_2)) {
        BIF_P->fvalue = BIF_ARG_2;
        BIF_ERROR(BIF_P, BADMAP);
    }

    (void) erts_maps_take(BIF_P, BIF_ARG_1, BIF_ARG_2, &without_key, NULL);
    BIF_RET(without_key);
}

/* erts_maps_take
 *
 * Remove `key` from `map`.
 *
 * p     - process whose heap receives the new map
 * key   - key to remove
 * map   - flatmap or hashmap to remove it from
 * res   - out: the map without the key
 * value - out: the value that was stored under the key; may be NULL
 *         when the caller is not interested (only written when found)
 *
 * return 1 if key is found, otherwise 0
 * If the key is not found res (output map) will be map (input map)
 */
int erts_maps_take(Process *p, Eterm key, Eterm map,
                   Eterm *res, Eterm *value) {
    erts_ihash_t hx;
    Eterm ret;
    if (is_flatmap(map)) {
	Sint n;
	Uint need;
	Eterm *hp_start;
	Eterm *thp, *mhp;
	Eterm *ks, *vs, tup;
	flatmap_t *mp = (flatmap_t*)flatmap_val(map);

	n = flatmap_get_size(mp);

	if (n == 0) {
	    *res = map;
	    return 0;
	}

	ks = flatmap_get_keys(mp);
	vs = flatmap_get_values(mp);

	/* Assume key exists.
	 * Release allocated if it didn't.
	 * Allocate key tuple first.
	 */

	/* New key tuple: header + (n-1) keys, or nothing at all when the
	 * result is empty (the global empty tuple literal is used then).
	 * New map: 3 header words + (n-1) values. */
	need   = n + ((n-1) == 0 ? 0 : 1) - 1 + 3 + n - 1; /* tuple - 1 + map - 1 */
	hp_start = HAlloc(p, need);
	thp    = hp_start;
	mhp    = thp + n + ((n-1) == 0 ? -1 : 0);  /* offset with tuple heap size */
        if ((n-1) == 0) {
            tup = ERTS_GLOBAL_LIT_EMPTY_TUPLE;
        } else {
            tup    = make_tuple(thp);
            *thp++ = make_arityval(n - 1);
        }
	*res   = make_flatmap(mhp);
	*mhp++ = MAP_HEADER_FLATMAP;
	*mhp++ = n - 1;
	*mhp++ = tup;

	/* Copy keys and values up to the match.  The last entry is never
	 * copied when it does not match, so at most n-1 entries are
	 * written.  Immediate keys are compared by identity, others with
	 * EQ(). */
	if (is_immed(key)) {
	    while (1) {
		if (*ks == key) {
                    if (value) *value = *vs;
		    goto found_key;
		} else if (--n) {
		    *mhp++ = *vs++;
		    *thp++ = *ks++;
		} else
		    break;
	    }
	} else {
	    while(1) {
		if (EQ(*ks, key)) {
                    if (value) *value = *vs;
		    goto found_key;
		} else if (--n) {
		    *mhp++ = *vs++;
		    *thp++ = *ks++;
		} else
		    break;
	    }
	}

	/* Not found, remove allocated memory
	 * and return previous map.
	 */
	HRelease(p, hp_start + need, hp_start);

	*res = map;
	return 0;

found_key:
	/* Copy rest of keys and values */
	/* (n counts the entries from the match to the end; skip the match) */
	if (--n) {
	    sys_memcpy(mhp, vs+1, n*sizeof(Eterm));
	    sys_memcpy(thp, ks+1, n*sizeof(Eterm));
	}
	return 1;
    }
    ASSERT(is_hashmap(map));
    hx = hashmap_make_hash(key);
    ret = hashmap_delete(p, hx, key, map, value);
    /* hashmap_delete() yields a non-value when the key was not present */
    if (is_value(ret)) {
        *res = ret;
        return 1;
    }
    *res = map;
    return 0;
}

/*
 * Replace the value of an EXISTING key (maps:update/3 semantics).
 *
 * p     - process whose heap receives the new map
 * key   - key whose value is to be replaced
 * value - the new value
 * map   - flatmap or hashmap to update
 * res   - out: the updated map
 *
 * Returns 1 when the key exists and 0 when it does not.  For a flatmap
 * *res is left untouched when 0 is returned.  When the stored value is
 * already the identical term `value`, *res is the input map itself and
 * nothing is allocated.
 */
int erts_maps_update(Process *p, Eterm key, Eterm value, Eterm map, Eterm *res) {
    flatmap_t *mp;
    Eterm *ks, *vs;
    Eterm *new_map, *new_vs;
    Sint n, pos;

    if (!is_flatmap(map)) {
        erts_ihash_t hx;

        ASSERT(is_hashmap(map));
        hx   = hashmap_make_hash(key);
        *res = erts_hashmap_insert(p, hx, key, value, map, 1);
        return is_value(*res) ? 1 : 0;
    }

    mp = (flatmap_t*)flatmap_val(map);
    n  = flatmap_get_size(mp);
    if (n == 0) {
        return 0;
    }

    ks = flatmap_get_keys(mp);
    vs = flatmap_get_values(mp);

    /* Optimistically build a new map that shares the old key tuple;
     * only the values need to be copied. */
    new_map   = HAlloc(p, MAP_HEADER_FLATMAP_SZ + n);
    new_vs    = new_map;
    *new_vs++ = MAP_HEADER_FLATMAP;
    *new_vs++ = n;
    *new_vs++ = mp->keys;

    /* Copy values until the key is found; immediate keys can be
     * compared by identity. */
    if (is_immed(key)) {
        for (pos = 0; pos < n && ks[pos] != key; pos++) {
            new_vs[pos] = vs[pos];
        }
    } else {
        for (pos = 0; pos < n && !EQ(ks[pos], key); pos++) {
            new_vs[pos] = vs[pos];
        }
    }

    if (pos == n) {
        /* No such key: give the allocation back */
        HRelease(p, new_map + MAP_HEADER_FLATMAP_SZ + n, new_map);
        return 0;
    }

    if (vs[pos] == value) {
        /* Nothing would change: reuse the input map */
        HRelease(p, new_map + MAP_HEADER_FLATMAP_SZ + n, new_map);
        *res = map;
        return 1;
    }

    new_vs[pos] = value;
    if (pos + 1 < n) {
        sys_memcpy(&new_vs[pos + 1], &vs[pos + 1],
                   (n - (pos + 1))*sizeof(Eterm));
    }
    *res = make_flatmap(new_map);
    return 1;
}

/*
 * Associate `key` with `value` in `map` (maps:put/3 semantics).
 *
 * p     - process whose heap receives the new map
 * key   - key to insert or overwrite
 * value - value to store
 * map   - flatmap or hashmap
 *
 * Returns the resulting map.  For a flatmap this is:
 *   - `map` itself when the key is already bound to the identical term,
 *   - a new flatmap sharing the old key tuple when the key exists,
 *   - a new flatmap with a new key tuple when the key is new and the
 *     map had fewer than MAP_SMALL_MAP_LIMIT keys,
 *   - a hashmap when the key is new and the map already had
 *     MAP_SMALL_MAP_LIMIT (or more) keys.
 */
Eterm erts_maps_put(Process *p, Eterm key, Eterm value, Eterm map) {
    erts_ihash_t hx;
    Eterm res;
    if (is_flatmap(map)) {
	Sint n,i;
	Sint c = 0;
	Eterm* hp, *shp;
	Eterm *ks, *vs, tup;
	flatmap_t *mp = (flatmap_t*)flatmap_val(map);

	n = flatmap_get_size(mp);

	if (n == 0) {
	    /* Empty map: build a 1-tuple of keys followed by the map */
	    hp    = HAlloc(p, MAP_HEADER_FLATMAP_SZ + 1 + 2);
	    tup   = make_tuple(hp);
	    *hp++ = make_arityval(1);
	    *hp++ = key;
	    res   = make_flatmap(hp);
	    *hp++ = MAP_HEADER_FLATMAP;
	    *hp++ = 1;
	    *hp++ = tup;
	    *hp++ = value;

	    return res;
	}

	ks = flatmap_get_keys(mp);
	vs = flatmap_get_values(mp);

	/* only allocate for values,
	 * assume key-tuple will be intact
	 */

	hp  = HAlloc(p, MAP_HEADER_FLATMAP_SZ + n);
	shp = hp; /* save hp, used if optimistic update fails */
	res = make_flatmap(hp);
	*hp++ = MAP_HEADER_FLATMAP;
	*hp++ = n;
	*hp++ = mp->keys;

	/* Copy values until the key is found; immediate keys are
	 * compared by identity, others with EQ(). */
	if (is_immed(key)) {
	    for( i = 0; i < n; i ++) {
		if (ks[i] == key) {
                    goto found_key;
		} else {
		    *hp++ = *vs++;
		}
	    }
	} else {
	    for( i = 0; i < n; i ++) {
		if (EQ(ks[i], key)) {
		    goto found_key;
		} else {
		    *hp++ = *vs++;
		}
	    }
	}

	/* the map will grow */

	if (n >= MAP_SMALL_MAP_LIMIT) {
            ErtsHeapFactory factory;
	    /* Too big for a flatmap: drop the optimistic copy and build
	     * a hashmap from the old associations plus the new one */
	    HRelease(p, shp + MAP_HEADER_FLATMAP_SZ + n, shp);
	    ks = flatmap_get_keys(mp);
	    vs = flatmap_get_values(mp);

            erts_factory_proc_init(&factory, p);
	    res = erts_hashmap_from_ks_and_vs_extra(&factory,ks,vs,n,key,value);
            erts_factory_close(&factory);

	    return res;
	}

	/* still a small map. need to make a new tuple,
	 * use old hp since it needs to be recreated anyway. */

	/* The block at shp (allocated for the optimistic copy) now
	 * becomes the new key tuple ... */
	tup    = make_tuple(shp);
	*shp++ = make_arityval(n+1);

	/* ... and the map itself goes into a fresh allocation */
	hp    = HAlloc(p, 3 + n + 1);
	res   = make_flatmap(hp);
	*hp++ = MAP_HEADER_FLATMAP;
	*hp++ = n + 1;
	*hp++ = tup;

	ks = flatmap_get_keys(mp);
	vs = flatmap_get_values(mp);

	ASSERT(n >= 0);

	/* copy map in order */
	/* (everything that sorts before the new key) */
	while (n && ((c = erts_cmp_flatmap_keys(*ks, key)) < 0)) {
	    *shp++ = *ks++;
	    *hp++  = *vs++;
	    n--;
	}

	*shp++ = key;
	*hp++  = value;

	ASSERT(n >= 0);

	while(n--) {
	    *shp++ = *ks++;
	    *hp++  = *vs++;
	}
	/* we have one word remaining
	 * this will work out fine once we get the size word
	 * in the header.
	 */
	/* (the reused block was allocated as MAP_HEADER_FLATMAP_SZ + n
	 * words, while the key tuple written into it takes 1 + (n+1)) */
	erts_write_heap_filler(shp, 1);
	return res;

found_key:
        if(*vs == value) {
            /* Already bound to the identical term: nothing to do */
            HRelease(p, shp + MAP_HEADER_FLATMAP_SZ + n, shp);
            return map;
        } else {
            *hp++ = value;
            vs++;
            /* copy the values that follow the replaced one */
            if (++i < n)
               sys_memcpy(hp, vs, (n - i)*sizeof(Eterm));
            return res;
        }
    }
    ASSERT(is_hashmap(map));

    hx  = hashmap_make_hash(key);
    res = erts_hashmap_insert(p, hx, key, value, map, 0);
    ASSERT(is_hashmap(res));

    return res;
}

/* maps:update/3 */

/*
 * maps:update(Key, Value, Map).
 *
 * Fails with BADMAP (fvalue = third argument) when it is not a map and
 * with BADKEY (fvalue = the key) when erts_maps_update() reports that
 * the key is missing; otherwise returns the updated map.
 */
BIF_RETTYPE maps_update_3(BIF_ALIST_3) {
    Eterm updated;

    if (is_not_map(BIF_ARG_3)) {
        BIF_P->fvalue = BIF_ARG_3;
        BIF_ERROR(BIF_P, BADMAP);
    }

    if (!erts_maps_update(BIF_P, BIF_ARG_1, BIF_ARG_2, BIF_ARG_3, &updated)) {
        BIF_P->fvalue = BIF_ARG_1;
        BIF_ERROR(BIF_P, BADKEY);
    }

    BIF_RET(updated);
}

/* **Important Note**
 *
 * A yielding version of this function is generated with YCF. This
 * means that the code has to follow some restrictions. See note about
 * YCF near the top of the file for more information.
 */
#ifdef INCLUDE_YCF_TRANSFORMED_ONLY_FUNCTIONS
/*
 * Worker for maps:values/1.
 *
 * p        - calling process; the result list is built on its heap
 * bif_args - BIF argument vector; bif_args[0] is the map
 *
 * Returns the list of values.  For a flatmap the list is in the same
 * order as the value array; for a hashmap the work is done by
 * hashmap_values().  If the argument is not a map, sets
 * p->fvalue/p->freason to signal BADMAP and returns THE_NON_VALUE.
 */
static BIF_RETTYPE maps_values_1_helper(Process* p, Eterm* bif_args) {
    Eterm map = bif_args[0];
    if (is_flatmap(map)) {
        Eterm *hp;
        Eterm *vs;
        Eterm res = NIL;
	flatmap_t *mp;
	Uint n;

	mp  = (flatmap_t*)flatmap_val(map);
	n   = flatmap_get_size(mp);

	if (n == 0)
	    BIF_RET(res);

	/* one cons cell (2 words) per value */
	hp  = HAlloc(p, (2 * n));
	vs  = flatmap_get_values(mp);

	/* cons from the last value towards the first so that the
	 * finished list starts with vs[0] */
	while(n--) {
	    res = CONS(hp, vs[n], res); hp += 2;
	}

	return res;
    } else if (is_hashmap(map)) {
        /* YCF cannot handle function calls as return expression */
        BIF_RETTYPE res = hashmap_values(p, map);
	return res;
    }
    p->fvalue = map;
    (p)->freason = BADMAP;
    return THE_NON_VALUE;
}
#endif /* INCLUDE_YCF_TRANSFORMED_ONLY_FUNCTIONS */

/* maps:values/1 */

/*
 * BIF entry point for maps:values/1.
 *
 * Delegates to erts_ycf_trap_driver(), passing the continue/destroy/
 * yielding variants generated from maps_values_1_helper.
 */
BIF_RETTYPE maps_values_1(BIF_ALIST_1) {
    const size_t iters_per_reduction = 15;

    return erts_ycf_trap_driver(BIF_P, BIF__ARGS, 1, iters_per_reduction,
                                ERTS_ALC_T_MAP_TRAP, 0, BIF_maps_values_1,
                                maps_values_1_helper_ycf_gen_continue,
                                maps_values_1_helper_ycf_gen_destroy,
                                maps_values_1_helper_ycf_gen_yielding);
}

/*
 * Number of child slots of the hashmap node whose header word is `hdr`.
 *
 * hdr   - header word of the node
 * nodep - optional (may be NULL); for head nodes *nodep is advanced by
 *         one word, skipping the extra word a head node carries after
 *         its header
 *
 * Head array nodes always have 16 slots, bitmap nodes have one slot per
 * set bit, and anything else must be a collision node whose arity is
 * its size.
 */
static ERTS_INLINE
Uint hashmap_node_size(Eterm hdr, Eterm **nodep)
{
    const Eterm subtag = hdr & _HEADER_MAP_SUBTAG_MASK;
    Uint n_slots;

    if (subtag == HAMT_SUBTAG_HEAD_ARRAY) {
        if (nodep) {
            ++*nodep;
        }
        return 16;
    }

    if (subtag == HAMT_SUBTAG_HEAD_BITMAP
        || subtag == HAMT_SUBTAG_NODE_BITMAP) {
        if (subtag == HAMT_SUBTAG_HEAD_BITMAP && nodep) {
            ++*nodep;
        }
        n_slots = hashmap_bitcount(MAP_HEADER_VAL(hdr));
        ASSERT(n_slots < 17);
        return n_slots;
    }

    /* collision node */
    ERTS_ASSERT(is_arity_value(hdr));
    n_slots = arityval(hdr);
    return n_slots;
}

/*
 * Prepare the work stack `s` for iterating over the leaves below `node`.
 *
 * s       - work stack owned by the caller
 * node    - hashmap node to iterate over
 * reverse - zero to start before the first slot (for use with
 *           hashmap_iterator_next), non-zero to start after the last
 *           slot (for use with hashmap_iterator_prev)
 *
 * Pushes an end marker followed by the start index and the node.
 */
void hashmap_iterator_init(ErtsWStack* s, Eterm node, int reverse) {
    const Eterm hdr = *hashmap_val(node);
    const Uint n_slots = hashmap_node_size(hdr, NULL);
    UWord start_idx;

    if (reverse) {
        start_idx = (UWord)(n_slots + 1);
    } else {
        start_idx = (UWord)0;
    }

    WSTACK_PUSH3((*s), (UWord)THE_NON_VALUE,  /* end marker */
                 start_idx,
                 (UWord)node);
}

/*
 * Advance a hashmap iterator to the next leaf.
 *
 * The work stack holds (index, node) pairs for the path from the start
 * node to the current position, on top of an end marker.
 *
 * Returns a pointer to the [Key|Value] cell of the next leaf, or NULL
 * when the end marker is reached (the marker is popped in that case).
 */
Eterm* hashmap_iterator_next(ErtsWStack* s) {
    for (;;) {
        Eterm node;
        Uint idx;

        ASSERT(!WSTACK_ISEMPTY((*s)));
        node = (Eterm) WSTACK_POP((*s));
        if (is_non_value(node)) {
            return NULL;
        }
        idx = (Uint) WSTACK_POP((*s));

        /* Descend along first children until a leaf is found or this
         * node has no slots left. */
        for (;;) {
            Eterm *slots;
            Eterm hdr, child;
            Uint32 sz;

            ASSERT(is_boxed(node));
            slots = boxed_val(node);
            hdr   = *slots;
            ASSERT(is_header(hdr));
            sz = hashmap_node_size(hdr, &slots);

            idx++;
            if (idx > sz) {
                break; /* node exhausted: pop the parent node */
            }

            /* remember where to continue in this node */
            WSTACK_PUSH2((*s), (UWord)idx, (UWord)node);

            child = slots[idx];
            if (is_list(child)) {
                return list_val(child);
            }
            ASSERT(is_boxed(child));
            node = child;
            idx  = 0;
        }
    }
}

/*
 * Step a hashmap iterator backwards to the previous leaf.
 *
 * Mirror image of hashmap_iterator_next(): slots are visited from the
 * last one towards the first.  An index larger than the node size
 * means "start at the last slot".
 *
 * Returns a pointer to the [Key|Value] cell of the previous leaf, or
 * NULL when the end marker is reached (the marker is popped in that
 * case).
 */
Eterm* hashmap_iterator_prev(ErtsWStack* s) {
    for (;;) {
        Eterm node;
        Uint idx;

        ASSERT(!WSTACK_ISEMPTY((*s)));
        node = (Eterm) WSTACK_POP((*s));
        if (is_non_value(node)) {
            return NULL;
        }
        idx = (Uint) WSTACK_POP((*s));

        /* Descend along last children until a leaf is found or this
         * node has no slots left. */
        for (;;) {
            Eterm *slots;
            Eterm hdr, child;
            Uint32 sz;

            ASSERT(is_boxed(node));
            slots = boxed_val(node);
            hdr   = *slots;
            ASSERT(is_header(hdr));
            sz = hashmap_node_size(hdr, &slots);

            idx = (idx > sz) ? (Uint) sz : idx - 1;
            if (idx < 1) {
                break; /* node exhausted: pop the parent node */
            }

            /* remember where to continue in this node */
            WSTACK_PUSH2((*s), (UWord)idx, (UWord)node);

            child = slots[idx];
            if (is_list(child)) {
                return list_val(child);
            }
            ASSERT(is_boxed(child));
            node = child;
            idx  = UINT_MAX;    /* start the child at its last slot */
        }
    }
}

/*
 * Look up `key` (with hash `hx`) in the hashmap whose head is `node`.
 *
 * Returns a pointer to the value word of the matching leaf cons cell,
 * or NULL when the key is not present.
 */
const Eterm *
erts_hashmap_get(erts_ihash_t hx, Eterm key, Eterm node)
{
    Eterm *slots, header;
    Uint pos, remaining, lvl = 0;
    Uint32 bitmap, bit;

    ASSERT(is_boxed(node));
    slots = boxed_val(node);
    header = *slots;
    ASSERT(is_header(header));
    ASSERT(is_hashmap_header_head(header));
    /* The head has one extra word after the header; step over it so that
     * slots[pos+1] addresses children the same way as for inner nodes. */
    slots++;

    for (;;) {
        ASSERT(lvl == 0 || is_hashmap_header_node(header));

        bitmap = MAP_HEADER_VAL(header);
        pos    = hashmap_index(hx);
        if (bitmap != 0xffff) {
            /* sparse node: translate the hash index into a packed slot */
            bit = 1 << pos;
            if ((bit & bitmap) == 0) {
                /* not occupied */
                return NULL;
            }
            pos = hashmap_bitcount(bitmap & (bit - 1));
        }
        node = slots[pos + 1];

        if (is_list(node)) { /* LEAF NODE [K|V] */
            Eterm *leaf = list_val(node);
            if (EQ(CAR(leaf), key)) {
                return &(CDR(leaf));
            }
            return NULL;
        }

        hx = hashmap_shift_hash(hx,lvl,key);

        ASSERT(is_boxed(node));
        slots = boxed_val(node);
        header = *slots;
        ASSERT(is_header(header));

        if (is_arity_value(header)) {
            break;
        }
    }

    /* collision node: scan every leaf it holds */
    ASSERT(lvl == HAMT_MAX_LEVEL);
    remaining = arityval(header);
    ASSERT(remaining > 1);
    do {
        Eterm *leaf;
        slots++;
        leaf = list_val(*slots);
        if (EQ(CAR(leaf), key)) {
            return &(CDR(leaf));
        }
        remaining--;
    } while (remaining > 0);

    return NULL;
}

/*
 * Insert or update `key` => `value` in hashmap `map` on the heap of `p`.
 *
 * Returns the resulting map. This is `map` itself when the very same
 * key-value pair is already present, and THE_NON_VALUE when `is_update`
 * is set and the key does not exist.
 */
Eterm erts_hashmap_insert(Process *p, erts_ihash_t hx, Eterm key, Eterm value,
                          Eterm map, int is_update) {
    Uint heap_need, size_delta;
    Eterm *hp, result = THE_NON_VALUE;
    DECLARE_ESTACK(stack);

    if (!erts_hashmap_insert_down(hx, key, value, map, &heap_need, &size_delta,
                                  &stack, is_update)) {
        /* We are updating and the key does not exist */
        ASSERT(is_update);
    }
    else if (heap_need == 0) {
        /* We are putting the same key-value */
        result = map;
    }
    else {
        /* We are putting a new value (under a new or existing key) */
        hp     = HAlloc(p, heap_need);
        result = erts_hashmap_insert_up(hp, key, value, size_delta, &stack);
        ASSERT(hashmap_val(result) + 2 + hashmap_bitcount(MAP_HEADER_VAL(*hashmap_val(result)))
               == hp + heap_need);
    }

    DESTROY_ESTACK(stack);
    return result;
}


/*
 * Downward pass of a hashmap insert/update.
 *
 * Walks from `node` towards the position of `key` (hash `hx`), pushing one
 * record per visited node onto `*sp` for erts_hashmap_insert_up() to pop,
 * and adds up the heap words needed to rebuild the path.
 *
 * Records pushed (last argument ends up on top of the stack):
 *   head array      : ix, node
 *   bitmap node/head: n, bp, slot, node
 *   collision node  : slot, is_insert (0 = replace, 1 = insert), node
 *   leaf, new subnode with two children : cix, ix, 0, leaf
 *   leaf, intermediate single-child node: 0, 1 << ix, 0, bare node header
 *   leaf, new collision node            : 1, leaf
 *
 * Outputs:
 *   *sz          - heap words needed; 0 when the key already maps to the
 *                  identical `value` term (nothing needs to be built).
 *   *update_size - 1 when a new key is added, 0 when an existing key gets
 *                  a new value.
 *
 * Returns 0 when `is_update` is set and the key is not present, else 1.
 */
int erts_hashmap_insert_down(erts_ihash_t hx, Eterm key, Eterm value, Eterm node, Uint *sz,
			     Uint *update_size, ErtsEStack *sp, int is_update) {
    Eterm *ptr;
    Eterm hdr, ckey;
    Uint32 ix, cix, bp, hval;
    Uint slot, lvl = 0, clvl;
    Uint size = 0, n = 0;
    erts_ihash_t chx;

    *update_size = 1;

    for (;;) {
	switch(primary_tag(node)) {
	    case TAG_PRIMARY_LIST: /* LEAF NODE [K|V] */
		ptr  = list_val(node);
		ckey = CAR(ptr);
		if (EQ(ckey, key)) {
		    if (CDR(ptr) == value) {
                        *sz = 0; /* same value, same map, no heap needed */
                        return 1;
                    }
		    /* existing key, new value: map size is unchanged */
		    *update_size = 0;
		    goto unroll;
		}
		if (is_update) {
		    return 0;
		}
		/* a different key occupies the slot: build subnodes */
		goto insert_subnodes;
	    case TAG_PRIMARY_BOXED:
		ptr = boxed_val(node);
		hdr = *ptr;
		ASSERT(is_header(hdr));

		switch(hdr & _HEADER_MAP_SUBTAG_MASK) {
		    case HAMT_SUBTAG_HEAD_ARRAY:
			ix    = hashmap_index(hx);
			/* NOTE(review): lvl is never assigned directly below;
			 * presumably hashmap_shift_hash also advances it - confirm */
			hx    = hashmap_shift_hash(hx,lvl,key);
			size += HAMT_HEAD_ARRAY_SZ;
			ESTACK_PUSH2(*sp, ix, node);
			node  = ptr[ix+2];
			break;
		    case HAMT_SUBTAG_NODE_BITMAP:
			hval = MAP_HEADER_VAL(hdr);
			ix   = hashmap_index(hx);
                        bp   = 1 << ix;
                        if (hval == 0xffff) {
                            /* full node: slot equals the hash index */
                            slot = ix;
                            n = 16;
                        } else {
                            slot = hashmap_bitcount(hval & (bp - 1));
                            n    = hashmap_bitcount(hval);
                        }

                        ESTACK_PUSH4(*sp, n, bp, slot, node);

                        if (!(bp & hval)) { /* not occupied */
                            if (is_update) {
                                return 0;
                            }
                            /* the copy of this node grows by one slot */
                            size += HAMT_NODE_BITMAP_SZ(n+1);
                            goto unroll;
                        }

                        hx    = hashmap_shift_hash(hx,lvl,key);
                        node  = ptr[slot+1];
                        ASSERT(HAMT_NODE_BITMAP_SZ(n) <= 17);
                        size += HAMT_NODE_BITMAP_SZ(n);
                        break;

		    case HAMT_SUBTAG_HEAD_BITMAP:
			hval = MAP_HEADER_VAL(hdr);
			ix   = hashmap_index(hx);
			bp   = 1 << ix;
			slot = hashmap_bitcount(hval & (bp - 1));
			n    = hashmap_bitcount(hval);

			ESTACK_PUSH4(*sp, n, bp, slot, node);

			/* occupied */
			if (bp & hval) {
			    hx    = hashmap_shift_hash(hx,lvl,key);
			    node  = ptr[slot+2];
			    ASSERT(HAMT_HEAD_BITMAP_SZ(n) <= 18);
			    size += HAMT_HEAD_BITMAP_SZ(n);
			    break;
			}
			/* not occupied */
			if (is_update) {
			    return 0;
			}
			size += HAMT_HEAD_BITMAP_SZ(n+1);
			goto unroll;
                    default:
                        /* collision node: scan its leaves, stopping at the
                         * first stored key that compares greater than `key` */
                        ERTS_ASSERT(is_arity_value(hdr));
                        n = arityval(hdr);
                        ASSERT(n >= 2);
                        for (slot = 0; slot < n; slot++) {
                            Eterm* kv = list_val(ptr[1+slot]);
                            Sint c;
                            ckey = CAR(kv);
                            c = CMP_TERM(key, ckey);
                            if (c == 0) {
                                if (CDR(kv) == value) {
                                    *sz = 0;
                                    return 1;
                                }
                                *update_size = 0;
                                size += HAMT_COLLISION_NODE_SZ(n);
                                ESTACK_PUSH3(*sp, slot, 0, node);
                                goto unroll;
                            }
                            if (c < 0)
                                break;
                        }
                        if (is_update) {
                            return 0;
                        }
                        /* `slot` is now the insert position for the new leaf */
                        size += HAMT_COLLISION_NODE_SZ(n+1);
                        ESTACK_PUSH3(*sp, slot, 1, node);
                        goto unroll;
		}
		break;
	    default:
		erts_exit(ERTS_ERROR_EXIT, "bad primary tag %p\r\n", node);
		break;
	}
    }
insert_subnodes:
    /* `node` is an existing leaf with key `ckey` sitting where `key` should
     * go. Add levels until the two hashes pick different indices. */
    if (lvl < HAMT_MAX_LEVEL) {
        clvl  = lvl;
        chx   = hashmap_restore_hash(clvl,ckey);
        do {
            ix    = hashmap_index(hx);
            cix   = hashmap_index(chx);
            if (cix != ix) {
                /* indices differ: one node holding both leaves suffices */
                size += HAMT_NODE_BITMAP_SZ(2);
                ESTACK_PUSH4(*sp, cix, ix, 0, node);
                goto unroll;
            }
            /* same index: push a bare header (not a pointer) describing an
             * empty node that will receive its single child on the way up */
            ESTACK_PUSH4(*sp, 0, 1 << ix, 0, MAP_HEADER_HAMT_NODE_BITMAP(0));
            size += HAMT_NODE_BITMAP_SZ(1);
            hx    = hashmap_shift_hash(hx,lvl,key);
            chx   = hashmap_shift_hash(chx,clvl,ckey);
        } while (lvl < HAMT_MAX_LEVEL);
    }
    /* no levels left to separate the keys: use a two-leaf collision node */
    size += HAMT_COLLISION_NODE_SZ(2);
    ESTACK_PUSH2(*sp, 1, node);

unroll:
    *sz = size + /* res cons */ 2;
    return 1;
}

/*
 * Upward pass of a hashmap insert/update.
 *
 * Builds the new leaf [key|value] at `hp`, then pops the records left on
 * `*sp` by erts_hashmap_insert_down() and writes a copy of each recorded
 * node with the affected slot pointing at the freshly built subtree.
 * `update_size` is added to the size word of a copied head node.
 *
 * `hp` must have room for the word count reported by
 * erts_hashmap_insert_down(). Returns the term built from the last record
 * popped; the stack is empty on return.
 */
Eterm erts_hashmap_insert_up(Eterm *hp, Eterm key, Eterm value,
			     Uint update_size, ErtsEStack *sp) {
    Eterm node, *ptr, hdr;
    Eterm res;
    Eterm *nhp = NULL;
    Uint32 ix, cix, bp, hval;
    Uint slot, n;
    Eterm fake;

    /* the new leaf */
    res = CONS(hp, key, value); hp += 2;

    do {
	node = ESTACK_POP(*sp);
	switch(primary_tag(node)) {
            case TAG_PRIMARY_LIST: {
                /* `node` is an existing leaf that must now share a parent
                 * with the new leaf */
                const int is_collision_node = (int) ESTACK_POP(*sp);
                if (is_collision_node) {
                    nhp = hp;
                    *hp++ = MAP_HEADER_HAMT_COLLISION_NODE(2);
                    /* leaf with the smaller key (per CMP_TERM) goes first */
                    if (CMP_TERM(key, CAR(list_val(node))) < 0){
                        *hp++ = res;
                        *hp++ = node;
                    } else {
                        *hp++ = node;
                        *hp++ = res;
                    }
                    res = make_hashmap(nhp);
                    break;
                }
                ix  = (Uint32)ESTACK_POP(*sp);
		cix = (Uint32) ESTACK_POP(*sp);

		/* two-child bitmap node; children ordered by hash index */
		nhp   = hp;
		*hp++ = MAP_HEADER_HAMT_NODE_BITMAP((1 << ix) | (1 << cix));
		if (ix < cix) {
		    *hp++ = res;
		    *hp++ = node;
		} else {
		    *hp++ = node;
		    *hp++ = res;
		}
		res = make_hashmap(nhp);
		break;
            }
	    case TAG_PRIMARY_HEADER:
		/* subnodes, fake it */
		/* The stacked word is a bare node header rather than a
		 * pointer; store it in a local and box that local's address
		 * so the code below can read the header through `ptr`. */
		fake = node;
		node  = make_boxed(&fake);
		/* fall through */
	    case TAG_PRIMARY_BOXED:
		ptr = boxed_val(node);
		hdr = *ptr;
		ASSERT(is_header(hdr));

		switch(hdr & _HEADER_MAP_SUBTAG_MASK) {
		    case HAMT_SUBTAG_HEAD_ARRAY:
			slot  = (Uint) ESTACK_POP(*sp);
			nhp   = hp;
			n     = HAMT_HEAD_ARRAY_SZ - 2;
			*hp++ = MAP_HEADER_HAMT_HEAD_ARRAY; ptr++;
			/* size word */
			*hp++ = (*ptr++) + update_size;
			while(n--) { *hp++ = *ptr++; }
			nhp[slot+2] = res;
			res = make_hashmap(nhp);
			break;
		    case HAMT_SUBTAG_NODE_BITMAP:
			slot  = (Uint)   ESTACK_POP(*sp);
			bp    = (Uint32) ESTACK_POP(*sp);
			n     = (Uint32) ESTACK_POP(*sp);
			hval  = MAP_HEADER_VAL(hdr);
			nhp   = hp;
			*hp++ = MAP_HEADER_HAMT_NODE_BITMAP(hval | bp); ptr++;

			/* copy the slots before `slot`, write res, then copy
			 * the rest; an already occupied slot is skipped in
			 * the source (replaced), otherwise res is inserted */
			n -= slot;
			while(slot--) { *hp++ = *ptr++; }
			*hp++ = res;
			if (hval & bp) { ptr++; n--; }
			while(n--) { *hp++ = *ptr++; }

			res = make_hashmap(nhp);
			break;
		    case HAMT_SUBTAG_HEAD_BITMAP:
			slot  = (Uint)   ESTACK_POP(*sp);
			bp    = (Uint32) ESTACK_POP(*sp);
			n     = (Uint32) ESTACK_POP(*sp);
			hval  = MAP_HEADER_VAL(hdr);
			nhp   = hp;
			*hp++ = MAP_HEADER_HAMT_HEAD_BITMAP(hval | bp); ptr++;
			/* size word */
			*hp++ = (*ptr++) + update_size;

			n -= slot;
			while(slot--) { *hp++ = *ptr++; }
			*hp++ = res;
			if (hval & bp) { ptr++; n--; }
			while(n--) { *hp++ = *ptr++; }

			/* all 16 slots in use: switch to the array head header */
			if ((hval | bp) == 0xffff) {
			    *nhp = MAP_HEADER_HAMT_HEAD_ARRAY;
			}
			res = make_hashmap(nhp);
			break;
                    default: {
                        /* collision node: replace the leaf at `slot`, or
                         * insert a new one there when is_insert is set */
                        int is_insert;
                        ERTS_ASSERT(is_arity_value(hdr));
                        n = arityval(hdr);
                        ASSERT(n >= 2);
                        is_insert = (int)  ESTACK_POP(*sp);
                        slot      = (Uint) ESTACK_POP(*sp);
                        nhp = hp;
                        n += is_insert;
                        *hp++ = MAP_HEADER_HAMT_COLLISION_NODE(n); ptr++;
                        ix = 0;
                        while (ix++ < slot)
                            *hp++ = *ptr++;
                        *hp++ = res;
                        if (!is_insert)
                            ptr++;
                        while (ix++ < n)
                            *hp++ = *ptr++;
                        res = make_hashmap(nhp);
                        break;
                    }
		}
		break;
	    default:
		erts_exit(ERTS_ERROR_EXIT, "bad primary tag %x\r\n", primary_tag(node));
		break;
	}

    } while(!ESTACK_ISEMPTY(*sp));

    /* NOTE(review): no matching UseTmpHeapNoproc is visible in this
     * function - presumably a no-op leftover; confirm */
    UnUseTmpHeapNoproc(1);
    return res;
}

/* **Important Note**
 *
 * A yielding version of this function is generated with YCF. This
 * means that the code has to follow some restrictions. See note about
 * YCF near the top of the file for more information.
 */
#ifdef INCLUDE_YCF_TRANSFORMED_ONLY_FUNCTIONS
/*
 * Build a list of all keys in the hashmap `node` on the heap of `p`.
 *
 * Two heap words (one cons cell) are allocated per entry, using the size
 * stored in the map head. Each key is prepended to the result, so the
 * list is in the reverse of the iteration order.
 */
static Eterm hashmap_keys(Process* p, Eterm node) {
    Eterm stack_default_wstack[16];
    ErtsWStack stack;
    hashmap_head_t* root;
    Eterm *hp;
    Eterm *kv;
    Eterm res = NIL;
#if DEF_WSTACK_SIZE != (16)
#error "The macro DEF_WSTACK_SIZE has changed from 16 (need to change constant above)"
    /* We cannot use "UWord stack_default_wstack[DEF_WSTACK_SIZE];"
       because macros are not expanded before the code is passed to
       YCF, and YCF needs to know the size of the array */
#endif
    stack = WSTACK_DEFAULT_VALUE(stack_default_wstack, ERTS_ALC_T_MAP_TRAP);
    /* special code registered for when the yield state is saved */
    YCF_SPECIAL_CODE_START(ON_SAVE_YIELD_STATE);
    {
        ENSURE_WSTACK_HEAP_STACK_ARRAY(stack, stack_default_wstack);
    }
    YCF_SPECIAL_CODE_END();
    /* special code registered for when the state is destroyed */
    YCF_SPECIAL_CODE_START(ON_DESTROY_STATE);
    {
        DESTROY_WSTACK_EXPLICIT_DEFAULT_ARRAY(stack, stack_default_wstack);
    }
    YCF_SPECIAL_CODE_END();
    root = (hashmap_head_t*) boxed_val(node);
    /* one cons cell (2 words) per key */
    hp  = HAlloc(p, root->size * 2);
    hashmap_iterator_init(&stack, node, 0);
    while ((kv=hashmap_iterator_next(&stack)) != NULL) {
	/* kv points at a leaf cons cell [K|V]; take the key */
	res = CONS(hp, CAR(kv), res);
	hp += 2;
    }
    DESTROY_WSTACK_EXPLICIT_DEFAULT_ARRAY(stack, stack_default_wstack);
    return res;
}
#endif /* INCLUDE_YCF_TRANSFORMED_ONLY_FUNCTIONS */

/* **Important Note**
 *
 * A yielding version of this function is generated with YCF. This
 * means that the code has to follow some restrictions. See note about
 * YCF near the top of the file for more information.
 */
#ifdef INCLUDE_YCF_TRANSFORMED_ONLY_FUNCTIONS
/*
 * Build a list of all values in the hashmap `node` on the heap of `p`.
 *
 * Two heap words (one cons cell) are allocated per entry, using the size
 * stored in the map head. Each value is prepended to the result, so the
 * list is in the reverse of the iteration order.
 */
static Eterm hashmap_values(Process* p, Eterm node) {
    Eterm stack_default_wstack[16];
    ErtsWStack stack;
    hashmap_head_t* root;
    Eterm *hp;
    Eterm *kv;
    Eterm res = NIL;
#if DEF_WSTACK_SIZE != (16)
#error "The macro DEF_WSTACK_SIZE has changed from 16 (need to change constant above)"
    /* We cannot use "UWord stack_default_wstack[DEF_WSTACK_SIZE];"
       because macros are not expanded before the code is passed to
       YCF, and YCF needs to know the size of the array */
#endif
    stack = WSTACK_DEFAULT_VALUE(stack_default_wstack, ERTS_ALC_T_MAP_TRAP);
    /* special code registered for when the yield state is saved */
    YCF_SPECIAL_CODE_START(ON_SAVE_YIELD_STATE);
    {
        ENSURE_WSTACK_HEAP_STACK_ARRAY(stack, stack_default_wstack);
    }
    YCF_SPECIAL_CODE_END();
    /* special code registered for when the state is destroyed */
    YCF_SPECIAL_CODE_START(ON_DESTROY_STATE);
    {
        DESTROY_WSTACK_EXPLICIT_DEFAULT_ARRAY(stack, stack_default_wstack);
    }
    YCF_SPECIAL_CODE_END();

    root = (hashmap_head_t*) boxed_val(node);
    /* one cons cell (2 words) per value */
    hp  = HAlloc(p, root->size * 2);
    hashmap_iterator_init(&stack, node, 0);
    while ((kv=hashmap_iterator_next(&stack)) != NULL) {
	/* kv points at a leaf cons cell [K|V]; take the value */
	res = CONS(hp, CDR(kv), res);
	hp += 2;
    }
    DESTROY_WSTACK_EXPLICIT_DEFAULT_ARRAY(stack, stack_default_wstack);
    return res;
}
#endif /* INCLUDE_YCF_TRANSFORMED_ONLY_FUNCTIONS */

/*
 * Remove `key` (hash `hx`) from the hashmap `map`.
 *
 * Returns the resulting map built on the heap of `p`, or THE_NON_VALUE
 * when the key is not present. When `value` is non-NULL and the key is
 * found, the removed value is stored in `*value`.
 *
 * When the remaining number of entries is at most MAP_SMALL_MAP_LIMIT
 * the result is built as a flatmap instead of a hashmap.
 *
 * The function first walks down to the leaf, recording the path on an
 * estack and summing the size of the visited nodes, and then rebuilds the
 * path bottom-up.
 */
static Eterm hashmap_delete(Process *p, erts_ihash_t hx, Eterm key,
                            Eterm map, Eterm *value) {
    Eterm *hp = NULL, *nhp = NULL, *hp_end = NULL;
    Eterm *ptr;
    Eterm hdr, res = map, node = map;
    Uint32 ix, bp, hval;
    Uint slot, lvl = 0;
    Uint size = 0, n = 0;
    DECLARE_ESTACK(stack);

    /* Phase 1: walk down to the leaf holding `key`, recording the path. */
    for (;;) {
	switch(primary_tag(node)) {
	    case TAG_PRIMARY_LIST:
		if (EQ(CAR(list_val(node)), key)) {
                    if (value) {
                        *value = CDR(list_val(node));
                    }
		    goto unroll;
		}
                res = THE_NON_VALUE;
		goto not_found;
	    case TAG_PRIMARY_BOXED:
		ptr = boxed_val(node);
		hdr = *ptr;
		ASSERT(is_header(hdr));

		switch(hdr & _HEADER_MAP_SUBTAG_MASK) {
		    case HAMT_SUBTAG_HEAD_ARRAY:
			ix    = hashmap_index(hx);
			hx    = hashmap_shift_hash(hx,lvl,key);
			size += HAMT_HEAD_ARRAY_SZ;
			ESTACK_PUSH2(stack, ix, node);
			node  = ptr[ix+2];
			break;
		    case HAMT_SUBTAG_NODE_BITMAP:
			hval = MAP_HEADER_VAL(hdr);
			ix   = hashmap_index(hx);
			bp   = 1 << ix;
                        if (hval == 0xffff) {
                            /* full node: slot equals the hash index */
                            slot = ix;
                            n = 16;
                        } else if (bp & hval) {
                            slot = hashmap_bitcount(hval & (bp - 1));
                            n    = hashmap_bitcount(hval);
                        } else {
                            /* not occupied */
                            res = THE_NON_VALUE;
                            goto not_found;
                        }

			ESTACK_PUSH4(stack, n, bp, slot, node);

                        hx    = hashmap_shift_hash(hx,lvl,key);
                        node  = ptr[slot+1];
                        ASSERT(HAMT_NODE_BITMAP_SZ(n) <= 17);
                        size += HAMT_NODE_BITMAP_SZ(n);
                        break;

		    case HAMT_SUBTAG_HEAD_BITMAP:
			hval = MAP_HEADER_VAL(hdr);
			ix   = hashmap_index(hx);
			bp   = 1 << ix;
			slot = hashmap_bitcount(hval & (bp - 1));
			n    = hashmap_bitcount(hval);

			ESTACK_PUSH4(stack, n, bp, slot, node);

			/* occupied */
			if (bp & hval) {
			    hx    = hashmap_shift_hash(hx,lvl,key);
			    node  = ptr[slot+2];
			    ASSERT(HAMT_HEAD_BITMAP_SZ(n) <= 18);
			    size += HAMT_HEAD_BITMAP_SZ(n);
			    break;
			}
			/* not occupied */
                        res = THE_NON_VALUE;
			goto not_found;
                    default: /* collision node */
                        ERTS_ASSERT(is_arity_value(hdr));
                        ASSERT(lvl == HAMT_MAX_LEVEL);
                        n = arityval(hdr);
                        ASSERT(n >= 2);
                        for (slot = 0; slot < n; slot++) {
                            Eterm* kv = list_val(ptr[1+slot]);
                            if (EQ(key, CAR(kv))) {
                                if (value)
                                    *value = CDR(kv);
                                ESTACK_PUSH2(stack, slot, node);
                                size += HAMT_COLLISION_NODE_SZ(n);
                                goto unroll;
                            }
                        }
                        res = THE_NON_VALUE;
                        goto not_found;
		}
		break;
	    default:
		erts_exit(ERTS_ERROR_EXIT, "bad primary tag %p\r\n", node);
		break;
	}
    }

unroll:
    /* the size is bounded and at least one less than the previous size */
    size -= 1;
    n     = hashmap_size(map) - 1;

    /* Few enough entries remain: rebuild everything as a flatmap. */
    if (n <= MAP_SMALL_MAP_LIMIT) {
	DECLARE_WSTACK(wstack);
	Eterm *kv, *ks, *vs;
	flatmap_t *mp;
	Eterm keys;

	/* the recorded path is not needed on this branch */
	DESTROY_ESTACK(stack);

	/* build flat structure */
	/* layout: key tuple (arity word + n keys), then the flatmap header
	 * (MAP_HEADER_FLATMAP_SZ words) followed by n values */
	hp    = HAlloc(p, 3 + 1 + (2 * n));
	keys  = make_tuple(hp);
	*hp++ = make_arityval(n);
	ks    = hp;
	hp   += n;
	mp    = (flatmap_t*)hp;
	hp   += MAP_HEADER_FLATMAP_SZ;
	vs    = hp;

	mp->thing_word = MAP_HEADER_FLATMAP;
	mp->size = n;
	mp->keys = keys;

	hashmap_iterator_init(&wstack, map, 0);

	/* copy every pair except the one being deleted */
	while ((kv=hashmap_iterator_next(&wstack)) != NULL) {
	    if (EQ(CAR(kv),key))
		continue;
	    *ks++ = CAR(kv);
	    *vs++ = CDR(kv);
	}

	/* it cannot have multiple keys */
	erts_validate_and_sort_flatmap(mp);

	DESTROY_WSTACK(wstack);
        UnUseTmpHeapNoproc(2);
	return make_flatmap(mp);
    }

    ASSERT(!ESTACK_ISEMPTY(stack));

    /* Phase 2: rebuild the recorded path bottom-up. While res is
     * THE_NON_VALUE nothing remains below and the slot must be removed
     * from the parent; otherwise res replaces the slot's old content. */
    hp     = HAlloc(p, size);
    hp_end = hp + size;
    res    = THE_NON_VALUE;

    do {
	node = ESTACK_POP(stack);

	/* all nodes are things */
	ptr = boxed_val(node);
	hdr = *ptr;
	ASSERT(is_header(hdr));

	switch(hdr & _HEADER_MAP_SUBTAG_MASK) {
	    case HAMT_SUBTAG_HEAD_ARRAY:
		ix  = (Uint) ESTACK_POP(stack);
		nhp = hp;
		if (res == THE_NON_VALUE) {
		    /* a slot disappears: the head becomes a bitmap head
		     * with bit ix cleared */
		    n     = 16;
		    n    -= ix;
		    *hp++ = MAP_HEADER_HAMT_HEAD_BITMAP(0xffff ^ (1 << ix)); ptr++;
		    *hp++ = (*ptr++) - 1;
		    while(ix--) { *hp++ = *ptr++; }
		    ptr++; n--;
		    while(n--) { *hp++ = *ptr++; }
		    res = make_hashmap(nhp);
		} else {
		    /* copy the head, decrement its size word, patch slot ix */
		    n     = 16;
		    *hp++ = MAP_HEADER_HAMT_HEAD_ARRAY; ptr++;
		    *hp++ = (*ptr++) - 1;
		    while(n--) { *hp++ = *ptr++; }
		    nhp[ix+2] = res;
		    res = make_hashmap(nhp);
		}
		break;
	    case HAMT_SUBTAG_NODE_BITMAP:
		slot = (Uint)   ESTACK_POP(stack);
		bp   = (Uint32) ESTACK_POP(stack);
		n    = (Uint32) ESTACK_POP(stack);
		nhp  = hp;

		/* bitmap change matrix
		 * res | none    leaf    bitmap
		 * ----------------------------
		 * n=1 | remove  remove  keep
		 * n=2 | other   keep    keep
		 * n>2 | shrink  keep    keep
		 *
		 * other: (remember, n is 2)
		 *   shrink if the other bitmap value is a bitmap node
		 *   remove if the other bitmap value is a leaf
		 *
		 * remove:
		 *   this bitmap node is removed, res is moved up in tree (could be none)
		 *   this is a special case of shrink
		 *
		 * keep:
		 *   the current path index is still used down in the tree, need to keep it
		 *   copy as usual with the updated res
		 *
		 * shrink:
		 *   the current path index is no longer used down in the tree, remove it (shrink)
		 */
		if (res == THE_NON_VALUE) {
		    if (n == 1) {
			break;
		    } else if (n == 2) {
			/* ix indexes the sibling of the removed slot */
			if (slot == 0) {
			    ix = 2; /* off by one 'cause hdr */
			} else {
			    ix = 1; /* off by one 'cause hdr */
			}
			if (primary_tag(ptr[ix]) == TAG_PRIMARY_LIST) {
			    res = ptr[ix];
			} else {
			    hval  = MAP_HEADER_VAL(hdr);
			    *hp++ = MAP_HEADER_HAMT_NODE_BITMAP(hval ^ bp);
			    *hp++ = ptr[ix];
			    res = make_hashmap(nhp);
			}
		    } else {
			/* n > 2 */
			hval  = MAP_HEADER_VAL(hdr);
			*hp++ = MAP_HEADER_HAMT_NODE_BITMAP(hval ^ bp); ptr++;
			n    -= slot;
			while(slot--) { *hp++ = *ptr++; }
			ptr++; n--;
			while(n--) { *hp++ = *ptr++; }
			res = make_hashmap(nhp);
		    }
		} else if (primary_tag(res) == TAG_PRIMARY_LIST && n == 1) {
		    /* a node whose only child is a leaf is dropped; the leaf
		     * moves up unchanged */
		    break;
		} else {
		    /* res is bitmap or leaf && n > 1, keep */
		    n    -= slot;
		    *hp++ = *ptr++;
		    while(slot--) { *hp++ = *ptr++; }
		    *hp++ = res;
		    ptr++; n--;
		    while(n--) { *hp++ = *ptr++; }
		    res = make_hashmap(nhp);
		}
		break;
	    case HAMT_SUBTAG_HEAD_BITMAP:
		slot = (Uint)   ESTACK_POP(stack);
		bp   = (Uint32) ESTACK_POP(stack);
		n    = (Uint32) ESTACK_POP(stack);
		nhp  = hp;

		if (res != THE_NON_VALUE) {
		    /* same header, size word decremented, slot replaced */
		    *hp++ = *ptr++;
		    *hp++ = (*ptr++) - 1;
		    n    -= slot;
		    while(slot--) { *hp++ = *ptr++; }
		    *hp++ = res;
		    ptr++; n--;
		    while(n--) { *hp++ = *ptr++; }
		} else {
		    /* slot removed: clear its bit in the header */
		    hval  = MAP_HEADER_VAL(hdr);
		    *hp++ = MAP_HEADER_HAMT_HEAD_BITMAP(hval ^ bp); ptr++;
		    *hp++ = (*ptr++) - 1;
		    n    -= slot;
		    while(slot--) { *hp++ = *ptr++; }
		    ptr++; n--;
		    while(n--) { *hp++ = *ptr++; }
		}
		res = make_hashmap(nhp);
		break;
            default: /* collision node */
                ERTS_ASSERT(is_arity_value(hdr));
                n = arityval(hdr);
                ASSERT(n >= 2);
                slot = (Uint) ESTACK_POP(stack);
                ASSERT(slot < n);
                if (n > 2) { /* Shrink collision node */
                    nhp = hp;
                    *hp++ = MAP_HEADER_HAMT_COLLISION_NODE(n-1); ptr++;
                    n -= slot + 1;
                    while (slot--) { *hp++ = *ptr++; }
                    ptr++;
                    while(n--) { *hp++ = *ptr++; }
                    res = make_hashmap(nhp);
                }
                else { /* Collapse collision node */
                    ASSERT(res == THE_NON_VALUE);
                    /* only the other leaf remains; it replaces the node */
                    res = ptr[1 + (1-slot)];
                }
		break;
	}
    } while(!ESTACK_ISEMPTY(stack));
    /* presumably hands back the unused part [hp, hp_end) of the
     * allocation - confirm against HRelease */
    HRelease(p, hp_end, hp);
not_found:
    DESTROY_ESTACK(stack);
    UnUseTmpHeapNoproc(2);
    return res;
}


/*
 * Sort the keys of flatmap `mp` in place (insertion sort using
 * erts_cmp_flatmap_keys), moving each value together with its key.
 *
 * Returns 1 on success. Returns 0 as soon as two keys compare equal; the
 * arrays may then be left partially sorted.
 */
int erts_validate_and_sort_flatmap(flatmap_t* mp)
{
    Eterm *keys = flatmap_get_keys(mp);
    Eterm *vals = flatmap_get_values(mp);
    Uint   n    = flatmap_get_size(mp);
    Uint   i, j;

    for (i = 1; i < n; i++) {
        /* sink element i into the already sorted prefix [0, i) */
        for (j = i; j > 0; j--) {
            Eterm k, v;
            Sint order = erts_cmp_flatmap_keys(keys[j], keys[j-1]);

            if (order > 0) {
                /* in place */
                break;
            }
            if (order == 0) {
                /* identical key -> error */
                return 0;
            }

            k = keys[j];
            keys[j] = keys[j-1];
            keys[j-1] = k;

            v = vals[j];
            vals[j] = vals[j-1];
            vals[j-1] = v;
        }
    }
    return 1;
}

/*
 * Sort the keys of flatmap `mp` in place (insertion sort using
 * erts_cmp_flatmap_keys) and drop duplicate keys, moving each value
 * together with its key.
 *
 * When two keys compare equal, the entry that appears later in the input
 * is kept and the earlier one is removed. On return, the size field of
 * `mp` and the arity word of its key tuple hold the number of unique keys.
 */
void erts_usort_flatmap(flatmap_t* mp)
{
    Eterm *ks  = flatmap_get_keys(mp);
    Eterm *vs  = flatmap_get_values(mp);
    Uint   sz  = flatmap_get_size(mp);
    Uint   ix,jx;
    Eterm  tmp;
    Sint c;

    /* sort and shrink */

    for (ix = 1; ix < sz; ix++) {
	jx = ix;
	while( jx > 0 && (c = erts_cmp_flatmap_keys(ks[jx],ks[jx-1])) <= 0 ) {
	    /* identical key -> remove it */
	    if (c == 0) {
                /* Overwrite the older entry at jx-1 by shifting everything
                 * from jx to the end down one step. The element being
                 * inserted may already have been swapped below ix, so the
                 * number of elements to move is sz-jx; moving only sz-ix
                 * would leave the tail unshifted and lose an entry. */
                sys_memmove(ks+jx-1,ks+jx,(sz-jx)*sizeof(Eterm));
                sys_memmove(vs+jx-1,vs+jx,(sz-jx)*sizeof(Eterm));
                sz--;
                /* the next unsorted element has moved down to index ix */
                ix--;
                break;
            }

	    tmp = ks[jx];
	    ks[jx] = ks[jx - 1];
	    ks[jx - 1] = tmp;

	    tmp = vs[jx];
	    vs[jx] = vs[jx - 1];
	    vs[jx - 1] = tmp;

	    jx--;
	}
    }
    mp->size = sz;
    *tuple_val(mp->keys) = make_arityval(sz);
}

#if 0 /* Can't get myself to remove this beautiful piece of code
         for probabilistic overestimation of nr of nodes in a hashmap */

/* Really rough estimate of sqrt(x)
 * Guaranteed not to be less than sqrt(x)
 */
static int int_sqrt_ceiling(Uint x)
{
    int n;

    /* small inputs are returned unchanged */
    if (x <= 2)
	return x;

    n = erts_fit_in_bits_uint(x-1);
    if (n & 1) {
	/* Calc: sqrt(2^n) = 2^(n/2) * sqrt(2) ~= 2^(n/2) * 3 / 2 */
	return (1 << (n/2 - 1)) * 3;
    }
    else {
	/* Calc: sqrt(2^n) = 2^(n/2) */
	return 1 << (n / 2);
    }
}

/* May not be enough if hashing is broken (not uniform)
 * or if hell freezes over.
 */
Uint hashmap_overestimated_node_count(Uint k)
{
    /* k is nr of key-value pairs.
       N(k) is expected nr of nodes in hamt.

       Observation:
       For uniformly distributed hash values, average of N varies between
       0.3*k and 0.4*k (with a beautiful sine curve)
       and standard deviation of N is about sqrt(k)/3.

       Assuming normal probability distribution, we overestimate nr of nodes
       by 15 std.devs above the average, which gives a probability for overrun
       less than 1.0e-49 (same magnitude as a git SHA1 collision).
     */
    /* 2*k/5 is the 0.4*k upper average; (15/3) is integer 5, i.e. 15
       standard deviations of sqrt(k)/3 each */
    return 2*k/5 + 1 + (15/3)*int_sqrt_ceiling(k);
}
#endif

/*
 * erts_debug:map_info/1
 *
 * Returns the result of hashmap_info() for a hashmap. Fails with BADARG
 * for a flatmap and with BADMAP (fvalue set to the argument) for any
 * other term.
 */
BIF_RETTYPE erts_debug_map_info_1(BIF_ALIST_1) {
    if (is_hashmap(BIF_ARG_1)) {
        BIF_RET(hashmap_info(BIF_P,BIF_ARG_1));
    }

    if (is_flatmap(BIF_ARG_1)) {
        BIF_ERROR(BIF_P, BADARG);
    }

    /* not a map at all */
    BIF_P->fvalue = BIF_ARG_1;
    BIF_ERROR(BIF_P, BADMAP);
}

/*
 * erts_internal:map_to_tuple_keys/1
 *
 * Used in erts_debug:size/1
 */

/*
 * Returns the key tuple of a flatmap. Fails with BADARG for a hashmap and
 * with BADMAP (fvalue set to the argument) for any other term.
 */
BIF_RETTYPE erts_internal_map_to_tuple_keys_1(BIF_ALIST_1) {
    if (is_flatmap(BIF_ARG_1)) {
        flatmap_t *flat = (flatmap_t*)flatmap_val(BIF_ARG_1);
        BIF_RET(flat->keys);
    }

    if (is_hashmap(BIF_ARG_1)) {
        BIF_ERROR(BIF_P, BADARG);
    }

    /* not a map at all */
    BIF_P->fvalue = BIF_ARG_1;
    BIF_ERROR(BIF_P, BADMAP);
}

/*
 * erts_internal:term_type/1
 *
 * Used in erts_debug:size/1
 */

/*
 * Returns an atom naming the internal representation of the argument,
 * chosen from its primary tag and, for boxed and immediate terms, its
 * header or immediate subtag (e.g. list, tuple, flatmap, hashmap,
 * hashmap_node, fixnum, atom).
 *
 * An unrecognized tag ends in erts_exit(ERTS_ABORT_EXIT, ...).
 */
BIF_RETTYPE erts_internal_term_type_1(BIF_ALIST_1) {
    Eterm obj = BIF_ARG_1;
    switch (primary_tag(obj)) {
        case TAG_PRIMARY_LIST:
            BIF_RET(ERTS_MAKE_AM("list"));
        case TAG_PRIMARY_BOXED: {
            /* boxed term: classify by the header word it points to */
            Eterm hdr = *boxed_val(obj);
            ASSERT(is_header(hdr));
            switch (hdr & _TAG_HEADER_MASK) {
                case ARITYVAL_SUBTAG:
                    BIF_RET(ERTS_MAKE_AM("tuple"));
                case FUN_SUBTAG:
                    {
                        ErlFunThing *funp = (ErlFunThing *)fun_val(obj);

                        /* both branches end in BIF_RET (presumably a
                         * return), so control should not reach MAP_SUBTAG */
                        if (is_local_fun(funp)) {
                            BIF_RET(ERTS_MAKE_AM("fun"));
                        } else {
                            BIF_RET(ERTS_MAKE_AM("export"));
                        }
                    }
                case MAP_SUBTAG:
                    switch (MAP_HEADER_TYPE(hdr)) {
                        case MAP_HEADER_TAG_FLATMAP_HEAD :
                            BIF_RET(ERTS_MAKE_AM("flatmap"));
                        case MAP_HEADER_TAG_HAMT_HEAD_BITMAP :
                        case MAP_HEADER_TAG_HAMT_HEAD_ARRAY :
                            /* both head variants are reported as hashmap */
                            BIF_RET(ERTS_MAKE_AM("hashmap"));
                        case MAP_HEADER_TAG_HAMT_NODE_BITMAP :
                            BIF_RET(ERTS_MAKE_AM("hashmap_node"));
                        default:
                            erts_exit(ERTS_ABORT_EXIT, "term_type: bad map header type %d\n", MAP_HEADER_TYPE(hdr));
                    }
                case BIN_REF_SUBTAG:
                    BIF_RET(ERTS_MAKE_AM("refc_binary"));
                case HEAP_BITS_SUBTAG:
                    BIF_RET(ERTS_MAKE_AM("heap_binary"));
                case SUB_BITS_SUBTAG:
                    BIF_RET(ERTS_MAKE_AM("sub_binary"));
                case POS_BIG_SUBTAG:
                case NEG_BIG_SUBTAG:
                    /* sign is not distinguished */
                    BIF_RET(ERTS_MAKE_AM("bignum"));
                case REF_SUBTAG:
                    BIF_RET(ERTS_MAKE_AM("reference"));
                case EXTERNAL_REF_SUBTAG:
                    BIF_RET(ERTS_MAKE_AM("external_reference"));
                case EXTERNAL_PID_SUBTAG:
                    BIF_RET(ERTS_MAKE_AM("external_pid"));
                case EXTERNAL_PORT_SUBTAG:
                    BIF_RET(ERTS_MAKE_AM("external_port"));
                case FLOAT_SUBTAG:
                    BIF_RET(ERTS_MAKE_AM("hfloat"));
                default:
                    erts_exit(ERTS_ABORT_EXIT, "term_type: Invalid tag (0x%X)\n", hdr);
            }
        }
        case TAG_PRIMARY_IMMED1:
            /* immediate term: classify by its immed1, then immed2, tag */
            switch (obj & _TAG_IMMED1_MASK) {
                case _TAG_IMMED1_SMALL:
                    BIF_RET(ERTS_MAKE_AM("fixnum"));
                case _TAG_IMMED1_PID:
                    BIF_RET(ERTS_MAKE_AM("pid"));
                case _TAG_IMMED1_PORT:
                    BIF_RET(ERTS_MAKE_AM("port"));
                case _TAG_IMMED1_IMMED2:
                    switch (obj & _TAG_IMMED2_MASK) {
                        case _TAG_IMMED2_ATOM:
                            BIF_RET(ERTS_MAKE_AM("atom"));
                        case _TAG_IMMED2_CATCH:
                            BIF_RET(ERTS_MAKE_AM("catch"));
                        case _TAG_IMMED2_NIL:
                            BIF_RET(ERTS_MAKE_AM("nil"));
                        default:
                            erts_exit(ERTS_ABORT_EXIT, "term_type: Invalid tag (0x%X)\n", obj);
                    }
                default:
                    erts_exit(ERTS_ABORT_EXIT, "term_type: Invalid tag (0x%X)\n", obj);
            }
        default:
            erts_exit(ERTS_ABORT_EXIT, "term_type: Invalid tag (0x%X)\n", obj);
    }
}

/*
 * erts_internal:map_hashmap_children/1
 *
 * Used in erts_debug:size/1
 */

/*
 * Returns a list of the child words of a hashmap head or node. Each child
 * is prepended, so the list is in reverse slot order.
 *
 * Fails with BADARG for a flatmap and with BADMAP (fvalue set to the
 * argument) when the argument does not pass is_map().
 */
BIF_RETTYPE erts_internal_map_hashmap_children_1(BIF_ALIST_1) {
    Eterm *slots, *hp, hdr;
    Eterm children = NIL;
    Uint  count = 0, i;

    if (!is_map(BIF_ARG_1)) {
        BIF_P->fvalue = BIF_ARG_1;
        BIF_ERROR(BIF_P, BADMAP);
    }

    slots = boxed_val(BIF_ARG_1);
    hdr = *slots;
    ASSERT(is_header(hdr));

    switch(hdr & _HEADER_MAP_SUBTAG_MASK) {
        case HAMT_SUBTAG_HEAD_FLATMAP:
            BIF_ERROR(BIF_P, BADARG);
        case HAMT_SUBTAG_HEAD_BITMAP:
            /* step over the header and the size word */
            slots += 2;
            count = hashmap_bitcount(MAP_HEADER_VAL(hdr));
            break;
        case HAMT_SUBTAG_NODE_BITMAP:
            /* step over the header */
            slots += 1;
            count = hashmap_bitcount(MAP_HEADER_VAL(hdr));
            break;
        case HAMT_SUBTAG_HEAD_ARRAY:
            count  = 16;
            slots += 2;
            break;
        default: /* collision node */
            /* NOTE(review): unlike the other cases this branch leaves
             * `slots` at the header word, so the header itself would be
             * the first word consed below - confirm whether a collision
             * node can reach this point given the is_map() guard */
            ERTS_ASSERT(is_arity_value(hdr));
            count = arityval(hdr);
            ASSERT(count >= 2);
            break;
    }
    ASSERT(count < 17);

    /* one cons cell (2 words) per child */
    hp = HAlloc(BIF_P, 2*count);
    for (i = 0; i < count; i++) {
        children = CONS(hp, slots[i], children);
        hp += 2;
    }
    BIF_RET(children);
}


/*
 * Walk every node reachable from the hashmap `node` and summarise its
 * shape as a list built on the heap of process `p`:
 *
 *   [{depth, MaxDepth},
 *    {arrays, NArrays, []},
 *    {collisions, NCollisions, UsageTuple},
 *    {bitmaps, NBitmaps, UsageTuple},
 *    {leafs, NLeafs, PerLevelTuple}]
 *
 * The usage tuples have 16 elements; element K counts the nodes with K
 * children (collision nodes with more than 16 children are counted in
 * the last element). PerLevelTuple counts the leaves found per level.
 */
static Eterm hashmap_info(Process *p, Eterm node) {
    DECL_AM(depth);
    DECL_AM(leafs);
    DECL_AM(bitmaps);
    DECL_AM(arrays);
    DECL_AM(collisions);
    Uint leaf_cnt = 0, bitmap_cnt = 0, array_cnt = 0, collision_cnt = 0;
    Uint leaves_per_level[HAMT_MAX_LEVEL + 2];
    Uint bitmap_fill[16];
    Uint collision_fill[16];
    Uint deepest = 0;
    Uint level;
    Uint nchild;
    Uint k;
    Uint tuple_words;
    Eterm *node_ptr;
    Eterm *hp;
    Eterm header, usage, entry;
    Eterm result = NIL;
    DECLARE_ESTACK(stack);

    sys_memzero(leaves_per_level, sizeof(leaves_per_level));
    sys_memzero(bitmap_fill, sizeof(bitmap_fill));
    sys_memzero(collision_fill, sizeof(collision_fill));

    node_ptr = boxed_val(node);

    /* Each stack entry is a (level, term) pair; the term is on top. */
    ESTACK_PUSH(stack, 0);
    ESTACK_PUSH(stack, node);

    while (!ESTACK_ISEMPTY(stack)) {
        node = ESTACK_POP(stack);
        level = ESTACK_POP(stack);

        if (primary_tag(node) == TAG_PRIMARY_LIST) {
            /* A cons cell is a leaf. */
            leaf_cnt++;
            leaves_per_level[level]++;
            continue;
        }
        if (primary_tag(node) != TAG_PRIMARY_BOXED) {
            continue;
        }

        node_ptr = boxed_val(node);
        header = *node_ptr;
        ASSERT(is_header(header));

        /* Leave `node_ptr` so that the children are node_ptr[1..nchild]. */
        switch (header & _HEADER_MAP_SUBTAG_MASK) {
        case HAMT_SUBTAG_NODE_BITMAP:
            nchild = hashmap_bitcount(MAP_HEADER_VAL(header));
            ASSERT(nchild < 17);
            bitmap_cnt++;
            bitmap_fill[nchild - 1]++;
            break;
        case HAMT_SUBTAG_HEAD_BITMAP:
            nchild = hashmap_bitcount(MAP_HEADER_VAL(header));
            bitmap_cnt++;
            bitmap_fill[nchild - 1]++;
            node_ptr++;
            break;
        case HAMT_SUBTAG_HEAD_ARRAY:
            nchild = 16;
            array_cnt++;
            node_ptr++;
            break;
        default: /* collision node */
            ERTS_ASSERT(is_arity_value(header));
            ASSERT(level == HAMT_MAX_LEVEL);
            nchild = arityval(header);
            ASSERT(nchild >= 2);
            collision_cnt++;
            collision_fill[(nchild > 16 ? 16 : nchild) - 1]++;
            break;
        }
        ASSERT(nchild >= 1);

        level++;
        ASSERT(level <= HAMT_MAX_LEVEL+1);
        if (level > deepest) {
            deepest = level;
        }

        /* Schedule the children, last slot first. */
        for (k = nchild; k > 0; k--) {
            ESTACK_PUSH(stack, level);
            ESTACK_PUSH(stack, node_ptr[k]);
        }
    }

    /* First pass: only compute how many heap words the three tuples need. */
    tuple_words = 0;
    hashmap_bld_tuple_uint(NULL, &tuple_words, HAMT_MAX_LEVEL+2, leaves_per_level);
    hashmap_bld_tuple_uint(NULL, &tuple_words, 16, bitmap_fill);
    hashmap_bld_tuple_uint(NULL, &tuple_words, 16, collision_fill);

    /* One {depth, N} pair plus four 3-tuples, each with a cons cell. */
    hp = HAlloc(p, 2+3 + 4*(2+4) + tuple_words);

    /* Second pass: build the terms; the list is consed back to front. */
    usage = hashmap_bld_tuple_uint(&hp, NULL, HAMT_MAX_LEVEL+2, leaves_per_level);
    entry = TUPLE3(hp, AM_leafs, make_small(leaf_cnt), usage);
    hp += 4;
    result = CONS(hp, entry, result);
    hp += 2;

    usage = hashmap_bld_tuple_uint(&hp, NULL, 16, bitmap_fill);
    entry = TUPLE3(hp, AM_bitmaps, make_small(bitmap_cnt), usage);
    hp += 4;
    result = CONS(hp, entry, result);
    hp += 2;

    usage = hashmap_bld_tuple_uint(&hp, NULL, 16, collision_fill);
    entry = TUPLE3(hp, AM_collisions, make_small(collision_cnt), usage);
    hp += 4;
    result = CONS(hp, entry, result);
    hp += 2;

    entry = TUPLE3(hp, AM_arrays, make_small(array_cnt), NIL);
    hp += 4;
    result = CONS(hp, entry, result);
    hp += 2;

    entry = TUPLE2(hp, AM_depth, make_small(deepest));
    hp += 3;
    result = CONS(hp, entry, result);
    hp += 2;

    DESTROY_ESTACK(stack);
    ERTS_HOLE_CHECK(p);
    return result;
}

/*
 * Build (or size) a tuple holding the `n` unsigned integers in `nums`.
 *
 * `hpp` and `szp` are handed straight to erts_bld_uint() and
 * erts_bld_tuplev(). Callers in this file pass (NULL, &size) to
 * accumulate the required heap size and (&hp, NULL) to actually build
 * the term. A temporary array for the element terms is allocated and
 * freed within the call.
 */
static Eterm hashmap_bld_tuple_uint(Uint **hpp, Uint *szp, Uint n, Uint nums[]) {
    Eterm *elements = (Eterm *)erts_alloc(ERTS_ALC_T_TMP, n * sizeof(Eterm));
    Eterm tuple;
    Uint k = 0;

    while (k < n) {
        elements[k] = erts_bld_uint(hpp, szp, nums[k]);
        k++;
    }

    tuple = erts_bld_tuplev(hpp, szp, n, elements);
    erts_free(ERTS_ALC_T_TMP, (void *) elements);

    return tuple;
}


/**
 * In hashmap the Path is a bit pattern that describes
 * which slot we should traverse in each hashmap node.
 * Since each hashmap node can only be up to 16 elements
 * large we use 4 bits per level in the path.
 *
 * So a Path with value 0x210 will first get the 0:th
 * slot in the head node, and then the 1:st slot in the
 * resulting node and then finally the 2:nd slot in the
 * node beneath. If that slot is not a leaf, then the path
 * continues down the 0:th slot until it finds a leaf.
 *
 * Collision nodes may (theoretically and in debug) have more
 * than 16 elements. To not complicate the 4-bit path format
 * we avoid yielding in collision nodes.
 *
 * Once the leaf has been found, the return value is created
 * by traversing the tree using the stack that was built
 * when searching for the first leaf to return.
 *
 * The index can become a bignum, which complicates the code
 * a bit. However it should be very rare that this happens
 * even on a 32bit system as you would need a tree of depth
 * 7 or more.
 *
 * If the number of elements remaining in the map is greater
 * than how many we want to return, we build a new Path, using
 * the stack, that points to the next leaf.
 *
 * The third argument to this function controls how the data
 * is returned.
 *
 * iterator: The key-value associations are to be used by
 *           maps:iterator. The return has this format:
 *             {K1,V1,{K2,V2,none | [Path | Map]}}
 *           this makes the maps:next function very simple
 *           and performant.
 *
 * list(): The key-value associations are to be used by
 *         maps:to_list. The return has this format:
 *             [Path, Map | [{K1,V1},{K2,V2} | BIF_ARG_3]]
 *                 or if no more associations remain
 *             [{K1,V1},{K2,V2} | BIF_ARG_3]
 */

/* Number of bits one path element (one slot index) occupies. */
#define PATH_ELEM_SIZE 4
/* Mask selecting the lowest path element of a path word. */
#define PATH_ELEM_MASK 0xf
/* Slot index stored in the lowest path element of PATH. */
#define PATH_ELEM(PATH) ((PATH) & PATH_ELEM_MASK)
/* How many path elements fit in a single bignum digit (ErtsDigit). */
#define PATH_ELEMS_PER_DIGIT (sizeof(ErtsDigit) * 8 / PATH_ELEM_SIZE)

/*
 * erts_internal:map_next/3
 *
 * BIF_ARG_1: the path; a non-negative integer (small or bignum), or a
 *            list of keys when used as an ordered iterator.
 * BIF_ARG_2: the map to iterate over.
 * BIF_ARG_3: the atom `iterator`, or a list that becomes the tail of the
 *            returned key-value list.
 *
 * Three cases are handled in turn:
 *   1. ordered iterator (type iterator and the path is a list of keys),
 *   2. flatmap (all associations are returned in one call),
 *   3. hashmap (at most `elems` associations are returned together with
 *      a new path pointing at the next leaf, if any remain).
 *
 * Fails with badarg if BIF_ARG_2 is not a map, BIF_ARG_3 is neither
 * `iterator` nor a list, or the path is invalid for the map.
 */
BIF_RETTYPE erts_internal_map_next_3(BIF_ALIST_3) {

    Eterm path, map;
    enum { iterator, list } type;

    path = BIF_ARG_1;
    map  = BIF_ARG_2;

    if (!is_map(map))
        BIF_ERROR(BIF_P, BADARG);

    if (BIF_ARG_3 == am_iterator) {
        type = iterator;
    } else if (is_nil(BIF_ARG_3) || is_list(BIF_ARG_3)) {
        type = list;
    } else {
        BIF_ERROR(BIF_P, BADARG);
    }

    /* Handle an ordered iterator. */
    if (type == iterator && (is_list(path) || is_nil(path))) {
#ifdef DEBUG
#define ORDERED_ITER_FACTOR 200
#else
#define ORDERED_ITER_FACTOR 32
#endif
        /* The path is a list of keys. Each key is looked up in the map and
         * emitted as a {Key, Value, Next} tuple (4 words); 2 extra words
         * are reserved for the [RemainingKeys | Map] continuation cell. */
        int orig_elems = MAX(1, ERTS_BIF_REDS_LEFT(BIF_P) / ORDERED_ITER_FACTOR);
        int elems = orig_elems;
        Uint needed = 4 * elems + 2;
        Eterm *hp = HAlloc(BIF_P, needed);
        Eterm *hp_end = hp + needed;
        Eterm result = am_none;
        /* Points at the slot that must receive the next tuple (or the
         * terminator); starts at `result` and then moves to the third
         * element of the most recently built tuple. */
        Eterm *patch_ptr = &result;

        while (is_list(path) && elems > 0) {
            Eterm *lst = list_val(path);
            Eterm key = CAR(lst);
            Eterm res = make_tuple(hp);
            const Eterm *value = erts_maps_get(key, map);
            if (!value) {
                /* Key not present in the map (also the target for an
                 * improper key list, see the goto below). */
            ordered_badarg:
                HRelease(BIF_P, hp_end, hp);
                BIF_ERROR(BIF_P, BADARG);
            }
            hp[0] = make_arityval(3);
            hp[1] = key;
            hp[2] = *value;
            *patch_ptr = res;
            patch_ptr = &hp[3];
            hp += 4;
            path = CDR(lst);
            elems--;
        }

        if (is_list(path)) {
            /* Keys remain: terminate with [RemainingKeys | Map] and yield. */
            Eterm next = CONS(hp, path, map);
            hp += 2;
            ASSERT(hp == hp_end);
            *patch_ptr = next;
            BUMP_ALL_REDS(BIF_P);
            ASSERT(is_tuple(result));
            BIF_RET(result);
        } else if (is_nil(path)) {
            /* All keys consumed: terminate with `none` and give back the
             * unused part of the allocation. */
            HRelease(BIF_P, hp_end, hp);
            *patch_ptr = am_none;
            BUMP_REDS(BIF_P, ORDERED_ITER_FACTOR * (orig_elems - elems));
            ASSERT(result == am_none || is_tuple(result));
            BIF_RET(result);
        } else {
            /* Improper list of keys. */
            goto ordered_badarg;
        }
    }

    if (is_flatmap(map)) {
        /* Flatmaps are returned in full in a single call; the path only
         * has to be a small integer not larger than the map size. */
        Uint n;
	Eterm *ks,*vs, res, *hp;
	flatmap_t *mp = (flatmap_t*)flatmap_val(map);

	ks  = flatmap_get_keys(mp);
	vs  = flatmap_get_values(mp);
	n   = flatmap_get_size(mp);

        if (!is_small(BIF_ARG_1) || n < unsigned_val(BIF_ARG_1))
            BIF_ERROR(BIF_P, BADARG);

        if (type == iterator) {
            hp  = HAlloc(BIF_P, 4 * n);
            res = am_none;

            /* Built back to front so the first key ends up outermost. */
            while(n--) {
                res = TUPLE3(hp, ks[n], vs[n], res); hp += 4;
            }
        } else {
            hp  = HAlloc(BIF_P, (2 + 3) * n);
            res = BIF_ARG_3;

            while(n--) {
                Eterm tup = TUPLE2(hp, ks[n], vs[n]); hp += 3;
                res = CONS(hp, tup, res); hp += 2;
            }
        }

	BIF_RET(res);
    } else {
        Uint curr_path;
        Uint path_length = 0;
        Uint *path_rest = NULL;
        int i, elems, orig_elems;
        Eterm node = map, res, *patch_ptr = NULL;
        Eterm *hp = NULL;
        Eterm *hp_end;
        Eterm *ptr;
        Uint sz, words_per_elem;
        Uint idx;

        /* A stack WSTACK is used when traversing the hashmap.
         * It contains: node, idx, sz, ptr
         *
         * `node` is not really needed, but it is very nice to
         * have when debugging.
         *
         * `idx` always points to the next un-explored entry in
         * a node. If there are no more un-explored entries,
         * `idx` is equal to `sz`.
         *
         * `sz` is the number of elements in the node.
         *
         * `ptr` is a pointer to where the elements of the node begins.
         */
        DECLARE_WSTACK(stack);

        ASSERT(is_hashmap(node));

/* How many elements we return in one call depends on the number of reductions
 * that the process has left to run. In debug we return fewer elements to test
 * the Path implementation better.
 *
 * Also, when the path is 0 (i.e. for the first call) we limit the number of
 * elements to MAP_SMALL_MAP_LIMIT in order to not use a huge amount of heap
 * when only the first X associations in the hashmap was needed.
 */
#if defined(DEBUG)
#define FCALLS_ELEMS(BIF_P) ((BIF_P->fcalls / 4) & 0xF)
#else
#define FCALLS_ELEMS(BIF_P) (BIF_P->fcalls / 4)
#endif

        if (MAX(FCALLS_ELEMS(BIF_P), 1) < hashmap_size(map))
            elems = MAX(FCALLS_ELEMS(BIF_P), 1);
        else
            elems = hashmap_size(map);

#undef FCALLS_ELEMS

        if (is_small(path)) {
            curr_path = unsigned_val(path);

            if (curr_path == 0 && elems > MAP_SMALL_MAP_LIMIT) {
                elems = MAP_SMALL_MAP_LIMIT;
            }
        } else if (is_big(path)) {
            /* Bignum path: start with the least significant digit and
             * remember where the remaining digits are. */
            Eterm *big = big_val(path);
            if (bignum_header_is_neg(*big))
                BIF_ERROR(BIF_P, BADARG);
            path_length = BIG_ARITY(big) - 1;
            curr_path = BIG_DIGIT(big,  0);
            path_rest = BIG_V(big) + 1;
        } else {
            BIF_ERROR(BIF_P, BADARG);
        }

        /* First we look for the leaf to start at using the
           path given. While doing so, we push each map node
           and the index onto the stack to use later. */
        for (i = 1; ; i++) {
            Eterm hdr;

            ptr = hashmap_val(node);
            hdr = *ptr++;

            sz = hashmap_node_size(hdr, &ptr);

            idx = PATH_ELEM(curr_path);
            /* A slot index outside the node means the path does not
             * belong to this map. */
            if (idx >= sz)
                goto badarg;

            if (is_list(ptr[idx])) {
                /* We have found a leaf, return it and the next X elements */
                break;
            }

            WSTACK_PUSH4(stack, node, idx+1, sz, (UWord)ptr);

            node = ptr[idx];

            curr_path >>= PATH_ELEM_SIZE;

            if (i == PATH_ELEMS_PER_DIGIT) {
                /* Switch to next bignum word if available,
                   otherwise just follow 0 path */
                i = 0;
                if (path_length) {
                    curr_path = *path_rest;
                    path_length--;
                    path_rest++;
                } else {
                    curr_path = 0;
                }
            }
        }

        if (type == iterator) {
            /*
             * Iterator uses the format {K1, V1, {K2, V2, {K3, V3, [Path | Map]}}},
             * so each element is 4 words large.
             * To make iteration order independent of input reductions
             * the KV-pairs are here built in DESTRUCTIVE non-reverse order.
             */
            words_per_elem = 4;
            patch_ptr = &res;
        } else {
            /*
             * List used the format [Path, Map, {K3,V3}, {K2,V2}, {K1,V1} | BIF_ARG_3],
             * so each element is 2+3 words large.
             * To make list order independent of input reductions
             * the KV-pairs are here built in FUNCTIONAL reverse order
             * as this is how the list as a whole is constructed.
             */
            words_per_elem = 2 + 3;
            res = BIF_ARG_3;
        }
        hp = HAlloc(BIF_P, words_per_elem * elems);
        hp_end = hp + words_per_elem * elems;

        orig_elems = elems;

        /* We traverse the hashmap and return at most `elems` elements */
        while(1) {

            if (idx == 0) {
                if (elems < sz && is_arity_value(*hashmap_val(node))) {
                    /*
                     * This is a collision node!
                     * Make sure 'elems' is large enough not to yield in the
                     * middle of it. Collision nodes may be larger than 16
                     * and that would complicate the 4-bit path format.
                     */
                    elems = sz;
                    /* Give back the unused rest of the current block and
                     * allocate a fresh one big enough for the whole node. */
                    HRelease(BIF_P, hp_end, hp);
                    hp = HAlloc(BIF_P, words_per_elem * elems);
                    hp_end = hp + words_per_elem * elems;
                }
            }
            else
                ASSERT(!is_arity_value(*hashmap_val(node)));

            /* Emit consecutive leaves of the current node. */
            while (idx < sz && elems != 0 && is_list(ptr[idx])) {
                Eterm *lst = list_val(ptr[idx]);
                if (type == iterator) {
                    *patch_ptr = make_tuple(hp);
                    hp[0] = make_arityval(3);
                    hp[1] = CAR(lst);
                    hp[2] = CDR(lst);
                    patch_ptr = &hp[3];
                    hp += 4;
                } else {
                    Eterm tup = TUPLE2(hp, CAR(lst), CDR(lst)); hp += 3;
                    res = CONS(hp, tup, res); hp += 2;
                }
                elems--;
                idx++;
            }

            ASSERT(idx == sz || !is_arity_value(*hashmap_val(node)));

            if (elems == 0) {
                if (idx < sz) {
                    /* There are more elements in this node to explore */
                    WSTACK_PUSH4(stack, node, idx+1, sz, (UWord)ptr);
                } else {
                    /* pop stack to find the next value */
                    while (!WSTACK_ISEMPTY(stack)) {
                        /* These locals deliberately shadow the outer
                         * ptr/sz/idx/node; the outer ones are not used
                         * again after the break below. */
                        Eterm *ptr = (Eterm*)WSTACK_POP(stack);
                        Uint sz = (Uint)WSTACK_POP(stack);
                        Uint idx = (Uint)WSTACK_POP(stack);
                        Eterm node = (Eterm)WSTACK_POP(stack);
                        if (idx < sz) {
                            WSTACK_PUSH4(stack, node, idx+1, sz, (UWord)ptr);
                            break;
                        }
                    }
                }
                break;
            }
            else if (idx < sz) {
                Eterm hdr;
                /* Push next idx in current node */
                WSTACK_PUSH4(stack, node, idx+1, sz, (UWord)ptr);

                /* Continue with first idx in child node */
                node = ptr[idx];
                ptr = hashmap_val(ptr[idx]);
                hdr = *ptr++;
                sz = hashmap_node_size(hdr, &ptr);
                idx = 0;
            }
            else if (!WSTACK_ISEMPTY(stack)) {
                /* Current node exhausted: resume in the parent. */
                ptr = (Eterm*)WSTACK_POP(stack);
                sz = (Uint)WSTACK_POP(stack);
                idx = (Uint)WSTACK_POP(stack);
                node = (Eterm)WSTACK_POP(stack);
            }
            else {
                /* There are no more element in the hashmap */
                break;
            }
        }

        /* A non-empty stack means associations remain; encode the stack
         * as the path to the next leaf and append the continuation. */
        if (!WSTACK_ISEMPTY(stack)) {
            Uint depth = WSTACK_COUNT(stack) / 4 + 1;
            /* +1 because we already have the first element in curr_path */
            Eterm *path_digits = NULL;
            Uint curr_path = 0;

            /* If the path cannot fit in a small, we allocate a bignum */
            if (depth >= PATH_ELEMS_PER_DIGIT) {
                /* We need multiple ErtsDigit's to represent the path */
                int big_size = BIG_NEED_FOR_BITS(depth * PATH_ELEM_SIZE);
                hp = HAlloc(BIF_P, big_size);
                hp[0] = make_pos_bignum_header(big_size - BIG_NEED_SIZE(0));
                /* Digits are filled in from the last word backwards. */
                path_digits = hp + big_size - 1;
            }


            /* Pop the stack to create the complete path to the next leaf */
            while(!WSTACK_ISEMPTY(stack)) {
                Uint idx;

                (void)WSTACK_POP(stack);
                (void)WSTACK_POP(stack);
                idx = (Uint)WSTACK_POP(stack)-1;
                /* idx - 1 because idx in the stack is pointing to
                   the next element to fetch. */
                (void)WSTACK_POP(stack);

                depth--;
                if (depth % PATH_ELEMS_PER_DIGIT == 0) {
                    /* Switch to next bignum element */
                    path_digits[0] = curr_path;
                    path_digits--;
                    curr_path = 0;
                }

                curr_path <<= PATH_ELEM_SIZE;
                curr_path |= idx;
            }

            if (path_digits) {
                path_digits[0] = curr_path;
                path = make_big(hp);
            } else {
                /* The Uint could be too large for a small */
                path = erts_make_integer(curr_path, BIF_P);
            }

            if (type == iterator) {
                hp = HAlloc(BIF_P, 2);
                *patch_ptr = CONS(hp, path, map); hp += 2;
            } else {
                hp = HAlloc(BIF_P, 4);
                res = CONS(hp, map, res); hp += 2;
                res = CONS(hp, path, res); hp += 2;
            }
        } else {
            /* Nothing left to iterate: terminate the iterator chain and
             * hand back any unused heap. */
            if (type == iterator)
                *patch_ptr = am_none;
            HRelease(BIF_P, hp_end, hp);
        }
        /* Charge 4 reductions per association accounted for. */
        BIF_P->fcalls -= 4 * (orig_elems - elems);
        DESTROY_WSTACK(stack);
        BIF_RET(res);

    badarg:
        /* Only reached from the path lookup, before any heap was allocated. */
        ASSERT(hp == NULL);
        DESTROY_WSTACK(stack);
        BIF_ERROR(BIF_P, BADARG);
    }
}

/* Implementation of builtin emulations */

#if defined(ARCH_64) && (ERTS_AT_LEAST_GCC_VSN__(5, 1, 0) ||                  \
                         __has_builtin(__builtin_bswap64))
#  define hashmap_byte_swap(N) __builtin_bswap64((Uint64)(N))
#elif defined(ARCH_32) && (ERTS_AT_LEAST_GCC_VSN__(5, 1, 0) ||                \
                         __has_builtin(__builtin_bswap32))
#  define hashmap_byte_swap(N) __builtin_bswap32((Uint32)(N))
#elif defined(_MSC_VER) && _MSC_VER >= 1900
/* UCRT intrinsics are spread throughout the ordinary C headers, strangely
 * enough. */
#  include <stdlib.h>

#  if defined(ARCH_64)
#    define hashmap_byte_swap(N) _byteswap_uint64((Uint64)(N))
#  elif defined(ARCH_32)
#    define hashmap_byte_swap(N) _byteswap_ulong((Uint32)(N))
#  endif
#else
/* No byte-swap intrinsic available. Fall back to C and hope that the compiler
 * turns it into something efficient. */
static ERTS_INLINE erts_ihash_t hashmap_byte_swap(erts_ihash_t hash) {
    erts_ihash_t swapped = 0;
    int bytes_left = sizeof(hash);

    ERTS_CT_ASSERT(CHAR_BIT == 8);

    /* Peel bytes off the low end of `hash` and shift them into the low end
     * of `swapped`; the byte peeled first ends up most significant. */
    while (bytes_left-- > 0) {
        swapped = (swapped << 8) | (hash & 0xFF);
        hash >>= 8;
    }

    return swapped;
}
#endif

/*
 * Reverse the order of the 4-bit path elements in `hash`: first reverse
 * the bytes, then swap the two nibbles inside every byte.
 */
static ERTS_INLINE erts_ihash_t swizzle_map_hash(erts_ihash_t hash) {
    const erts_ihash_t high_nibbles = (erts_ihash_t)0xF0F0F0F0F0F0F0F0ull;
    const erts_ihash_t low_nibbles = high_nibbles >> 4;

    /* ABCDEFGH -> GHEFCDAB */
    const erts_ihash_t bytes_reversed = hashmap_byte_swap(hash);

    /* GHEFCDAB -> HGFEDCBA */
    const erts_ihash_t moved_down = (bytes_reversed & high_nibbles) >> 4;
    const erts_ihash_t moved_up = (bytes_reversed & low_nibbles) << 4;

    return moved_down | moved_up;
}

/* Count leading zeros emulation */
#ifndef hashmap_clz
/*
 * Portable count-leading-zeros. Performs a binary search for the highest
 * set bit: whenever the upper half of the remaining window is non-zero,
 * the window moves there and the zero count drops by the half width.
 * Returns the full word width (64 or 32) when `x` is zero.
 */
erts_ihash_t hashmap_clz(erts_ihash_t x) {
#if defined(ARCH_64)
    int n = 64;
#elif defined(ARCH_32)
    int n = 32;
#endif
    erts_ihash_t upper;
    int shift;

    /* Halve the window: 32 (64-bit only), 16, 8, 4, 2. */
    for (shift = n / 2; shift > 1; shift /= 2) {
        upper = x >> shift;
        if (upper != 0) {
            n -= shift;
            x = upper;
        }
    }

    /* At most two significant bits are left in x. */
    upper = x >> 1;
    if (upper != 0) {
        return n - 2;
    }

    return n - x;
}
#endif

/* CTPOP emulation */
#ifndef hashmap_bitcount
/*
 * Portable population count: sums bits in parallel within 2-bit, 4-bit
 * and 8-bit fields, then uses a multiplication to add all byte sums into
 * the most significant byte, which is shifted down and returned.
 */
erts_ihash_t hashmap_bitcount(erts_ihash_t x) {
    const erts_ihash_t every_other_bit = (erts_ihash_t)0x5555555555555555ull;
    const erts_ihash_t bit_pairs = (erts_ihash_t)0x3333333333333333ull;
    const erts_ihash_t low_nibbles = (erts_ihash_t)0x0F0F0F0F0F0F0F0Full;
    const erts_ihash_t byte_ones = (erts_ihash_t)0x0101010101010101ull;
    erts_ihash_t counts = x;

    /* Each 2-bit field now holds the number of bits set in it. */
    counts = counts - ((counts >> 1) & every_other_bit);

    /* Each 4-bit field holds the count for its four bits. */
    counts = ((counts >> 2) & bit_pairs) + (counts & bit_pairs);

    /* Each byte holds the count for its eight bits. */
    counts = (counts + (counts >> 4)) & low_nibbles;

    /* Accumulate all byte counts in the top byte. */
    counts = counts * byte_ones;

    return counts >> ((sizeof(erts_ihash_t) - 1) * CHAR_BIT);
}
#endif
