7.7 数据结构-Java Map

5 minute read

数据结构-Map

Map

HashMap

Node<K,V>[] table, 使用数组存储
put(), 将key对应value放到map, 如果map包含对应key则把之前value替换

public V put(K key, V value) {
  return putVal(hash(key), key, value, false, true);
}

    // 实现map的put操作
final V putVal(int hash, K key, V value, boolean onlyIfAbsent,
               boolean evict) {
    Node<K,V>[] tab; Node<K,V> p; int n, i;
          ...
    // 1. 通过对象hashcode获取数组下标
    if ((p = tab[i = (n - 1) & hash]) == null)
            // 对应下标无实体, 即无冲突, 则直接创建结点放到table
        tab[i] = newNode(hash, key, value, null);
    else {
        Node<K,V> e; K k;
        // 2. 解决冲突, 如果hash值相等, 且通过== or Object#equals判断key是否相同
        if (p.hash == hash &&
            ((k = p.key) == key || (key != null && key.equals(k))))
            // key值相等则直接替换
            e = p;
        else if (p instanceof TreeNode)
            // 解决冲突, key不同且是树结构, 则插入到树
            e = ((TreeNode<K,V>)p).putTreeVal(this, tab, hash, key, value);
        else {
        // 解决冲突, key不同, 查找table[i]位置下next为空的结点并插入
            for (int binCount = 0; ; ++binCount) {
                if ((e = p.next) == null) {
                    p.next = newNode(hash, key, value, null);
                    if (binCount >= TREEIFY_THRESHOLD - 1) // -1 for 1st
                        treeifyBin(tab, hash);
                    break;
                }
                    ...
                p = e;
            }
        }
        if (e != null) { // existing mapping for key
            ...
                // 回调, 提供给LinkedHashMap
            afterNodeAccess(e);
            return oldValue;
        }
    }
    ++modCount;
    // table数组的大小超过threshold(capacity * load factor), 默认是12, 即扩容
    if (++size > threshold)
        resize();
    // // 回调, 提供给LinkedHashMap
    afterNodeInsertion(evict);
    return null;
}

get()

public V get(Object key) {
    Node<K,V> e;
    return (e = getNode(hash(key), key)) == null ? null : e.value;
}

// 核心是用过hash找到table数组下标, 再通过== 或者Object#equals找到key相当的结点
final Node<K,V> getNode(int hash, Object key) {
    Node<K,V>[] tab; Node<K,V> first, e; int n; K k;
    ...
    if (first.hash == hash && // always check first node
        ((k = first.key) == key || (key != null && key.equals(k))))
        return first;
    if ((e = first.next) != null) {
        ...
        do {
            if (e.hash == hash &&
                ((k = e.key) == key || (key != null && key.equals(k))))
        } while ((e = e.next) != null);
    }
    ...
}

resize()扩容机制

  void resize() {
    Entry[] oldTable = table;
    int oldCapacity = oldTable.length;
    ...
    // 新的Entry数组
    Entry[] newTable = new Entry[newCapacity];
    // 数据迁移到新数组
    transfer(newTable);
    table = newTable;
    // 修改阈值
    threshold = (int)(newCapacity * loadFactor);
  }

  void transfer(Entry[] newTable) {
    Entry[] src = table;
    int newCapacity = newTable.length;
    // 遍历旧的Entry数组
    for (int j = 0; j < src.length; j++) {
      Entry<K,V> e = src[j];
      if (e != null) {
        src[j] = null;
        do {
          Entry<K, V> next = e.next;
          // 重新计算每个元素在数组的位置
          int i = indexFor(e.hash, newCapacity);
          // 标记[i]
          // newTable[i]的引用赋给了e.next，也就是使用了单链表的头插入方式，同一位置上新元素总会被放在链表的头部位置；这样先放在一个索引上的元素终会被放到Entry链的尾部(如果发生了hash冲突的话），在旧数组中同一条Entry链上的元素，通过重新计算索引位置后，有可能被放到了新数组的不同位置上。
          e.next = newTable[i];
          // 将元素放到数组上
          newTable[i] = e;
          // 更新为下一个元素
          e = next;
        } while(e != null);
      }
    }
  }

ArrayMap

Object[] mArray;, 使用存储元素
int[] mHashes;, 存储key的hash值
static Object[] mBaseCache;缓存大小为4的ArrayMap
static Object[] mTwiceBaseCache;缓存大小为8的ArrayMap
与HashMap比较, 优势在于更省内存, HashMap则对每个结点均保存next指针, 而ArrayMap则不需要, 另外ArrayMap多了缓存; 劣势在于, ArrayMap读写时间复杂度是O(lgn)
put()

  //
  public V put(K key, V value) {
    final int osize = mSize;
    final int hash;
    int index;
    if (key == null) {
        hash = 0;
        index = indexOfNull();
    } else {
        ...
        hash = mIdentityHashCode ? System.identityHashCode(key) : key.hashCode();
        // 通过key和hash值二分查找index
        index = indexOf(key, hash);
    }
    // hash值和key相等, 则更新值即
    if (index >= 0) {
        ...
        mArray[index] = value;
        return old;
    }

    index = ~index;
    // 当mSize大于等于mHash数组长度, 则扩容
    if (osize >= mHashes.length) {
        final int n = osize >= (BASE_SIZE*2) ? (osize+(osize>>1))
                : (osize >= BASE_SIZE ? (BASE_SIZE*2) : BASE_SIZE);

        ...

        final int[] ohashes = mHashes;
        final Object[] oarray = mArray;
        // 需要扩容, 优先考虑拿缓存
        allocArrays(n);
        ...

        // 释放老内存
        freeArrays(ohashes, oarray, osize);
    }

    ...
    // hash, key, value放到数组
    mHashes[index] = hash;
    mArray[index<<1] = key;
    mArray[(index<<1)+1] = value;
    mSize++;
    return null;
}

allocArrays(), 减少频繁地创建和回收

  private void allocArrays(final int size) {
    ...
    // 扩容时当size是 8 则使用mTwiceBaseCache
    if (size == (BASE_SIZE*2)) {
        synchronized (ArrayMap.class) {
            if (mTwiceBaseCache != null) {
                final Object[] array = mTwiceBaseCache; // 获得缓存大小
                mArray = array;
                mTwiceBaseCache = (Object[])array[0]; // 指向上一条缓存
                mHashes = (int[])array[1];
                array[0] = array[1] = null;
                mTwiceBaseCacheSize--;
                ...
            }
        }
    } else if (size == BASE_SIZE) {
      // 扩容时当size是 4 则使用mBaseCache, 类似上述代码
        ...
    }

    // 未命中缓存
    mHashes = new int[size];
    mArray = new Object[size<<1];
}

freeArrays(), 减少频繁地创建和回收

  private static void freeArrays(final int[] hashes, final Object[] array, final int size) {
    // hash数组大小等于8, 释放内存时, 则存到mTwiceBaseCacheSize或mBaseCacheSize缓存中
    // 供后续使用减少频繁地创建和回收
    if (hashes.length == (BASE_SIZE*2)) {
        synchronized (ArrayMap.class) {
            if (mTwiceBaseCacheSize < CACHE_SIZE) {
                array[0] = mTwiceBaseCache;
                array[1] = hashes;
                // 省略了清空array[2]数组数据
                ...
                mTwiceBaseCache = array;
                mTwiceBaseCacheSize++;
                ...
            }
        }
    } else if (hashes.length == BASE_SIZE) {
        // 逻辑类似上述代码
        ...
    }
}

get()

  // 二分查找获取index
  public V get(Object key) {
    final int index = indexOfKey(key);
    return index >= 0 ? (V)mArray[(index<<1)+1] : null;
  }

LinkedHashMap

LinkedHashMapEntry<K,V> head双向链表头结点
LinkedHashMapEntry<K,V> tail双向链表尾结点
accessOrder, 调整访问顺序标识, true则调整, 一般用于LRU缓存
LinkedHashMap继承HashMap, 只扩展了HashMap.Node, get, afterNodeAccess等
afterNodeAccess()

  void afterNodeAccess(Node<K,V> e) { // move node to last
    LinkedHashMapEntry<K,V> last;
    if (accessOrder && (last = tail) != e) {
        LinkedHashMapEntry<K,V> p =
            (LinkedHashMapEntry<K,V>)e, b = p.before, a = p.after;
        p.after = null;
        // e的前置结点为空, 则头结点指向e结点的尾结点, 即可e为头结点
        if (b == null)
            head = a;
        else
            // 否则e的前置结点的后置指针指向e的后置结点a
            b.after = a;
        if (a != null)
            // 后置结点不为空, 则后置结点的前置指针指向e的前置结点b
            a.before = b;
        else
            last = b;
        if (last == null)
            head = p;
        else {
            //
            p.before = last;
            last.after = p;
        }
        tail = p;
        ++modCount;
    }
}

get()

SparseArray

int[] mKeys;, 存储key的数组
Object[] mValues;, 存储value的数组
优势, 内存比HashMap、ArrayMap小, key省去装箱和拆箱的过程, 缺点是put, get需要二分查找, 时间复杂度是O(lgn)
put()

  public void put(int key, E value) {
    // 查找key对应的index
    int i = ContainerHelpers.binarySearch(mKeys, mSize, key);

    // 大于0, 则说明key对应的坑位已有value
    if (i >= 0) {
        mValues[i] = value;
    } else {
        i = ~i;

        if (i < mSize && mValues[i] == DELETED) {
            mKeys[i] = key;
            mValues[i] = value;
            return;
        }
        ...
        // 插入key和value
        mKeys = GrowingArrayUtils.insert(mKeys, mSize, i, key);
        mValues = GrowingArrayUtils.insert(mValues, mSize, i, value);
        mSize++;
    }
}

get()

  public E get(int key) {
    // 二分查找
    int i = ContainerHelpers.binarySearch(mKeys, mSize, key);
    ...
    return (E) mValues[i];
}

ConcurrentHashMap

Segment<K,V>[] segments; , 存储元素以及实现分段加锁
Segment<K,V>继承ReentrantLock, 且HashEntry<K,V>[] table, 存储元素

put()

  public V put(K key, V value) {
      ...
      int hash = hash(key.hashCode());
      // 通过hash值找到对应Segment
      return segmentFor(hash).put(key, hash, value, false);
  }
  final Segment<K,V> segmentFor(int hash) {
      // 将散列值右移动segmentShift位, 再&segmentMask获得
      // segments的位置
      return segments[(hash >>> segmentShift) & segmentMask];
  }
  // Segment
  V put(K key, int hash, V value, boolean onlyIfAbsent) {
      // 对段加锁
      lock();

      try {
         ...

         HashEntry<K,V>[] tab = table;
         // 通过hash找到index
         int index = hash & (tab.length - 1);
         HashEntry<K,V> first = tab[index];

         HashEntry<K,V> e = first;
         // 通过hash和Object#equals, 找到无冲突结点
         while (e != null && (e.hash != hash || !key.equals(e.key)))
             e = e.next;

         V oldValue;
         if (e != null) {    
             // 存在冲突且key一致, 则替换原来的值        
             oldValue = e.value;
             if (!onlyIfAbsent)
                 e.value = value;    
         }
         else {                        
             ...
             // 无冲突则插入
             tab[index] = new HashEntry<K,V>(key, hash, first, value);
         }
         return oldValue;
     } finally {
         unlock();                   
     }
  }

get()

      V get(Object key, int hash) {
          if(count != 0) {   
              HashEntry<K,V> e = getFirst(hash);
              while(e != null) {
                  if(e.hash == hash && key.equals(e.key)) {
                      V v = e.value;
                      if(v != null)            
                          return v;
                      // 如果读到 value 域为 null，说明发生了重排序，加锁后重新读取
                      return readValueUnderLock(e);
                  }
                  e = e.next;
              }
          }
     return null;
      }

TreeMap
HashTable, 类似HashMap, 区别是方法是同步的, key和value不能为空值
- table即Entry[]数组类型, 是单项链表
- count是HashTable的大小
- threshold是HashTable的阈值, 用于扩容
- loadFactor加载因子

venciallee

7.7 数据结构-Java Map

数据结构-Map

参考

You may also enjoy