atashbahar.com

thoughts, ideas and ...

Multiple indexes over an in memory collection for faster search

I have a big in-memory collection of following simplified class:

public class Product 
{
    public int Id { get; set; }
    public string UserName { get; set; }
    public int CategoryId { get; set; }
    public string Title { get; set; }
    public string Description { get; set; }
}

I need to search for products based on different properties like UserName or CategoryId. One way of searching would be using linq to objects like:

var userProducts = products.Where(x => x.UserName == "SomeValue")

This takes some processing when collection is too big and in my case it would be called hundreds of time each second.

What I came up with was to introduce a new collection that supports indexing over different properties:

public class FastCollection<T> : IEnumerable<T>
{
    private IList<T> _items;
    private IList<Expression<Func<T, object>>> _lookups;
    private Dictionary<string, ILookup<object, T>> _indexes;

    public FastCollection(IList<T> data)
    {
        _items = data;
        _lookups = new List<Expression<Func<T, object>>>();
        _indexes = new Dictionary<string, ILookup<object, T>>();
    }

    public void AddIndex(Expression<Func<T, object>> property)
    {
        _lookups.Add(property);
        _indexes.Add(property.ToString(), _items.ToLookup(property.Compile()));
    }

    public void Add(T item)
    {
        _items.Add(item);
        RebuildIndexes();
    }

    public void Remove(T item)
    {
        _items.Remove(item);
        RebuildIndexes();
    }

    public void RebuildIndexes()
    {
        if (_lookups.Count > 0)
        {
            _indexes = new Dictionary<string, ILookup<object, T>>();
            foreach (var lookup in _lookups)
            {
                _indexes.Add(lookup.ToString(), _items.ToLookup(lookup.Compile()));
            }
        }
    }

    public IEnumerable<T> FindValue<TProperty>(Expression<Func<T, TProperty>> property, TProperty value)
    {
        var key = property.ToString();
        if(_indexes.ContainsKey(key))
        {
            return _indexes[key][value];
        }
        else
        {
            var c = property.Compile();
            return _items.Where(x => c(x).Equals(value));
        }
    }

    public IEnumerator<T> GetEnumerator()
    {
        return _items.GetEnumerator();
    }

    System.Collections.IEnumerator System.Collections.IEnumerable.GetEnumerator()
    {
        return GetEnumerator();
    }
}

You can initialize the collection like this:

var fc = new FastCollection<Product>(products);
fc.AddIndex(x => x.Id);
fc.AddIndex(x => x.UserName);
fc.AddIndex(x => x.CategoryId);

And finally you can search the collection like this:

And finally you can search the collection like this:

The fast collection makes a big difference when it comes to performance.