What data structure using O(n) storage with O(log n) query time should I use for Range Minimum Queries?
If the set were sorted, you would not need a tree. The smallest element in the range [i,j] would have index i.
So suppose the elements of your sequence were stored in order of their indices at the leaves of a tree. Can you store any additional information (ahem, perhaps some kind of min and max) at each interior node to facilitate your query?
If so, then if the tree is balanced, and if you can answer your query by looking only at the two paths from the root to the two elements at {i,j}, then you will achieve your O(log N) lookup cost. Since a balanced binary tree with N leaves contains (2N-1) total nodes, you will also satisfy your O(N) storage limit.
MORE DETAIL: Consider computing the minimum value in the range [i,j].
At each interior node A of the tree, keep the minimum value for all the leaves beneath it. This can be computed bottom up when the tree is first built.
Now start at leaf i. Walk up the tree, keeping as your candidate minimum value the value at i or anything known to be both right of i and left of j. Stop one node below the mutual ancestor of i and j.
Start again at leaf j. Walk up the tree, once again keeping as your candidate minimum the value at j or anything known to be both left of j and right of i.
Your minimum for [i,j] is then the minimum of the two values you've computed. Computing the maximum is analogous. Total storage requirement is 2 value per interior node plus two pointers per interior node plus one value per leaf, which is N + 4(N-1) for a full tree.
The path you travel up the tree from leaf i, is the same path you would travel down the tree if you were searching for leaf i.
C# CODE FOR SEARCH:
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
namespace RangeSearch
{
public class RangeSearch
{
int[] tree;
int N;
int LeafLocation(int leafNumber) { return leafNumber + N - 1; }
int LeafValue(int leafNumber) { return tree[ LeafLocation(leafNumber)]; }
int LeftChild(int x) { return 2*x + 1; }
int RightChild(int x) { return 2*x + 2; }
int Parent(int x) { return (x-1)/2; }
bool IsPowerOf2(int x) { while (x > 0) { if (x == 1) return true; if ((x & 1) == 1 ) return false; x = x >> 1; } return false; }
bool IsAncestorOf( int x, int y ) { if( x>y ) return false; return x==y || IsAncestorOf(LeftChild(x), y) || IsAncestorOf(RightChild(x),y); } // note: violating time bound for legibility, can fix by storing min/max descendant index at each node
public RangeSearch(params int[] vals)
{
if (!IsPowerOf2(vals.Length))
throw new ArgumentException("this implementation restricted to N being power of 2");
N = vals.Length;
tree = new int[2 * N - 1];
// the right half of the array contains the leaves
vals.CopyTo(tree, N - 1);
// the left half of the array contains the interior nodes, each of which holds the minimum of all its children
for (int i = N - 2; i >= 0; i--)
tree[i] = Math.Min(tree[LeftChild(i)], tree[RightChild(i)]);
}
public int FindMin(int a, int b)
{
if( a>b )
throw new ArgumentException( "FindMin expects a range [a,b] with a<=b" );
int x = Walk( a, true, b);
int y = Walk( b, false, a);
return Math.Min(x, y);
}
int Walk( int leafNumber, bool leftSide, int otherLeafNumber )
{
int minSoFar = LeafValue(leafNumber);
int leafLocation = LeafLocation(leafNumber);
int otherLeafLocation = LeafLocation(otherLeafNumber);
int parent = Parent(leafLocation);
bool cameFromLeft = (leafLocation == LeftChild(parent));
return Walk2(minSoFar, parent, cameFromLeft, leftSide, otherLeafLocation);
}
int Walk2(int minSoFar, int node, bool cameFromLeft, bool leftSide, int otherLeafLocation)
{
if (IsAncestorOf(node, otherLeafLocation))
return minSoFar;
if (leftSide)
minSoFar = !cameFromLeft ? minSoFar : Math.Min(minSoFar, tree[RightChild(node)]);
else
minSoFar = cameFromLeft ? minSoFar : Math.Min(minSoFar, tree[LeftChild(node)]);
return Walk2(minSoFar, Parent(node), node == LeftChild(Parent(node)), leftSide, otherLeafLocation);
}
}
}
C# CODE TO TEST IT:
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
namespace RangeSearch
{
class Program
{
static void Main(string[] args)
{
RangeSearch rngA = new RangeSearch(9, 3, 7, 1);
System.Diagnostics.Trace.Assert(3 == rngA.FindMin(0, 2) );
System.Diagnostics.Trace.Assert(1 == rngA.FindMin(0, 3));
System.Diagnostics.Trace.Assert(1 == rngA.FindMin(1, 3));
RangeSearch rngB = new RangeSearch(1, 7, 3, 9);
System.Diagnostics.Trace.Assert(3 == rngB.FindMin(1, 3));
System.Diagnostics.Trace.Assert(1 == rngB.FindMin(0, 3));
System.Diagnostics.Trace.Assert(1 == rngB.FindMin(0, 2));
RangeSearch rngC = new RangeSearch(17, 21, 77, 70, 58, 79, 79, 89);
System.Diagnostics.Trace.Assert(21 == rngC.FindMin(1, 7));
RangeSearch rngD = new RangeSearch(94, 78, 88, 72, 95, 97, 89, 83);
System.Diagnostics.Trace.Assert(72 == rngD.FindMin(1, 6));
RangeSearch rngE = new RangeSearch(0, 66, 6, 43, 34, 34, 63, 49);
System.Diagnostics.Trace.Assert(34 == rngE.FindMin(3, 4));
Random rnd = new Random();
for (int i = 0; i < 1000000; i++)
{
int[] tmp = new int[64];
for (int j = 0; j < tmp.Length; j++)
tmp[j] = rnd.Next(0, 100);
int a = rnd.Next(0, tmp.Length);
int b = rnd.Next(a, tmp.Length);
RangeSearch rng = new RangeSearch(tmp);
System.Diagnostics.Trace.Assert(Min(tmp, a, b) == rng.FindMin(a, b));
}
}
static int Min(int[] ar, int a, int b)
{
int x = ar[a];
for (int i = a + 1; i <= b; i++)
x = Math.Min(x, ar[i]);
return x;
}
}
}
OK, I think I have a good start for you, and I learned something new in the process.
I would take a look at the Wikipedia entry on Cartesian trees. I'm not going to tell you more than that for fear of doing your homework for you, but you seem like a smart guy so I figure you can work it out.
Thanks for helping me learn a new data structure, by the way!