/** * Hash functions should: * Spread nearby values apart * Distribute uniformly * O(1) * * * NOTE: This has not been extensively tested, and may still be full of * subtle bugs. I take no responsibility for the quality of this * code, it is provided as an example only. Use at your own risk. * -Titus Winters */ #include #include // We use pair to associate the strings we are hashing with the "dirty-bits" #include #include "hashing.h" using namespace std; const int WORD_SIZE = 32; const int SHIFT_SIZE = 5; // Default constructor Hash::Hash() { curSizeIndex = 0; curUsed = 0; // Allocate our table. This is an array of pairs, where each pair // is a string and a bool. The strings are the values in the // table, the bools are the associated "dirty bits". This could // easily be implemented just by having another array of bools, // but I chose to do this to introduce some helpful STL stuff. table = new pair[sizes[curSizeIndex]]; } // Constructor that estimates appropriate size Hash::Hash(unsigned int size) { // Find the first size in our list that is at least as big // as the estimate provided by the user curSizeIndex = 0; while (sizes[curSizeIndex] < size) curSizeIndex++; // Now initialize as before curUsed = 0; table = new pair[sizes[curSizeIndex]]; } // Destructor Hash::~Hash() { // Free up our table delete[] table; } // This is writen as a helper function so that when we resize the table // we can actually use the same code to re-insert as we do to perform // regular insertions. // The first parameter is the table itself. // The second parameter is the value to insert bool Hash::insertHelper(pair* thisTable, string toInsert) { // Hash the string unsigned int k = hashString(toInsert); // Perform quadratic probing for (int i = 0; 1; i++) { // Check if position (k + i^2) % table size is available unsigned int curPos = (k + i * i) % sizes[curSizeIndex]; if (thisTable[curPos].second == false) { // We know that this spot is available thisTable[curPos].second = true; thisTable[curPos].first = toInsert; curUsed++; return true; } // If we already found it in the table, that is the only // way such an insertion can fail. if (thisTable[curPos].first == toInsert) return false; } // This is just here to appease the compiler return false; } // Insert a value into a hash table bool Hash::insert(string toInsert) { // Make sure that there is actually room to insert into the table // We use integer math here to ensure that everything is easy to // understand and we don't have to worry about annoying casting // problems. if (curUsed * 2 > sizes[curSizeIndex]) { // We need to resize // Go to the next-size-up table size curSizeIndex++; // Create a new table with the new size pair* newTable = new pair[sizes[curSizeIndex]]; // Note that we have nothing in the new table curUsed = 0; // Go through our old table, find all values, insert them // into the new table for (unsigned int i = 0; i < sizes[curSizeIndex - 1]; i++) { if (table[i].second && table[i].first != "") { insertHelper(newTable, table[i].first); } } // Free up memory delete[] table; // Perform a nice little aliasing step table = newTable; } // Do the insertion return insertHelper(table, toInsert); } // Find a value in a hash table bool Hash::search(string toFind) { // Hash the value unsigned int k = hashString(toFind); // For the current position unsigned int curPos = k % sizes[curSizeIndex]; unsigned int i = 0; // Loop while we haven't found it but there is a trail of // dirty bits while (table[curPos].second) { // Calculate the next position curPos = (k + i * i) % sizes[curSizeIndex]; // If we found it, return true if (table[curPos].first == toFind) return true; // Otherwise, keep looking i++; } // If we got to somewhere that the dirty-bit was not set, return false return false; } // Remove a value from a hash table bool Hash::remove(string toRemove) { // Hash it unsigned int k = hashString(toRemove); // Find the initial table lookup position unsigned int curPos = k % sizes[curSizeIndex]; unsigned int i = 0; // Loop for it while (table[curPos].second) { curPos = (k + i * i) % sizes[curSizeIndex]; // If we find it if (table[curPos].first == toRemove) { // Remove it table[curPos].first = ""; curUsed--; // Note that we are successful return true; } // Keep looking i++; } // Failure return false; } // Simple. Returns how many items are in the table int Hash::getSize() { return curUsed; } // Returns the capacity of the table int Hash::getTableSize() { return sizes[curSizeIndex]; } // A hash function for strings unsigned int Hash::hashString(string n) { // This is actually an old checksumming routine used // by phone companies to ensure data integrity unsigned int cur = 0; // Presumably the "string" class has O(1) lookup for size, but // are you sure? unsigned int size = n.size(); // This is a circular bit-shift of SHIFT_SIZE bits, // followed by XOR'ing in one byte of the string at a // time. for (unsigned int i = 0; i < size; i++) { unsigned int temp = cur >> (WORD_SIZE - SHIFT_SIZE); cur ^= n[i]; cur <<= SHIFT_SIZE; cur ^= temp; } // Return the value return cur; } // A simple function for printing the contents of a hash table void Hash::printContents() { // Loop through the hash for (unsigned int i = 0; i < sizes[curSizeIndex]; i++) { // If the dirty bit is set and the value is non-empty, print it if (table[i].second && table[i].first != "") cout << table[i].first << endl; } cout << endl; } int main() { Hash myNewHash; if (myNewHash.insert("flower")) { cout << "Inserted flower" << endl; } else { cout << "Failed!" << endl; } myNewHash.insert("table"); myNewHash.insert("cat"); myNewHash.insert("dog"); if (myNewHash.remove("table")) { cout << "Removed table" << endl; } else { cout << "failed" << endl; } myNewHash.remove("cat"); myNewHash.insert("able"); myNewHash.insert("stable"); myNewHash.insert("clown"); cout << myNewHash.getSize() << endl; cout << myNewHash.getTableSize() << endl; myNewHash.printContents(); if (myNewHash.search("cat")) { cout << "Found cat" << endl; } else { cout << "Didn't find cat" << endl; } if (myNewHash.search("clown")) { cout << "Found clown" << endl; } else { cout << "Didn't find clown" << endl; } if (myNewHash.search("arizona")) { cout << "Found arizona" << endl; } else { cout << "Didn't find arizona" << endl; } return 0; }