using System;
using System.Collections.Generic;
using System.Text;
namespace Huddled.Huddle
{
///
/// A "Natural Sort" for .NET
///
/// Copyright (c) 2006, Joel Bennett
///
/// Permission is hereby granted, free of charge, to any person obtaining a copy
/// of this software and associated documentation files (the "Software"), to
/// deal in the Software without restriction, including without limitation the
/// rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
/// sell copies of the Software, and to permit persons to whom the Software is
/// furnished to do so, subject to the following conditions:
///
/// The above copyright notice and this permission notice shall be included in all
/// copies or substantial portions of the Software.
///
/// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
/// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
/// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
/// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
/// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
/// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
/// IN THE SOFTWARE.
///
public class StringNaturalComparer :StringComparer
{
// the string comparison
private StringComparison comparison;
#region static instance getters
private static StringNaturalComparer naturalCurrentCulture;
private static StringNaturalComparer naturalCurrentCultureIgnoreCase;
private static StringNaturalComparer naturalInvariantCulture;
private static StringNaturalComparer naturalInvariantCultureIgnoreCase;
private static StringNaturalComparer naturalOrdinal;
private static StringNaturalComparer naturalOrdinalIgnoreCase;
public static StringNaturalComparer NaturalCurrentCulture
{
get
{
if( null == naturalCurrentCulture ) {
naturalCurrentCulture = new StringNaturalComparer( StringComparison.CurrentCulture );
}
return naturalCurrentCulture;
}
}
public static StringNaturalComparer NaturalCurrentCultureIgnoreCase
{
get
{
if( null == naturalCurrentCulture ) {
naturalCurrentCulture = new StringNaturalComparer( StringComparison.CurrentCultureIgnoreCase );
}
return naturalCurrentCulture;
}
}
public static StringNaturalComparer NaturalInvariantCulture
{
get
{
if( null == naturalCurrentCulture ) {
naturalCurrentCulture = new StringNaturalComparer( StringComparison.InvariantCulture );
}
return naturalCurrentCulture;
}
}
public static StringNaturalComparer NaturalInvariantCultureIgnoreCase
{
get
{
if( null == naturalCurrentCulture ) {
naturalCurrentCulture = new StringNaturalComparer( StringComparison.InvariantCultureIgnoreCase );
}
return naturalCurrentCulture;
}
}
public static StringNaturalComparer NaturalOrdinal
{
get
{
if( null == naturalCurrentCulture ) {
naturalCurrentCulture = new StringNaturalComparer( StringComparison.Ordinal );
}
return naturalCurrentCulture;
}
}
public static StringNaturalComparer NaturalOrdinalIgnoreCase
{
get
{
if( null == naturalCurrentCulture ) {
naturalCurrentCulture = new StringNaturalComparer( StringComparison.OrdinalIgnoreCase );
}
return naturalCurrentCulture;
}
}
#endregion
public StringNaturalComparer( StringComparison comparison )
{
this.comparison = comparison;
}
///
/// Compares two strings using natural numerical ordering
///
/// The first string
/// The second string
/// A signed number indicating the relative values of this instance and the value parameter.
///
///
/// Return Value
/// Description
///
/// -
/// Less than zero
/// The first string is less than the second string
///
/// -
/// Zero
/// The two strings are equal
///
/// -
/// Greater than zero
/// The first string is greater than the second string
///
///
///
/// Please note I use a char comparison here, I have not yet run into
/// any problems with this, but it *is* possible that it should be
/// converted to a Unicode Locale-aware string comparison.
///
public override int Compare( string one, string two )
{
int retVal = 0;
// System.Diagnostics.Trace.WriteLine(one + "," + two);
if( !String.IsNullOrEmpty( one ) && !String.IsNullOrEmpty( two ) ) {
int c1 = 0, c2 = 0;
int len1 = one.Length, len2 = two.Length;
while( len1 > c1 ) {
if( !( len2 > c2 ) ) { retVal = 1; break; }
// IsDigit determines if a Char is a radix-10 digit. (only DecimalDigitNumber)
// IsNumber determines if a Char is of any numeric Unicode category. (including LetterNumber, or OtherNumber)
if( char.IsDigit( one[c1] ) ) {
if( !( char.IsDigit( two[c2] ) ) ) { retVal = -1; break; }
// TakeNumber is also aware of unicode numbers
retVal = TakeNumber( one, ref c1 ).CompareTo( TakeNumber( two, ref c2 ) );
if( retVal != 0 )
break;
} else if( char.IsDigit( two[c2] ) ) {
retVal = 1;
break;
} else {
// use the locale-aware string compare...
retVal = String.Compare( one, c1, two, c2, 1, comparison );
if( retVal != 0 )
break;
++c1;
++c2;
}
}
if( 0 == retVal && len2 > c2 ) { retVal = -1; };
}
return retVal;
}
/// Parse a number of indeterminate length from a string
/// A string with a number in it
/// The index to start parsing at (is set to the index of the first non-numerical character - might be beyond the length of the string)
/// The number parsed from the string
private static int TakeNumber( string numerical, ref int index )
{
//// this test is only needed if it's possible to call the method incorrectly
//if( !char.IsNumber( numerical[index] ) ) {
// throw new InvalidOperationException( "Character at index " + index + " of '" + numerical + "' is not numerical" );
//}
// make a copy of the starting point
int start = index;
while( ++index < numerical.Length && char.IsDigit( numerical[index] ) ) ;
// If provider is null, the NumberFormatInfo for the current culture is used.
return int.Parse( numerical.Substring( start, index - start ), null );
}
#region Accuracy Test
///
/// NUnit Test case. (uncomment the attribute)
/// Tests the sort method with a simple series of strings,
/// requires manual visual validation of the output
///
// [Test]
public static void TestStringComparer()
{
string[] correct = { "1one", "2two", "10ten", "12twelve", "20twenty", "(1 file)", "[1 file]", "_1 file_", "=1 file=", "a1b", "a2b", "a10b", "a11b", "b1", "b2b", "b10b", "bat1", "bat2", "bat10", "cat1", "cat2", "cat10" };
string[] strings = new string[correct.Length];
correct.CopyTo( strings, 0 );
Console.WriteLine( "Before Sort, they're actually in order." );
foreach( string s in strings )
Console.WriteLine( s );
Array.Sort( strings, StringComparer.CurrentCultureIgnoreCase );
Console.WriteLine();
Console.WriteLine( "After the default sort, they're in the wrong order." );
foreach( string s in strings )
Console.WriteLine( s );
Array.Sort( strings, StringNaturalComparer.NaturalCurrentCultureIgnoreCase );
Console.WriteLine();
Console.WriteLine( "After my sort, they're back in the correct order." );
foreach( string s in strings )
Console.WriteLine( s );
for( int s = 0; s < strings.Length; ++s ) {
// Assert.AreEqual( correct[s], strings[s], "Strings aren't sorted into the correct order" );
if( !correct[s].Equals( strings[s] ) ) {
Console.WriteLine( "ERROR at position " + s + ", " + strings[s] + " is out of order." );
break;
}
}
}
#endregion
#region Performance Test
/////
///// NUnit Test case. (uncomment the attribute)
///// Tests the sort method with a simple series of strings to test the performance
///// With 9 * 100k comparisons in two tests:
/////
/////
///// Your results may vary, but just for the sake of comparison, on my computer:
/////
///// With numerical strings:
///// Using my compare: 00:00:02.6407095
///// Using StringCompare: 00:00:00.1093785
///// Using StrCmpLogicalW: 00:00:06.6095865
/////
///// With non-numerical strings:
///// Using my compare: 00:00:00.5937690
///// Using StringCompare: 00:00:00.1093785
///// Using StrCmpLogicalW: 00:00:06.6095865
/////
//// [Test]
//public static void PerformanceComparison()
//{
// DateTime end, start;
// string one, two, three;
// StringNaturalComparer natural = new StringNaturalComparer( StringComparison.InvariantCultureIgnoreCase );
// StringComparer builtin = StringComparer.InvariantCultureIgnoreCase;
// one = "simple 10 test";
// two = "simple 20 test";
// three = "simple 03 test";
// #region comparisons
// start = DateTime.Now;
// for( int i = 0; i < 100000; ++i ) {
// natural.Compare( one, one );
// natural.Compare( one, two );
// natural.Compare( one, three );
// natural.Compare( two, one );
// natural.Compare( two, two );
// natural.Compare( two, three );
// natural.Compare( three, one );
// natural.Compare( three, two );
// natural.Compare( three, three );
// }
// end = DateTime.Now;
// Console.WriteLine( "Using my compare: " + (TimeSpan)( end - start ) );
// start = DateTime.Now;
// for( int i = 0; i < 100000; ++i ) {
// builtin.Compare( one, one );
// builtin.Compare( one, two );
// builtin.Compare( one, three );
// builtin.Compare( two, one );
// builtin.Compare( two, two );
// builtin.Compare( two, three );
// builtin.Compare( three, one );
// builtin.Compare( three, two );
// builtin.Compare( three, three );
// }
// end = DateTime.Now;
// Console.WriteLine( "Using StringCompare: " + (TimeSpan)( end - start ) );
// start = DateTime.Now;
// for( int i = 0; i < 100000; ++i ) {
// StrCmpLogicalW( one, one );
// StrCmpLogicalW( one, two );
// StrCmpLogicalW( one, three );
// StrCmpLogicalW( two, one );
// StrCmpLogicalW( two, two );
// StrCmpLogicalW( two, three );
// StrCmpLogicalW( three, one );
// StrCmpLogicalW( three, two );
// StrCmpLogicalW( three, three );
// }
// end = DateTime.Now;
// Console.WriteLine( "Using StrCmpLogicalW: " + (TimeSpan)( end - start ) );
// #endregion
// one = "This is a simple string without numbers";
// two = "Test";
// three = "This is another one with no numbers";
// #region comparisons
// start = DateTime.Now;
// for( int i = 0; i < 100000; ++i ) {
// natural.Compare( one, one );
// natural.Compare( one, two );
// natural.Compare( one, three );
// natural.Compare( two, one );
// natural.Compare( two, two );
// natural.Compare( two, three );
// natural.Compare( three, one );
// natural.Compare( three, two );
// natural.Compare( three, three );
// }
// end = DateTime.Now;
// Console.WriteLine( "Using my compare: " + (TimeSpan)( end - start ) );
// start = DateTime.Now;
// for( int i = 0; i < 100000; ++i ) {
// builtin.Compare( one, one );
// builtin.Compare( one, two );
// builtin.Compare( one, three );
// builtin.Compare( two, one );
// builtin.Compare( two, two );
// builtin.Compare( two, three );
// builtin.Compare( three, one );
// builtin.Compare( three, two );
// builtin.Compare( three, three );
// }
// end = DateTime.Now;
// Console.WriteLine( "Using StringCompare: " + (TimeSpan)( end - start ) );
// start = DateTime.Now;
// for( int i = 0; i < 100000; ++i ) {
// StrCmpLogicalW( one, one );
// StrCmpLogicalW( one, two );
// StrCmpLogicalW( one, three );
// StrCmpLogicalW( two, one );
// StrCmpLogicalW( two, two );
// StrCmpLogicalW( two, three );
// StrCmpLogicalW( three, one );
// StrCmpLogicalW( three, two );
// StrCmpLogicalW( three, three );
// }
// end = DateTime.Now;
// Console.WriteLine( "Using StrCmpLogicalW: " + (TimeSpan)( end - start ) );
// #endregion
//}
//[System.Runtime.InteropServices.DllImport( "shlwapi.dll", CharSet = System.Runtime.InteropServices.CharSet.Unicode, ExactSpelling = true, SetLastError = false )]
//private static extern int StrCmpLogicalW( string strA, string strB );
#endregion
public override bool Equals( string x, string y )
{
return x.Equals( y );
}
public override int GetHashCode( string obj )
{
return obj.GetHashCode();
}
}
}