Change DIR_CACHE by removing the need to cache Sweet strings in the

DIR_LIB_SOURCE. It only needs to be able to read Sweet strings quickly. @todo: Change public API regarding part rev string.

Change DIR_CACHE by removing the need to cache Sweet strings in the
DIR_LIB_SOURCE. It only needs to be able to read Sweet strings quickly. @todo: Change public API regarding part rev string.
597f6775 · Dick Hollenbeck · 020e8759 · 325a13a7 · 597f6775 · 597f6775
Commit 597f6775 authored Dec 20, 2010 by Dick Hollenbeck
Hide whitespace changes
Inline Side-by-side

Showing with 229 additions and 122 deletions

CMakeLists.txt new/CMakeLists.txt +1 -1

sch_dir_lib_source.cpp new/sch_dir_lib_source.cpp +173 -101

sch_dir_lib_source.h new/sch_dir_lib_source.h +55 -20

No files found.
--- a/new/CMakeLists.txt
+++ b/new/CMakeLists.txt
@@ -58,7 +58,7 @@ endif()
 include_directories( ${CMAKE_CURRENT_SOURCE_DIR} )
-add_executable( test_dir_lib_source sch_dir_lib_source.cpp  ${PROJECT_SOURCE_DIR}/common/richio.cpp )
+add_executable( test_dir_lib_source sch_dir_lib_source.cpp )
 #add_executable( test_dir_lib_source EXCLUDE_FROM_ALL sch_dir_lib_source.cpp )
 target_link_libraries( test_dir_lib_source ${wxWidgets_LIBRARIES} )

--- a/new/sch_dir_lib_source.cpp
+++ b/new/sch_dir_lib_source.cpp
@@ -125,38 +125,85 @@ static const char* strrstr( const char* haystack, const char* needle )
    return ret;
 }
 /**
 * Function endsWithRev
- * returns a pointer to the final string segment: "revN..." or NULL if none.
+ * returns a pointer to the final string segment: "revN[N..]" or NULL if none.
 * @param start is the beginning of string segment to test, the partname or
 *  any middle portion of it.
- * @param tail is a pointer to the terminating nul.
+ * @param tail is a pointer to the terminating nul, or one past inclusive end of
+ *  segment, i.e. the string segment of interest is [start,tail)
 * @param separator is the separating byte, expected: '.' or '/', depending on context.
 */
 static const char* endsWithRev( const char* start, const char* tail, char separator )
 {
    bool    sawDigit = false;
-    while( isdigit(*--tail) && tail>start )
+    while( tail>start && isdigit(*--tail) )
    {
        sawDigit = true;
    }
-    if( sawDigit && tail-3 >= start && tail[-3] == separator )
+    // if sawDigit, tail points to the 'v' here.
+    if( sawDigit && tail-3 >= start )
    {
-        tail -= 2;
+        tail -= 3;
-        if( tail[0]=='r' && tail[1]=='e' && tail[2]=='v' )
+        if( tail[0]==separator && tail[1]=='r' && tail[2]=='e' && tail[3]=='v' )
        {
-            return tail;
+            return tail+1;  // omit separator, return "revN[N..]"
        }
    }
    return 0;
 }
+// see struct BY_REV
+bool BY_REV::operator() ( const STRING& s1, const STRING& s2 ) const
+{
+    // avoid instantiating new STRINGs, and thank goodness that c_str() is const.
+    const char* rev1 = endsWithRev( s1.c_str(), s1.c_str()+s1.size(), '/' );
+    const char* rev2 = endsWithRev( s2.c_str(), s2.c_str()+s2.size(), '/' );
+    int rootLen1 =  rev1 ? rev1 - s1.c_str() : s1.size();
+    int rootLen2 =  rev2 ? rev2 - s2.c_str() : s2.size();
+    int r = memcmp( s1.c_str(), s2.c_str(), min( rootLen1, rootLen2 ) );
+    if( r )
+    {
+        return r < 0;
+    }
+    if( rootLen1 != rootLen2 )
+    {
+        return rootLen1 < rootLen2;
+    }
+    // root strings match at this point, compare the revision number numerically,
+    // and choose the higher numbered version as "less", according to std::set lingo.
+    if( bool(rev1) != bool(rev2) )
+    {
+        return bool(rev1) < bool(rev2);
+    }
+    if( rev1 && rev2 )
+    {
+        int rnum1 = atoi( rev1+3 );
+        int rnum2 = atoi( rev2+3 );
+        return rnum1 > rnum2;
+    }
+    return false;   // strings are equal, and they don't have a rev
+}
-bool DIR_LIB_SOURCE::makePartFileName( const char* aEntry,
+bool DIR_LIB_SOURCE::makePartName( STRING* aPartName, const char* aEntry,
-                        const STRING& aCategory, STRING* aPartName )
+                        const STRING& aCategory )
 {
    const char* cp = strrstr( aEntry, ".part" );
@@ -165,19 +212,7 @@ bool DIR_LIB_SOURCE::makePartFileName( const char* aEntry,
    {
        const char* limit = cp + strlen( cp );
-        // if file extension is exactly ".part", and no rev
+        // If versioning, then must find a trailing "revN.." type of string.
-        if( cp==limit-5 )
-        {
-            if( aCategory.size() )
-                *aPartName = aCategory + "/";
-            else
-                aPartName->clear();
-            aPartName->append( aEntry, cp - aEntry );
-            return true;
-        }
-        // if versioning, test for a trailing "revN.." type of string
        if( useVersioning )
        {
            const char* rev = endsWithRev( cp + sizeof(".part") - 1, limit, '.' );
@@ -194,14 +229,52 @@ bool DIR_LIB_SOURCE::makePartFileName( const char* aEntry,
                return true;
            }
        }
+        // If using versioning, then all valid partnames must have a rev string,
+        // so we don't even bother to try and load any other partfile down here.
+        else
+        {
+            // if file extension is exactly ".part", and no rev
+            if( cp==limit-5 )
+            {
+                if( aCategory.size() )
+                    *aPartName = aCategory + "/";
+                else
+                    aPartName->clear();
+                aPartName->append( aEntry, cp - aEntry );
+                return true;
+            }
+        }
    }
    return false;
 }
-static bool isCategoryName( const char* aName )
+STRING DIR_LIB_SOURCE::makeFileName( const STRING& aPartName )
 {
-    return true;
+    // create a fileName for the sweet string, using a reversible
+    // partname <-> fileName conversion protocol:
+    STRING  fileName = sourceURI + "/";
+    const char* rev = endsWithRev( aPartName.c_str(), aPartName.c_str()+aPartName.size(), '/' );
+    if( rev )
+    {
+        int basePartLen = rev - aPartName.c_str() - 1;  // omit '/' separator
+        fileName.append( aPartName, 0,  basePartLen );
+        fileName += ".part.";    // add '.' separator before rev
+        fileName += rev;
+    }
+    else
+    {
+        fileName += aPartName;
+        fileName += ".part";
+    }
+    return fileName;
 }
@@ -211,9 +284,9 @@ void DIR_LIB_SOURCE::readSExpression( STRING* aResult, const STRING& aFilename )
    if( fw == -1 )
    {
-        STRING  msg = aFilename;
+        STRING  msg = strerror( errno );
-        msg += " cannot be open()ed for reading";
+        msg += "; cannot open(O_RDONLY) file " + aFilename;
-        throw IO_ERROR( msg.c_str() );
+        throw( IO_ERROR( msg.c_str() ) );
    }
    struct stat     fs;
@@ -224,34 +297,43 @@ void DIR_LIB_SOURCE::readSExpression( STRING* aResult, const STRING& aFilename )
    if( fs.st_size > (1*1024*1024) )
    {
        STRING msg = aFilename;
-        msg += " seems too big.  ( > 1mbyte )";
+        msg += " seems too big.  ( > 1 mbyte )";
        throw IO_ERROR( msg.c_str() );
    }
-    // we reuse the same readBuffer, which is not thread safe, but the API
+    // reuse same readBuffer, which is not thread safe, but the API
-    // is not expected to be thread safe.
+    // does not advertise thread safety (yet, if ever).
-    readBuffer.resize( fs.st_size );
+    if( (int) fs.st_size > (int) readBuffer.size() )
+        readBuffer.resize( fs.st_size + 1000 );
-    size_t count = read( fw, &readBuffer[0], fs.st_size );
+    int count = read( fw, &readBuffer[0], fs.st_size );
-    if( count != (size_t) fs.st_size )
+    if( count != (int) fs.st_size )
    {
-        STRING msg = aFilename;
+        STRING  msg = strerror( errno );
-        msg += " cannot be read";
+        msg += "; cannot read file " + aFilename;
-        throw IO_ERROR( msg.c_str() );
+        throw( IO_ERROR( msg.c_str() ) );
    }
-    // std::string chars are not gauranteed to be contiguous in
+    // std::string chars are not guaranteed to be contiguous in
    // future implementations of C++, so this is why we did not read into
    // aResult directly.
    aResult->assign( &readBuffer[0], count );
 }
+void DIR_LIB_SOURCE::cache() throw( IO_ERROR )
+{
+    partnames.clear();
+    categories.clear();
+    cacheOneDir( "" );
+}
 DIR_LIB_SOURCE::DIR_LIB_SOURCE( const STRING& aDirectoryPath,
-                                bool doUseVersioning ) throw( IO_ERROR ) :
+                                const STRING& aOptions ) throw( IO_ERROR ) :
-    readBuffer( 512 )
+    useVersioning( strstr( aOptions.c_str(), "useVersioning" ) )
 {
-    useVersioning = doUseVersioning;
    sourceURI     = aDirectoryPath;
    sourceType    = "dir";
@@ -264,17 +346,12 @@ DIR_LIB_SOURCE::DIR_LIB_SOURCE( const STRING& aDirectoryPath,
    if( strchr( "/\\", sourceURI[sourceURI.size()-1] ) )
        sourceURI.erase( sourceURI.size()-1 );
-    doOneDir( "" );
+    cache();
 }
 DIR_LIB_SOURCE::~DIR_LIB_SOURCE()
 {
-    // delete the sweet STRINGS, which "sweets" owns by pointer.
-    for( DIR_CACHE::iterator it = sweets.begin();  it != sweets.end();  ++it )
-    {
-        delete it->second;
-    }
 }
@@ -288,26 +365,34 @@ void DIR_LIB_SOURCE::GetCategoricalPartNames( STRINGS* aResults, const STRING& a
        STRING  lower = aCategory + "/";
        STRING  upper = aCategory + char( '/' + 1 );
-        DIR_CACHE::const_iterator limit = sweets.upper_bound( upper );
+        PART_CACHE::const_iterator limit = partnames.upper_bound( upper );
-        for( DIR_CACHE::const_iterator it = sweets.lower_bound( lower );  it!=limit;  ++it )
+        for( PART_CACHE::const_iterator it = partnames.lower_bound( lower );  it!=limit;  ++it )
        {
-            const char* start = it->first.c_str();
+            /*
-            size_t      len   = it->first.size();
+            const char* start = it->c_str();
+            size_t      len   = it->size();
-            if( !endsWithRev( start, start+len, '/' ) )
+            if( endsWithRev( start, start+len, '/' ) )
-                aResults->push_back( it->first );
+                continue;
+            */
+            aResults->push_back( *it );
        }
    }
    else
    {
-        for( DIR_CACHE::const_iterator it = sweets.begin();  it!=sweets.end();  ++it )
+        for( PART_CACHE::const_iterator it = partnames.begin();  it!=partnames.end();  ++it )
        {
-            const char* start = it->first.c_str();
+            /*
-            size_t      len   = it->first.size();
+            const char* start = it->c_str();
+            size_t      len   = it->size();
            if( !endsWithRev( start, start+len, '/' ) )
-                aResults->push_back( it->first );
+                continue;
+            */
+            aResults->push_back( *it );
        }
    }
 }
@@ -321,29 +406,20 @@ void DIR_LIB_SOURCE::ReadPart( STRING* aResult, const STRING& aPartName, const S
    if( aRev.size() )
        partname += "/" + aRev;
-    DIR_CACHE::iterator it = sweets.find( partname );
+    PART_CACHE::const_iterator it = partnames.find( partname );
-    if( it == sweets.end() )    // part not found
+    if( it == partnames.end() )    // part not found
    {
        partname += " not found.";
        throw IO_ERROR( partname.c_str() );
    }
-    if( !it->second )   // if the sweet string is not loaded yet
+    // create a fileName for the sweet string
-    {
+    STRING  fileName = makeFileName( aPartName );
-        STRING  filename = sourceURI + "/" + aPartName + ".part";
-        if( aRev.size() )
-        {
-            filename += "." + aRev;
-        }
-        it->second = new STRING();
+    // @todo what about aRev?  Define the public API with respect to aRev better.
-        readSExpression( it->second, filename );
+    readSExpression( aResult, fileName );
-    }
-    *aResult = *it->second;
 }
@@ -362,40 +438,36 @@ void DIR_LIB_SOURCE::ReadParts( STRINGS* aResults, const STRINGS& aPartNames )
 void DIR_LIB_SOURCE::GetCategories( STRINGS* aResults ) throw( IO_ERROR )
 {
-    *aResults = categories;
+    aResults->clear();
+    // caller fetches them sorted.
+    for( NAME_CACHE::const_iterator it = categories.begin();  it!=categories.end();  ++it )
+    {
+        aResults->push_back( *it );
+    }
 }
 #if defined(DEBUG)
-#include <richio.h>
 void DIR_LIB_SOURCE::Show()
 {
    printf( "Show categories:\n" );
-    for( STRINGS::const_iterator it = categories.begin();  it!=categories.end();  ++it )
+    for( NAME_CACHE::const_iterator it = categories.begin();  it!=categories.end();  ++it )
        printf( " '%s'\n", it->c_str() );
    printf( "\n" );
    printf( "Show parts:\n" );
-    for( DIR_CACHE::const_iterator it = sweets.begin();  it != sweets.end();  ++it )
+    for( PART_CACHE::const_iterator it = partnames.begin();  it != partnames.end();  ++it )
    {
-        printf( " '%s'\n", it->first.c_str() );
+        printf( " '%s'\n", it->c_str() );
-        if( it->second )
-        {
-            STRING_LINE_READER  slr( *it->second, wxString( wxConvertMB2WX( it->first.c_str() ) ) );
-            while( slr.ReadLine() )
-            {
-                printf( "    %s", (char*) slr );
-            }
-            printf( "\n" );
-        }
    }
 }
 #endif
-void DIR_LIB_SOURCE::doOneDir( const STRING& aCategory ) throw( IO_ERROR )
+void DIR_LIB_SOURCE::cacheOneDir( const STRING& aCategory ) throw( IO_ERROR )
 {
    STRING      curDir = sourceURI;
@@ -425,29 +497,29 @@ void DIR_LIB_SOURCE::doOneDir( const STRING& aCategory ) throw( IO_ERROR )
        if( !stat( fileName.c_str(), &fs ) )
        {
-            if( S_ISREG( fs.st_mode ) && makePartFileName( entry->d_name, aCategory, &partName ) )
+            // is this a valid part name?
+            if( S_ISREG( fs.st_mode ) && makePartName( &partName, entry->d_name, aCategory ) )
            {
-                /*
+                std::pair<NAME_CACHE::iterator, bool> pair = partnames.insert( partName );
-                if( sweets.find( partName ) != sweets.end() )
+                if( !pair.second )
                {
                    STRING  msg = partName;
                    msg += " has already been encountered";
                    throw IO_ERROR( msg.c_str() );
                }
-                */
-                sweets[partName] = NULL;  // NULL for now, load the sweet later.
            }
+            // is this an acceptable category name?
            else if( S_ISDIR( fs.st_mode ) && !aCategory.size() && isCategoryName( entry->d_name ) )
            {
                // only one level of recursion is used, controlled by the
                // emptiness of aCategory.
-                categories.push_back( entry->d_name );
+                categories.insert( entry->d_name );
                // somebody needs to test Windows (mingw), make sure it can
                // handle opendir() recursively
-                doOneDir( entry->d_name );
+                cacheOneDir( entry->d_name );
            }
            else
            {
@@ -467,14 +539,15 @@ int main( int argc, char** argv )
    try
    {
-        DIR_LIB_SOURCE  uut( argv[1] ? argv[1] : "", true );
+//        DIR_LIB_SOURCE  uut( argv[1] ? argv[1] : "", "" );
+        DIR_LIB_SOURCE  uut( argv[1] ? argv[1] : "", "useVersioning" );
-        // initially, only the DIR_CACHE sweets and STRING categories are loaded:
+        // initially, only the PART_CACHE partnames and NAME_CACHE categories are loaded:
        uut.Show();
        uut.GetCategoricalPartNames( &partnames, "Category" );
-        printf( "GetCategoricalPartNames(Category):\n" );
+        printf( "\nGetCategoricalPartNames( aCatagory = 'Category' ):\n" );
        for( STRINGS::const_iterator it = partnames.begin();  it!=partnames.end();  ++it )
        {
            printf( " '%s'\n", it->c_str() );
@@ -482,11 +555,10 @@ int main( int argc, char** argv )
        uut.ReadParts( &sweets, partnames );
        // fetch the part names for ALL categories.
        uut.GetCategoricalPartNames( &partnames );
-        printf( "GetCategoricalPartNames(ALL):\n" );
+        printf( "\nGetCategoricalPartNames( aCategory = '' i.e. ALL):\n" );
        for( STRINGS::const_iterator it = partnames.begin();  it!=partnames.end();  ++it )
        {
            printf( " '%s'\n", it->c_str() );
@@ -494,7 +566,7 @@ int main( int argc, char** argv )
        uut.ReadParts( &sweets, partnames );
-        printf( "Sweets for ALL parts:\n" );
+        printf( "\nSweets for ALL parts:\n" );
        STRINGS::const_iterator pn = partnames.begin();
        for( STRINGS::const_iterator it = sweets.begin();  it!=sweets.end();  ++it, ++pn )
        {

--- a/new/sch_dir_lib_source.h
+++ b/new/sch_dir_lib_source.h
@@ -28,17 +28,35 @@
 #include <sch_lib.h>
-#include <map>
+#include <set>
 #include <vector>
 /**
- * Type DIR_CACHE
+ * struct BY_REV
- * is a tuple, where the key is partname (prefixed with the category if any),
+ * is here to provide a custom way to compare STRINGs.  Namely, the revN[N..]
- * and value is pointer to Sweet string which is loaded lazily, so can be NULL
+ * string, if present, is collated 'higher revision first', but
- * until loaded.
+ * any part string without a revision sorts even 'before' that.
 */
-typedef std::map< STRING, STRING* >     DIR_CACHE;
+struct BY_REV
+{
+    bool operator() ( const STRING& s1, const STRING& s2 ) const;
+};
+/**
+ * Type PART_CACHE
+ * holds a set of part names in sorted order, according to the sort
+ * order given by struct BY_REV.
+ */
+typedef std::set< STRING, BY_REV >  PART_CACHE;
+/**
+ * Type NAME_CACHE
+ * holds a set of categories in sorted order.
+ */
+typedef std::set< STRING >          NAME_CACHE;
 namespace SCH {
@@ -55,29 +73,40 @@ class DIR_LIB_SOURCE : public LIB_SOURCE
    bool                useVersioning;  ///< use files with extension ".revNNN..", else not
-    DIR_CACHE           sweets;         ///< @todo, don't really need to cache the sweets, only the partnames.
+    /// normal partnames, some of which may be prefixed with a category,
+    /// and some of which may have legal "revN[N..]" type strings.
+    PART_CACHE          partnames;
+    /// categories which we expect to find in the set of @a partnames
+    NAME_CACHE          categories;
-    STRINGS             categories;
    std::vector<char>   readBuffer;     ///< used by readSExpression()
+    /**
+     * Function cache
+     * [re-]loads the directory cache(s).
+     */
+    void cache() throw( IO_ERROR );
    /**
-     * Function isPartFileName
+     * Function isCategoryName
-     * returns true iff aName is a valid part file name.
+     * returns true iff aName is a valid category name.
     */
-    bool  isPartFileName( const char* aName );
+    bool isCategoryName( const char* aName )
+    {
+        return true;
+    }
    /**
-     * Function makePartFileName
+     * Function makePartName
     * returns true iff aEntry holds a valid part filename, in the form of
     * "someroot.part[.revNNNN]"  where NNN are number characters [0-9]
     * @param aEntry is the raw directory entry without path information.
     * @param aCategory is the last portion of the directory path.
-     * @param aPartName is where to put a part name, assuming aEntry is legal.
+     * @param aPartName is where to put a part name, assuming @a aEntry is legal.
     * @return bool - true only if aEntry is a legal part file name.
     */
-    bool makePartFileName( const char* aEntry,
+    bool makePartName( STRING* aPartName, const char* aEntry, const STRING& aCategory );
-                           const STRING& aCategory, STRING* aPartName );
    /**
     * Function readSExpression
@@ -87,14 +116,20 @@ class DIR_LIB_SOURCE : public LIB_SOURCE
    /**
-     * Function doOneDir
+     * Function cacheOneDir
     * loads part names [and categories] from a directory given by
     * "sourceURI + '/' + category"
     * Categories are only loaded if processing the top most directory because
     * only one level of categories are supported.  We know we are in the
     * top most directory if aCategory is empty.
     */
-    void doOneDir( const STRING& aCategory ) throw( IO_ERROR );
+    void cacheOneDir( const STRING& aCategory ) throw( IO_ERROR );
+    /**
+     * Function makeFileName
+     * converts a part name into a filename and returns it.
+     */
+    STRING makeFileName( const STRING& aPartName );
 //protected:
 public:
@@ -112,14 +147,14 @@ public:
     * @param doUseVersioning if true means support versioning in the directory tree, otherwise
     *  only a single version of each part is recognized.
     */
-    DIR_LIB_SOURCE( const STRING& aDirectoryPath, bool doUseVersioning = false )
+    DIR_LIB_SOURCE( const STRING& aDirectoryPath, const STRING& aOptions = StrEmpty )
        throw( IO_ERROR );
    ~DIR_LIB_SOURCE();
    //-----<LIB_SOURCE implementation functions >------------------------------
-    void ReadPart( STRING* aResult, const STRING& aPartName, const STRING& aRev=StrEmpty )
+    void ReadPart( STRING* aResult, const STRING& aPartName, const STRING& aRev = StrEmpty )
        throw( IO_ERROR );
    void ReadParts( STRINGS* aResults, const STRINGS& aPartNames )
@@ -127,7 +162,7 @@ public:
    void GetCategories( STRINGS* aResults ) throw( IO_ERROR );
-    void GetCategoricalPartNames( STRINGS* aResults, const STRING& aCategory=StrEmpty )
+    void GetCategoricalPartNames( STRINGS* aResults, const STRING& aCategory = StrEmpty )
        throw( IO_ERROR );
    void GetRevisions( STRINGS* aResults, const STRING& aPartName ) throw( IO_ERROR )