dsnlexer.h 18.9 KB
Newer Older
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24
/*
 * This program source code file is part of KICAD, a free EDA CAD application.
 *
 * Copyright (C) 2007-2010 SoftPLC Corporation, Dick Hollenbeck <dick@softplc.com>
 * Copyright (C) 2007 Kicad Developers, see change_log.txt for contributors.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version 2
 * of the License, or (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, you may find one here:
 * http://www.gnu.org/licenses/old-licenses/gpl-2.0.html
 * or you may search the http://www.gnu.org website for the version 2 license,
 * or you may write to the Free Software Foundation, Inc.,
 * 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA
 */

25 26
#ifndef DSNLEXER_H_
#define DSNLEXER_H_
27

28
#include <stdio.h>
29
#include <string>
30
#include <vector>
31
#include <hashtables.h>
32

33
#include <richio.h>
34

35
#ifndef SWIG
36 37 38 39 40 41 42 43 44
/**
 * Struct KEYWORD
 * pairs the text of a keyword with the integer token that identifies it.
 */
struct KEYWORD
{
    /// unique keyword.
    const char* name;

    /// a zero based index into an array of KEYWORDs
    int         token;
};
45
#endif
46 47 48 49 50 51 52 53 54 55 56 57 58

// something like this macro can be used to help initialize a KEYWORD table.
// see SPECCTRA_DB::keywords[] as an example.

//#define TOKDEF(x)    { #x, T_##x }


/**
 * Enum DSN_SYNTAX_T
 * lists all the DSN lexer's tokens that are supported in lexing.  It is up
 * to the parser if it wants also to support them.
 *
 * All of these values are negative; presumably this keeps them from colliding
 * with the zero based keyword tokens held in a KEYWORD table (to be confirmed
 * against the lexer implementation).
 */
enum DSN_SYNTAX_T {
    DSN_NONE = -11,
    DSN_COMMENT = -10,
    DSN_STRING_QUOTE = -9,
    DSN_QUOTE_DEF = -8,
    DSN_DASH = -7,
    DSN_SYMBOL = -6,
    DSN_NUMBER = -5,
    DSN_RIGHT = -4,           // right bracket, ')'
    DSN_LEFT = -3,            // left bracket, '('
    DSN_STRING = -2,          // a quoted string, stripped of the quotes
    DSN_EOF = -1              // special case for end of file
};


/**
74
 * Class DSNLEXER
75 76
 * implements a lexical analyzer for the SPECCTRA DSN file format.  It
 * reads lexical tokens from the current LINE_READER through the NextTok()
77
 * function.
78 79 80
 */
class DSNLEXER
{
81
#ifndef SWIG
Dick Hollenbeck's avatar
Dick Hollenbeck committed
82
protected:
83
    bool                iOwnReaders;            ///< on readerStack, should I delete them?
Dick Hollenbeck's avatar
Dick Hollenbeck committed
84 85 86
    const char*         start;
    const char*         next;
    const char*         limit;
Dick Hollenbeck's avatar
Dick Hollenbeck committed
87
    char                dummy[1];               ///< when there is no reader.
88

89
    typedef std::vector<LINE_READER*>  READER_STACK;
90

91 92
    READER_STACK        readerStack;            ///< all the LINE_READERs by pointer.
    LINE_READER*        reader;                 ///< no ownership. ownership is via readerStack, maybe, if iOwnReaders
Dick Hollenbeck's avatar
Dick Hollenbeck committed
93 94 95 96 97 98 99 100

    bool                specctraMode;           ///< if true, then:
                                                ///< 1) stringDelimiter can be changed
                                                ///< 2) Kicad quoting protocol is not in effect
                                                ///< 3) space_in_quoted_tokens is functional
                                                ///< else not.

    char                stringDelimiter;
101
    bool                space_in_quoted_tokens; ///< blank spaces within quoted strings
Dick Hollenbeck's avatar
Dick Hollenbeck committed
102

dickelbeck's avatar
dickelbeck committed
103
    bool                commentsAreTokens;      ///< true if should return comments as tokens
104

105 106
    int                 prevTok;                ///< curTok from previous NextTok() call.
    int                 curOffset;              ///< offset within current line of the current token
107

108 109
    int                 curTok;                 ///< the current token obtained on last NextTok()
    std::string         curText;                ///< the text of the current token
110

111 112 113
    const KEYWORD*      keywords;               ///< table sorted by CMake for bsearch()
    unsigned            keywordCount;           ///< count of keywords table
    KEYWORD_MAP         keyword_hash;           ///< fast, specialized "C string" hashtable
114

115
    void init();
116

117
    int readLine() throw( IO_ERROR )
118
    {
Dick Hollenbeck's avatar
Dick Hollenbeck committed
119 120
        if( reader )
        {
121 122 123
            reader->ReadLine();

            unsigned len = reader->Length();
124

Dick Hollenbeck's avatar
Dick Hollenbeck committed
125 126 127
            // start may have changed in ReadLine(), which can resize and
            // relocate reader's line buffer.
            start = reader->Line();
128

Dick Hollenbeck's avatar
Dick Hollenbeck committed
129 130
            next  = start;
            limit = next + len;
131

Dick Hollenbeck's avatar
Dick Hollenbeck committed
132 133 134
            return len;
        }
        return 0;
135 136 137 138
    }

    /**
     * Function findToken
Dick Hollenbeck's avatar
Dick Hollenbeck committed
139
     * takes aToken string and looks up the string in the keywords table.
140
     *
Dick Hollenbeck's avatar
Dick Hollenbeck committed
141 142 143
     * @param aToken is a string to lookup in the keywords table.
     * @return int - with a value from the enum DSN_T matching the keyword text,
     *         or DSN_SYMBOL if @a aToken is not in the keywords table.
144
     */
Dick Hollenbeck's avatar
Dick Hollenbeck committed
145
    int findToken( const std::string& aToken );
146 147 148 149 150 151 152 153 154 155 156

    bool isStringTerminator( char cc )
    {
        if( !space_in_quoted_tokens && cc==' ' )
            return true;

        if( cc == stringDelimiter )
            return true;

        return false;
    }
157

158
#endif
159 160 161 162

public:

    /**
163
     * Constructor ( FILE*, const wxString& )
164 165 166 167 168
     * initializes a DSN lexer and prepares to read from aFile which
     * is already open and has aFilename.
     *
     * @param aKeywordTable is an array of KEYWORDS holding \a aKeywordCount.  This
     *  token table need not contain the lexer separators such as '(' ')', etc.
169
     * @param aKeywordCount is the count of tokens in aKeywordTable.
170 171
     * @param aFile is an open file, which will be closed when this is destructed.
     * @param aFileName is the name of the file
172
     */
173 174
    DSNLEXER( const KEYWORD* aKeywordTable, unsigned aKeywordCount,
              FILE* aFile, const wxString& aFileName );
175

176
    /**
177
     * Constructor ( const KEYWORD*, unsigned, const std::string&, const wxString& )
178 179 180 181
     * initializes a DSN lexer and prepares to read from @a aSExpression.
     *
     * @param aKeywordTable is an array of KEYWORDS holding \a aKeywordCount.  This
     *  token table need not contain the lexer separators such as '(' ')', etc.
182
     * @param aKeywordCount is the count of tokens in aKeywordTable.
183 184 185
     * @param aSExpression is text to feed through a STRING_LINE_READER
     * @param aSource is a description of aSExpression, used for error reporting.
     */
186
    DSNLEXER( const KEYWORD* aKeywordTable, unsigned aKeywordCount,
187
              const std::string& aSExpression, const wxString& aSource = wxEmptyString );
188

189 190 191 192 193 194 195 196 197 198
    /**
     * Constructor ( const std::string&, const wxString& )
     * initializes a DSN lexer and prepares to read from @a aSExpression.  Use this
     * one without a keyword table with the DOM parser in ptree.h.
     *
     * @param aSExpression is text to feed through a STRING_LINE_READER
     * @param aSource is a description of aSExpression, used for error reporting.
     */
    DSNLEXER( const std::string& aSExpression, const wxString& aSource = wxEmptyString );

199 200 201 202 203 204 205 206
    /**
     * Constructor ( LINE_READER* )
     * initializes a DSN lexer and prepares to read from @a aLineReader which
     * is already open, and may be in use by other DSNLEXERs also.  No ownership
     * is taken of @a aLineReader. This enables it to be used by other DSNLEXERs also.
     *
     * @param aKeywordTable is an array of KEYWORDS holding \a aKeywordCount.  This
     *  token table need not contain the lexer separators such as '(' ')', etc.
207
     *
208
     * @param aKeywordCount is the count of tokens in aKeywordTable.
209
     *
210
     * @param aLineReader is any subclassed instance of LINE_READER, such as
211
     *  STRING_LINE_READER or FILE_LINE_READER.  No ownership is taken.
212 213
     */
    DSNLEXER( const KEYWORD* aKeywordTable, unsigned aKeywordCount,
214
              LINE_READER* aLineReader = NULL );
215 216

    virtual ~DSNLEXER();
217

218 219 220 221 222 223 224 225 226 227
    /**
     * Usable only for DSN lexers which share the same LINE_READER.
     * Synchronizes the pointers handling the data read by the LINE_READER.
     * Allows 2 DSNLEXERs to share the same current line, when switching from
     * one DSNLEXER to another DSNLEXER.
     * @param aLexer = the model
     * @return true if the sync can be made ( at least the same line reader )
     */
    bool SyncLineReaderWith( DSNLEXER& aLexer );

Dick Hollenbeck's avatar
Dick Hollenbeck committed
228 229 230 231 232 233 234 235 236 237 238 239 240 241
    /**
     * Function SetSpecctraMode
     * changes the behavior of this lexer into or out of "specctra mode".  If
     * specctra mode, then:
     * 1) stringDelimiter can be changed
     * 2) Kicad quoting protocol is not in effect
     * 3) space_in_quoted_tokens is functional
     * else none of the above are true.  The default mode is non-specctra mode, meaning:
     * 1) stringDelimiter cannot be changed
     * 2) Kicad quoting protocol is in effect
     * 3) space_in_quoted_tokens is not functional
     */
    void SetSpecctraMode( bool aMode );

242 243 244
    /**
     * Function PushReader
     * manages a stack of LINE_READERs in order to handle nested file inclusion.
245
     * This function pushes aLineReader onto the top of a stack of LINE_READERs and makes
246
     * it the current LINE_READER with its own GetSource(), line number and line text.
247 248 249 250
     * A grammar must be designed such that the "include" token (whatever its various names),
     * and any of its parameters are not followed by anything on that same line,
     * because PopReader always starts reading from a new line upon returning to
     * the original LINE_READER.
251 252 253 254 255 256 257 258 259
     */
    void PushReader( LINE_READER* aLineReader );

    /**
     * Function PopReader
     * deletes the top most LINE_READER from an internal stack of LINE_READERs and
     * in the case of FILE_LINE_READER this means the associated FILE is closed.
     * The most recently used former LINE_READER on the stack becomes the
     * current LINE_READER and its previous position in its input stream and
     * its latest line number should pertain.  PopReader always starts reading
261 262 263 264
     * from a new line upon returning to the previous LINE_READER.  A pop is only
     * possible if there are at least 2 LINE_READERs on the stack, since popping
     * the last one is not supported.
     *
Dick Hollenbeck's avatar
Dick Hollenbeck committed
265 266 267
     * @return LINE_READER* - is the one that was in use before the pop, or NULL
     *   if there was not at least two readers on the stack and therefore the
     *   pop failed.
268
     */
Dick Hollenbeck's avatar
Dick Hollenbeck committed
269
    LINE_READER* PopReader();
270

271 272 273 274 275 276 277 278 279 280 281 282
    // Some functions whose return value is best overloaded to return an enum
    // in a derived class.
    //-----<overload return values to tokens>------------------------------

    /**
     * Function NextTok
     * returns the next token found in the input file or DSN_EOF when reaching
     * the end of file.  Users should wrap this function to return an enum
     * to aid in grammar debugging while running under a debugger, but leave
     * this lower level function returning an int (so the enum does not collide
     * with another usage).
     * @return int - the type of token found next.
283
     * @throw IO_ERROR - only if the LINE_READER throws it.
284
     */
285
    int NextTok() throw( IO_ERROR );
286 287 288 289 290

    /**
     * Function NeedSYMBOL
     * calls NextTok() and then verifies that the token read in
     * satisfies bool IsSymbol().
291
     * If not, an IO_ERROR is thrown.
292
     * @return int - the actual token read in.
293
     * @throw IO_ERROR, if the next token does not satisfy IsSymbol()
294
     */
295
    int NeedSYMBOL() throw( IO_ERROR );
296 297 298 299 300

    /**
     * Function NeedSYMBOLorNUMBER
     * calls NextTok() and then verifies that the token read in
     * satisfies bool IsSymbol() or tok==DSN_NUMBER.
301
     * If not, an IO_ERROR is thrown.
302
     * @return int - the actual token read in.
303
     * @throw IO_ERROR, if the next token does not satisfy the above test
304
     */
305
    int NeedSYMBOLorNUMBER() throw( IO_ERROR );
306

Dick Hollenbeck's avatar
Dick Hollenbeck committed
307 308 309 310 311 312 313 314 315
    /**
     * Function NeedNUMBER
     * calls NextTok() and then verifies that the token read is type DSN_NUMBER.
     * If not, an IO_ERROR is thrown using text from aExpectation.
     * @return int - the actual token read in.
     * @throw IO_ERROR, if the next token does not satisfy the above test
     */
    int NeedNUMBER( const char* aExpectation ) throw( IO_ERROR );

316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335
    /**
     * Function CurTok
     * reports the token that the most recent NextTok() call returned.
     * @return int - the current token.
     */
    int CurTok()    { return curTok; }

    /**
     * Function PrevTok
     * reports the token that NextTok() returned on the call before the most
     * recent one, i.e. the 2nd to last time it was called.
     * @return int - the previous token.
     */
    int PrevTok()   { return prevTok; }

    //-----</overload return values to tokens>-----------------------------

336

337 338 339 340 341 342 343
    /**
     * Function SetStringDelimiter
     * changes the string delimiter from the default " to some other character
     * and returns the old value.
     * @param aStringDelimiter The character in lowest 8 bits.
     * @return int - The old delimiter in the lowest 8 bits.
     */
Dick Hollenbeck's avatar
Dick Hollenbeck committed
344
    char SetStringDelimiter( char aStringDelimiter )
345 346
    {
        int old = stringDelimiter;
Dick Hollenbeck's avatar
Dick Hollenbeck committed
347 348
        if( specctraMode )
            stringDelimiter = aStringDelimiter;
349 350 351 352 353 354 355 356 357 358 359 360
        return old;
    }

    /**
     * Function SetSpaceInQuotedTokens
     * changes the setting controlling whether a space in a quoted string is
     * a terminator.
     * @param val If true, means
     */
    bool SetSpaceInQuotedTokens( bool val )
    {
        bool old = space_in_quoted_tokens;
Dick Hollenbeck's avatar
Dick Hollenbeck committed
361 362
        if( specctraMode )
            space_in_quoted_tokens = val;
363 364 365
        return old;
    }

366 367 368 369 370 371 372 373
    /**
     * Function SetCommentsAreTokens
     * changes the handling of comments.  If set true, comments are returned
     * as single line strings with a terminating newline, else they are
     * consumed by the lexer and not returned.
     * @param val is the new setting.
     * @return bool - the setting that was in effect before this call.
     */
    bool SetCommentsAreTokens( bool val )
    {
        bool old = commentsAreTokens;

        commentsAreTokens = val;

        return old;
    }

379 380 381 382 383 384 385 386 387 388 389 390 391
    /**
     * Function ReadCommentLines
     * checks the next sequence of tokens and reads them into a wxArrayString
     * if they are comments.  Reading continues until a non-comment token is
     * encountered, and such last read token remains as CurTok() and as CurText().
     * No push back or "un get" mechanism is used for this support.  Upon return
     * you simply avoid calling NextTok() for the next token, but rather CurTok().
     *
     * @return wxArrayString* - heap allocated block of comments, or NULL if none;
     *   caller owns the allocation and must delete if not NULL.
     */
    wxArrayString* ReadCommentLines() throw( IO_ERROR );

392 393 394 395 396 397 398
    /**
     * Function IsSymbol
     * tests a token to see if it is a symbol.  This means it cannot be a
     * special delimiter character such as DSN_LEFT, DSN_RIGHT, DSN_QUOTE, etc.  It may
     * however, coincidentally match a keyword and still be a symbol.
     */
    static bool IsSymbol( int aTok );
399 400

    /**
401
     * Function Expecting
402
     * throws an IO_ERROR exception with an input file specific error message.
403
     * @param aTok is the token/keyword type which was expected at the current input location.
404
     * @throw IO_ERROR with the location within the input file of the problem.
405
     */
406
    void Expecting( int aTok ) throw( IO_ERROR );
407 408 409

    /**
     * Function Expecting
410
     * throws an IO_ERROR exception with an input file specific error message.
Dick Hollenbeck's avatar
Dick Hollenbeck committed
411 412
     * @param aTokenList is the token/keyword type which was expected at the
     *         current input location, e.g.  "pin|graphic|property"
413
     * @throw IO_ERROR with the location within the input file of the problem.
414
     */
Dick Hollenbeck's avatar
Dick Hollenbeck committed
415
    void Expecting( const char* aTokenList ) throw( IO_ERROR );
416 417 418

    /**
     * Function Unexpected
419
     * throws an IO_ERROR exception with an input file specific error message.
420 421
     * @param aTok is the token/keyword type which was not expected at the
     *         current input location.
422
     * @throw IO_ERROR with the location within the input file of the problem.
423
     */
424
    void Unexpected( int aTok ) throw( IO_ERROR );
425

Dick Hollenbeck's avatar
Dick Hollenbeck committed
426
    /**
427 428 429
     * Function Unexpected
     * throws an IO_ERROR exception with an input file specific error message.
     * @param aToken is the token which was not expected at the
Dick Hollenbeck's avatar
Dick Hollenbeck committed
430 431 432
     *         current input location.
     * @throw IO_ERROR with the location within the input file of the problem.
     */
433
    void Unexpected( const char* aToken ) throw( IO_ERROR );
Dick Hollenbeck's avatar
Dick Hollenbeck committed
434

435
    /**
436 437 438 439
     * Function Duplicate
     * throws an IO_ERROR exception with a message saying specifically that aTok
     * is a duplicate of one already seen in current context.
     * @param aTok is the token/keyword type which was not expected at the
440
     *         current input location.
441
     * @throw IO_ERROR with the location within the input file of the problem.
442
     */
443
    void Duplicate( int aTok ) throw( IO_ERROR );
444

445 446 447
    /**
     * Function NeedLEFT
     * calls NextTok() and then verifies that the token read in is a DSN_LEFT.
448 449
     * If it is not, an IO_ERROR is thrown.
     * @throw IO_ERROR, if the next token is not a DSN_LEFT
450
     */
451
    void NeedLEFT() throw( IO_ERROR );
452 453 454 455

    /**
     * Function NeedRIGHT
     * calls NextTok() and then verifies that the token read in is a DSN_RIGHT.
456 457
     * If it is not, an IO_ERROR is thrown.
     * @throw IO_ERROR, if the next token is not a DSN_RIGHT
458
     */
459
    void NeedRIGHT() throw( IO_ERROR );
460

461 462
    /**
     * Function GetTokenText
463 464 465 466
     * returns the C string representation of a DSN_T value.
     */
    const char* GetTokenText( int aTok );

467 468 469 470 471
    /**
     * Function GetTokenString
     * returns a quote wrapped wxString representation of a token value.
     */
    wxString GetTokenString( int aTok );
472

473
    static const char* Syntax( int aTok );
474 475 476 477 478 479 480 481 482 483

    /**
     * Function CurText
     * gives access to the text of the current token as a C string.
     * @return const char* - pointer to the characters held by curText.
     */
    const char* CurText()   { return curText.c_str(); }

484 485 486 487 488 489 490 491 492
    /**
     * Function CurStr
     * gives access to the text of the current token in std::string form.
     * @return const std::string& - a reference to curText, not a copy.
     */
    const std::string& CurStr()     { return curText; }

Dick Hollenbeck's avatar
Dick Hollenbeck committed
493 494 495 496 497 498 499 500 501 502
    /**
     * Function FromUTF8
     * converts the current token text to a wxString, on the assumption that
     * the input byte stream is UTF8 encoded.
     * @return wxString - the result of wxString::FromUTF8() on the token text.
     */
    wxString FromUTF8()
    {
        const char* utf8Text = curText.c_str();

        return wxString::FromUTF8( utf8Text );
    }

503 504 505 506 507 508
    /**
     * Function CurLineNumber
     * returns the current line number within my LINE_READER
     */
    int CurLineNumber()
    {
509
        return reader->LineNumber();
510 511
    }

Dick Hollenbeck's avatar
Dick Hollenbeck committed
512 513 514 515 516 517 518 519 520 521
    /**
     * Function CurLine
     * gives the line of text currently held by my LINE_READER, which is the
     * line from which CurText() would return its token.
     * @return const char* - the current reader converted to a C string.
     */
    const char* CurLine()
    {
        return static_cast<const char*>( *reader );
    }

522 523
    /**
     * Function CurFilename
524 525 526
     * returns the current LINE_READER source.
     * @return const wxString& - the source of the lines of text,
     *   e.g. a filename or "clipboard".
527
     */
528
    const wxString& CurSource()
529
    {
530
        return reader->GetSource();
531 532 533 534
    }

    /**
     * Function CurOffset
     * returns the byte offset within the current line, using a 1 based index.
     * The stored curOffset has one added to it to produce the returned value.
     * @return int - a one based index into the current line.
     */
    int CurOffset()
    {
        return curOffset + 1;
    }
};

544
#endif  // DSNLEXER_H_