/*
* Copyright 1993-2002 Christopher Seiwald and Perforce Software, Inc.
*
* This file is part of Jam - see jam.c for Copyright information.
*/
/*
* scan.c - the jam yacc scanner
*
* 12/26/93 (seiwald) - bump buf in yylex to 10240 - yuk.
* 09/16/94 (seiwald) - check for overflows, unmatched {}'s, etc.
* Also handle tokens abutting EOF by remembering
* to return EOF no matter how many times yylex()
* reinvokes yyline().
* 02/11/95 (seiwald) - honor only punctuation keywords if SCAN_PUNCT.
* 07/27/95 (seiwald) - Include jamgram.h after scan.h, so that YYSTYPE is
* defined before Linux's yacc tries to redefine it.
* 01/10/01 (seiwald) - \ can now escape any whitespace char
* 11/04/02 (seiwald) - const-ing for string literals
*/
# include "jam.h"
# include "lists.h"
# include "parse.h"
# include "scan.h"
# include "jamgram.h"
# include "jambase.h"
# include "newstr.h"
#ifdef GRAPHISOFT_LEXSCAN
/***********************************************************************
separators:
"(", ")"
"[", "]"
"{", "}"
":",";"
"!","!=",
"?","?=",
"&&",
"||",
"=",
" ", "\t", "\n", "\r", EOF
todo:
"+" should be a separator too, but "C++" is used in jamfiles...
***********************************************************************/
/*
 * Separator class per character value, filled in by initscan():
 *   0 - not a separator
 *   1 - separator
 *   2 - set for '&' and '|'
 *   3 - set for '+'; yylex() does not let a class-3 character end a word
 */
char tokenseparator [ 256 ] ;
/*
 * For a separator character, the character that forms a two-character
 * token with it when it follows immediately (e.g. '=' after '!'); 0 if none.
 */
char tokenseparatornext [ 256 ] ;
#endif
#ifdef GRAPHISOFT_MPW_FIX
#if defined (macintosh)
# include "CursorCtl.h"
/* Number of yyline() calls so far; every 1000th call spins the cursor. */
static int totalline = 0;
#endif
#endif
/*
 * keywords[] - table mapping keyword spellings to token types.
 *
 * The entries are pulled in from jamgramtab.h; the trailing { 0, 0 }
 * entry is the sentinel that the lookup loop in yylex() stops on.
 */
struct keyword {
const char *word; /* keyword text; 0 marks the end of the table */
int type; /* token type stored in yylval.type on a match */
} keywords[] = {
# include "jamgramtab.h"
{ 0, 0 }
} ;
#ifdef GRAPHISOFT_FIX_NONNATIVENEWLINE
#define XFILEBUFSIZE 4096

/*
 * XFILE - small read-ahead wrapper around a FILE, used so that xfgets()
 * can treat LF, CR and CR LF line endings alike.
 */
typedef struct XFILE {
    char buffer [XFILEBUFSIZE];     /* read-ahead data taken from f */
    size_t start;                   /* index of the next unread byte */
    size_t end;                     /* one past the last valid byte */
    FILE* f;                        /* underlying stream */
} XFILE;

/*
 * xfopen() - open a file for reading through an XFILE
 *
 * Returns a heap-allocated XFILE (release it with xfclose()), or NULL
 * if the file cannot be opened or memory cannot be allocated.
 */
static XFILE* xfopen ( const char* file, const char* mode)
{
    XFILE* result;
    FILE* f = fopen (file, mode);

    if (f == NULL)
        return NULL;

    result = (XFILE*)(malloc (sizeof (*result)));
    if (result == NULL) {
        fclose (f);
        return NULL;
    }

    result->f = f;
    result->start = 0;
    result->end = 0;
    return result;
}

/*
 * xfrefill() - replace the read-ahead buffer with the next chunk of the file
 *
 * Returns non-zero if at least one byte was read, 0 at end of file (or on
 * a read error), in which case the buffer is left empty.
 */
static int xfrefill (XFILE* f)
{
    f->start = 0;
    f->end = fread (f->buffer, sizeof(char), XFILEBUFSIZE, f->f);
    return f->end != 0;
}

/*
 * xfgets() - fgets() work-alike that accepts LF, CR and CR LF line ends
 *
 * Reads at most size-1 characters into buf, stopping after the first line
 * terminator, which is always stored as a single '\n'.  The result is
 * always NUL-terminated.
 *
 * Returns buf, or NULL if size is 0 or if end of file is reached before
 * any character could be stored.  A final line that lacks a terminator
 * is returned as-is (without a '\n'), as fgets() does.
 */
static char* xfgets (char* buf, size_t size, XFILE* f)
{
    /* Compare against the byte values: '\r' and '\n' are swapped by */
    /* some (classic Mac) compilers. */
    const char cr = 0x0D;
    const char lf = 0x0A;
    size_t n = 0;

    if (buf == NULL || size == 0)
        return NULL;

    /* Always leave room for the terminating NUL. */
    while (n + 1 < size) {
        char c;

        if (f->start == f->end && !xfrefill (f)) {
            if (n == 0)
                return NULL;    /* EOF and nothing read */
            break;              /* EOF after an unterminated last line */
        }

        c = f->buffer[f->start++];

        if (c == cr || c == lf) {
            if (c == cr) {
                /* Swallow the LF of a CR LF pair, even when the pair */
                /* straddles two buffer loads. */
                if (f->start == f->end)
                    xfrefill (f);
                if (f->start < f->end && f->buffer[f->start] == lf)
                    f->start++;
            }
            buf[n++] = '\n';
            break;
        }

        buf[n++] = c;
    }

    buf[n] = 0;
    return buf;
}

/*
 * xfclose() - close the underlying stream and free the XFILE
 */
static void xfclose (XFILE* f)
{
    fclose (f->f);
    free ((void*)f);
}
#endif
/*
 * struct include - one input source on the scanner's include stack.
 *
 * yyfparse() pushes a new entry on the chain headed by incp; yyline()
 * pops and frees it once the source is exhausted.
 */
struct include {
struct include *next; /* next serial include file */
const char *string; /* pointer into current line */
char **strings; /* for yyfparse() -- text to parse */
#ifdef GRAPHISOFT_FIX_NONNATIVENEWLINE
XFILE *file; /* for yyfparse() -- file being read */
#else
FILE *file; /* for yyfparse() -- file being read */
#endif
const char *fname; /* for yyfparse() -- file name */
int line; /* line counter for error messages */
char buf[ 512 ]; /* for yyfparse() -- line buffer */
} ;
/* Scanner state shared by the functions in this file. */
static struct include *incp = 0; /* current file; head of chain */
static int scanmode = SCAN_NORMAL; /* current mode, set through yymode() */
static int anyerrors = 0; /* number of yyerror() calls so far */
static char *symdump( YYSTYPE *s );
#ifdef GRAPHISOFT_LEXSCAN
static int lexscanmode = LEXSCANMODE_COMPATIBLE; /* set through initscan() */
#endif
# define BIGGEST_TOKEN 10240 /* no single token can be larger */
#ifdef GRAPHISOFT_LEXSCAN
/*
 * initscan() - build the token separator tables and select the lexer mode
 *
 * tokenseparator[] receives the separator class of every character and
 * tokenseparatornext[] the character that completes a two-character
 * token ("!=", "?=", "+=", "<=", ">=", "&&", "||").
 */
void initscan (int inlexscanmode)
{
    /* Class 1: characters that are separators by themselves. */
    /* "!", "?" and "=" are listed so that "!=" and "?=" get parsed. */
    static const char single[] = "[](){}!?=:; \t\n\r<>";
    /* Class 2: not valid tokens alone, only when doubled ("&&", "||"). */
    static const char doubled[] = "&|";
    /* Characters that form a token together with a following '='. */
    static const char beforeequal[] = "?!+<>";
    const char *p;
    int ch;

    for (ch = 0; ch < 256; ch++) {
        tokenseparator[ch] = 0;
        tokenseparatornext[ch] = 0;
    }

    for (p = single; *p; p++)
        tokenseparator[(unsigned char)*p] = 1;

    for (p = doubled; *p; p++) {
        tokenseparator[(unsigned char)*p] = 2;
        tokenseparatornext[(unsigned char)*p] = *p;
    }

    /* Class 3: "+" is not a valid token but "+=" is.  This is */
    /* problematic: "+=" shall be a separator, but "++" shouldn't. */
    tokenseparator[(unsigned char)'+'] = 3;

    for (p = beforeequal; *p; p++)
        tokenseparatornext[(unsigned char)*p] = '=';

    lexscanmode = inlexscanmode;
}
#endif
/*
 * yymode() - select the scanning mode used by subsequent yylex() calls
 *
 * n is stored as-is in scanmode; yylex() compares it against
 * SCAN_STRING and SCAN_PUNCT.
 */
void yymode( int n )
{
    scanmode = n;
}
/*
 * yyerror() - print a parse error for the current token and count it
 *
 * The message is prefixed with the file name and line number of the
 * current include (when there is one), in a platform-specific layout.
 * Every call increments anyerrors.
 */
void
yyerror( const char *s )
{
#if defined( GRAPHISOFT_JAM ) && defined( OS_MAC )
    if( incp )
        printf ("File '%s' ; line %d \n",incp->fname,incp->line);

    printf( "# %s at %s\n", s, symdump( &yylval ) );
#elif defined( GRAPHISOFT_JAM ) && defined( NT )
    if( incp )
        printf ("%s(%d) : ", incp->fname,incp->line);

    printf ("%s at %s\n", s, symdump (&yylval) );
#else
    if( incp )
        printf( "%s: line %d: ", incp->fname, incp->line );

    printf( "%s at %s\n", s, symdump( &yylval ) );
#endif

    anyerrors++;
}
#ifdef GRAPHISOFT_JAM
/*
 * yywarning() - print a warning for the current token
 *
 * Same layout as an error message, but anyerrors is left untouched.
 */
void
yywarning( const char *s )
{
#ifndef OS_MAC
    if( incp )
        printf( "%s: line %d: ", incp->fname, incp->line );

    printf( "%s at %s\n", s, symdump( &yylval ) );
#else
    if( incp )
        printf ("File '%s' ; line %d \n",incp->fname,incp->line);

    printf( "# %s at %s\n", s, symdump( &yylval ) );
#endif
}
#endif
/*
 * yyanyerrors() - return 1 if yyerror() has been called at least once, else 0
 */
int
yyanyerrors()
{
    return anyerrors ? 1 : 0;
}
void
yyfparse( const char *s )
{
struct include *i = (struct include *)malloc( sizeof( *i ) );
/* Push this onto the incp chain. */
i->string = "";
i->strings = 0;
i->file = 0;
i->fname = copystr( s );
i->line = 0;
i->next = incp;
incp = i;
/* If the filename is "+", it means use the internal jambase. */
if( !strcmp( s, "+" ) )
i->strings = jambase;
}
/*
 * yyline() - read new line and return first character
 *
 * Fabricates a continuous stream of characters across include files,
 * returning EOF at the bitter end.
 *
 * Characters are returned as unsigned char values converted to int
 * (the getc() convention), so a byte >= 0x80 is never negative and
 * cannot be confused with EOF.
 *
 * When the current include is exhausted it is popped off the chain,
 * its file is closed, its name and entry are freed, and EOF is returned.
 *
 * NOTE(review): with GRAPHISOFT_FIX_NONNATIVENEWLINE the name "-" yields
 * no input at all (there is no XFILE for stdin), whereas the plain build
 * reads stdin -- confirm whether that is intended.
 */

int
yyline()
{
    struct include *i = incp;

    if( !incp )
        return EOF;

#ifdef GRAPHISOFT_MPW_FIX
#if defined (macintosh)
    if (!(++totalline % 1000))
        SpinCursor (1);
#endif
#endif

    /* If there is more data in this line, return it. */

    if( *i->string )
        return (unsigned char)*i->string++;

    /* If we're reading from an internal string list, go to the */
    /* next string. */

    if( i->strings )
    {
        if( !*i->strings )
            goto next;

        i->line++;
        i->string = *(i->strings++);
        return (unsigned char)*i->string++;
    }

    /* If necessary, open the file */

    if( !i->file )
    {
#ifdef GRAPHISOFT_FIX_NONNATIVENEWLINE
        XFILE *f = NULL;

        if( strcmp( i->fname, "-" ) && !( f = xfopen( i->fname, "r" ) ) )
            perror( i->fname );
#else
        FILE *f = stdin;

        if( strcmp( i->fname, "-" ) && !( f = fopen( i->fname, "r" ) ) )
            perror( i->fname );
#endif

        i->file = f;
    }

    /* If there's another line in this file, start it. */

#ifdef GRAPHISOFT_FIX_NONNATIVENEWLINE
    if( i->file && xfgets( i->buf, sizeof( i->buf ), i->file ) )
#else
    if( i->file && fgets( i->buf, sizeof( i->buf ), i->file ) )
#endif
    {
        i->line++;
        i->string = i->buf;
        return (unsigned char)*i->string++;
    }

next:
    /* This include is done. */
    /* Free it up and return EOF so yyparse() returns to parse_file(). */

    incp = i->next;

    /* Close file, free name */

#ifdef GRAPHISOFT_FIX_NONNATIVENEWLINE
    if( i->file )
        xfclose( i->file );
#else
    if( i->file && i->file != stdin )
        fclose( i->file );
#endif
    freestr( i->fname );
    free( (char *)i );

    return EOF;
}
/*
* yylex() - set yylval to current token; return its type
*
* Macros to move things along:
*
* yychar() - return and advance character; invalid after EOF
* yyprev() - back up one character; invalid before yychar()
*
* yychar() returns a continuous stream of characters, until it hits
* the EOF of the current include file.
*/
# define yychar() ( *incp->string ? *incp->string++ : yyline() )
# define yyprev() ( incp->string-- )
int
yylex()
{
int c;
char buf[BIGGEST_TOKEN];
char *b = buf;
if( !incp )
goto eof;
/* Get first character (whitespace or of token) */
c = yychar();
if( scanmode == SCAN_STRING )
{
/* If scanning for a string (action's {}'s), look for the */
/* closing brace. We handle matching braces, if they match! */
int nest = 1;
while( c != EOF && b < buf + sizeof( buf ) )
{
if( c == '{' )
nest++;
if( c == '}' && !--nest )
break;
*b++ = c;
c = yychar();
}
/* We ate the ending brace -- regurgitate it. */
if( c != EOF )
yyprev();
/* Check obvious errors. */
if( b == buf + sizeof( buf ) )
{
yyerror( "action block too big" );
goto eof;
}
if( nest )
{
yyerror( "unmatched {} in action block" );
goto eof;
}
*b = 0;
yylval.type = STRING;
yylval.string = newstr( buf );
}
else
{
char *b = buf;
struct keyword *k;
int inquote = 0;
#ifdef GRAPHISOFT_LEXSCAN
int invariable = 0; /* If !=0 we are inside a $(..) variable evaluation expression. */
int wasdollar = 0; /* Last charater was a '$' */
int istokenseparator = 0;
#endif
int notkeyword;
/* Eat white space */
for( ;; )
{
/* Skip past white space */
while( c != EOF && isspace( c ) )
c = yychar();
/* Not a comment? Swallow up comment line. */
if( c != '#' )
break;
while( ( c = yychar() ) != EOF && c != '\n' && c != '\r' )
;
}
/* c now points to the first character of a token. */
if( c == EOF )
goto eof;
/* While scanning the word, disqualify it for (expensive) */
/* keyword lookup when we can: $anything, "anything", \anything */
notkeyword = c == '$';
#ifdef GRAPHISOFT_LEXSCAN
/* See if this is a single char token from tokenseparator */
if ((lexscanmode != LEXSCANMODE_OLD) && (c != EOF && tokenseparator[c])) {
char oldc = c;
*b++ = c;
c = yychar();
if (c == tokenseparatornext[oldc]) { /* parse tokens staring with a tokensaparator: ?=, <=, >=, &&, || */
*b++ = c;
c = yychar();
}
if (lexscanmode == LEXSCANMODE_COMPATIBLE) {
if (c != EOF && !isspace (c) && tokenseparator[oldc] != 3) {
if( incp )
printf( "%s: line %d: Warning: %c will be a token separator, use spaces or quotes!\n", incp->fname, incp->line, *(b-1) );
}
}
} else {
#endif
/* look for white space to delimit word */
/* "'s get stripped but preserve white space */
/* \ protects next character */
while(
c != EOF &&
b < buf + sizeof( buf ) &&
( inquote || !istokenseparator ) )
{
int isdollar = 0;
if( c == '"' )
{
/* begin or end " */
inquote = !inquote;
notkeyword = 1;
}
else if( c != '\\' )
{
if (!inquote)
{
if (c == '$') {
isdollar = 1;
} else if (c == '(') {
if (wasdollar && !invariable) {
invariable = 1;
} else if (invariable) {
invariable++;
}
} else if (c == ')' && invariable) {
invariable--;
}
}
/* normal char */
*b++ = c;
}
else if( ( c = yychar()) != EOF )
{
/* \c */
*b++ = c;
notkeyword = 1;
}
else
{
/* \EOF */
break;
}
wasdollar = isdollar;
c = yychar();
istokenseparator = (c == EOF) || (!invariable && !inquote && !(wasdollar && c == '(') && tokenseparator[c] && tokenseparator[c] != 3);
/* incompatiblity in new and old lexscanmode */
if (!inquote && istokenseparator && !isspace (c)) {
if (lexscanmode == LEXSCANMODE_COMPATIBLE) {
if( incp )
printf( "%s: line %d: Warning: %c will be a token separator, use spaces or quotes!\n", incp->fname, incp->line, c );
}
if (lexscanmode == LEXSCANMODE_COMPATIBLE || lexscanmode == LEXSCANMODE_OLD) {
istokenseparator = 0;
}
}
}
}
/* Check obvious errors. */
if( b == buf + sizeof( buf ) )
{
yyerror( "string too big" );
goto eof;
}
if( inquote )
{
yyerror( "unmatched \" in string" );
goto eof;
}
/* We looked ahead a character - back up. */
if( c != EOF )
yyprev();
/* scan token table */
/* don't scan if it's obviously not a keyword or if its */
/* an alphabetic when were looking for punctuation */
*b = 0;
yylval.type = ARG;
if( !notkeyword && !( isalpha( *buf ) && scanmode == SCAN_PUNCT ) )
{
for( k = keywords; k->word; k++ )
if( *buf == *k->word && !strcmp( k->word, buf ) )
{
yylval.type = k->type;
yylval.string = k->word; /* used by symdump */
break;
}
}
if( yylval.type == ARG )
yylval.string = newstr( buf );
}
if( DEBUG_SCAN )
printf( "scan %s\n", symdump( &yylval ) );
return yylval.type;
eof:
yylval.type = EOF;
return yylval.type;
}
/*
 * symdump() - describe a token for error and debug messages
 *
 * Returns a pointer to a static buffer that is overwritten by the
 * next call.
 */
static char *
symdump( YYSTYPE *s )
{
    static char buf[ BIGGEST_TOKEN + 20 ];
    int kind = s->type;

    if( kind == EOF )
        sprintf( buf, "EOF" );
    else if( kind == 0 )
        sprintf( buf, "unknown symbol %s", s->string );
    else if( kind == ARG )
        sprintf( buf, "argument %s", s->string );
    else if( kind == STRING )
        sprintf( buf, "string \"%s\"", s->string );
    else
        sprintf( buf, "keyword %s", s->string );

    return buf;
}
/*
 * Change history
 *
 * | #  | Change | User           | Description                                      | Committed |
 * |----|--------|----------------|--------------------------------------------------|-----------|
 * | #9 | 2985   | Miklos Fazekas | Scan.c bugfix                                    |           |
 * | #8 | 2983   | Miklos Fazekas | Fixed error in handling cr/lf problem            |           |
 * | #7 | 2642   | Miklos Fazekas | Sync to 2.5rc2                                   |           |
 * | #6 | 2579   | Miklos Fazekas | GSJam to 2.5rc1 integration                      |           |
 * | #5 | 2578   | Miklos Fazekas | Integrate new lexical scanner code to GSJam      |           |
 * | #4 | 2539   | Miklos Fazekas | Updated sources                                  |           |
 * | #3 | 2519   | Miklos Fazekas | Sync to 2.5rc1                                   |           |
 * | #2 | 1395   | Miklos Fazekas | Merge with main jam                              |           |
 * | #1 | 1212   | Miklos Fazekas | Created a Jam branch                             |           |
 *
 * //guest/perforce_software/jam/src/scan.c
 *
 * | #2 | 486    | Perforce staff | Jam 2.3. See RELNOTES for a list of changes from 2.2.x. Just about every source file was touched when jam got ANSI-fied. | |
 * | #1 | 2      | laura          | Add Jam/MR 2.2 source                            |           |
 */