#!/usr/bin/perl -w

#############################################################
# Copyright 1998 VMware, Inc.  All rights reserved. -- VMware Confidential
#############################################################
#
# Tokenizer.pm
# 
# A basic class that is used to perform tokenization.  
#

package VMware::Tokenizer;
use strict;

#####################################################################
# Defined Constants/Global Variables                                #
#####################################################################

# Sample escape sequence mapping
# my %EscapeSequenceMapping = ( 
#	 		      '\\', "\\",
#			      '\n', "\n",
#			      '\t', "\t",
#			      '\"', "\""
#			      );

#####################################################################
# Public Static Methods (indep. of each object)                     #
#####################################################################

# Creates a tokenizer with no escape character, an empty escape mapping,
# no delimiter characters and a character count of zero.
#
# May be called on a class name or on an existing object; the new object is
# blessed into the invocant's class, so subclasses get instances of their
# own class.
#
# Returns:
#   the new tokenizer object
sub new {
    my $proto = shift;
    my $class = ref($proto) || $proto;

    my $self = {
        ESCAPE_CHAR    => "",
        # The mapping is looked up as a hash (escape sequence => replacement),
        # so the empty default has to be a hash reference as well.
        ESCAPE_MAPPING => {},
        DELIM_CHARS    => "",
        COUNT          => 0,
    };

    return(bless($self, $class));
}



# Tokenizes a string and returns the tokens as VMware::Token objects.
#
# Tokens are separated by white space.  Every character listed in the
# configured delimiter string is returned as a one-character token of its
# own.  Double quotes group text (including white space and delimiter
# characters) into the current token; the quotes themselves are dropped.
# If an escape character is configured, it and the character following it
# are replaced by whatever fromEscapeChar() returns for that pair.
#
# Token positions are taken from the object's running character count,
# which is advanced once per consumed character and is NOT reset here;
# call resetCount() first to have positions start at 1 again.
#
# Parameters:
#   $s - the string to tokenize (undef is treated as the empty string)
#
# Returns:
#   (1, \@tokens) on success
#   (0)           if the input ends inside a quoted section; a message is
#                 printed to STDERR in that case
sub tokenize {
    my ($self, $s) = @_;
    $s = "" unless defined($s);

    # Use the tokenizer parameters to set up the tokenizer
    my @delim_arr = split(//, $self->getDelimiters());
    my $esc       = $self->getEscapeChar();
    my $esc_map   = $self->getEscapeMapping();

    my @tokens = ();         # List of tokens to be returned
    my $state  = 0;          # 0: between tokens, 1: inside quotes, 2: inside a token
    my $len    = length($s);
    my $idx    = 0;          # Index of the next unread character of $s

    # Characters are read by index.  Chopping the head off with a regex such
    # as s/(.)(.*)/$2/ cannot consume a newline ('.' does not match it), which
    # never terminates on input containing "\n", and it re-copies the rest of
    # the string for every character.
    my $next_char = sub {
        ($self->{COUNT})++;
        return(substr($s, $idx++, 1));
    };

    # Reads the character after an escape character and returns the
    # replacement for the pair.  Callers make sure a character is left.
    my $unescape = sub {
        return(fromEscapeChar($esc . $next_char->(), $esc_map));
    };

    while ( $idx < $len ) {
        my $c      = $next_char->();
        my $is_esc = ( $esc ne "" && $c eq $esc );

        if ( $state == 0 ) {        # Between tokens.  Looking for the start of one.
            if ( isDelim($c, \@delim_arr) ) {
                push(@tokens, VMware::Token->new($c, $self->getCount()));
            }
            elsif ( $is_esc ) {
                # An escape character at the very end of the input is ignored.
                if ( $idx < $len ) {
                    my $newchar = $unescape->();

                    # The token is positioned at the escaped character.
                    push(@tokens, VMware::Token->new($newchar, $self->getCount()));
                    $state = 2;
                }
            }
            elsif ( $c =~ /\s/ ) {
                next;               # Skip white space
            }
            elsif ( $c eq '"' ) {
                # Opening quote: start an empty token and collect until the
                # closing quote.
                push(@tokens, VMware::Token->new("", $self->getCount()));
                $state = 1;
            }
            else {
                # Found a char.  Build the token
                push(@tokens, VMware::Token->new($c, $self->getCount()));
                $state = 2;
            }
        }
        elsif ( $state == 1 ) {     # Inside quotes.  Looking for the closing quote.
            if ( $c eq '"' ) {
                $state = 2;         # Found it; the token may still continue.
            }
            elsif ( $is_esc ) {
                # An escape character at the very end of the input is ignored.
                if ( $idx < $len ) {
                    $tokens[-1]->append($unescape->());
                }
            }
            else {
                $tokens[-1]->append($c);
            }
        }
        elsif ( $state == 2 ) {     # Inside an unquoted token
            if ( isDelim($c, \@delim_arr) ) {
                # The delimiter ends the current token and is a token itself.
                push(@tokens, VMware::Token->new($c, $self->getCount()));
                $state = 0;
            }
            elsif ( $is_esc ) {
                # An escape character at the very end of the input is ignored.
                if ( $idx < $len ) {
                    $tokens[-1]->append($unescape->());
                }
            }
            elsif ( $c eq '"' ) {
                $state = 1;         # Quoted section inside the token
            }
            elsif ( $c =~ /\s/ ) {
                $state = 0;         # Done with this token
            }
            else {
                $tokens[-1]->append($c);
            }
        }
    }

    if ( $state == 1 ) {
        print STDERR "End of line reached while looking for a quote.\n";
        return(0);
    }

    return(1, \@tokens);
}


#####################################################################
# Public Methods                                                    #
#####################################################################
# Returns the escape character configured for this tokenizer.  The
# constructor sets it to the empty string, which tokenize() treats as
# "no escaping".
sub getEscapeChar {
    my ($self) = @_;
    return( $self->{ESCAPE_CHAR} );
}

# Returns the escape mapping stored on this tokenizer, i.e. the reference
# that tokenize() hands to fromEscapeChar() to translate escape sequences.
sub getEscapeMapping {
    my ($self) = @_;
    return( $self->{ESCAPE_MAPPING} );
}

# Returns the string of delimiter characters.  tokenize() splits it into
# single characters, each of which is returned as a token of its own.
sub getDelimiters {
    my ($self) = @_;
    return( $self->{DELIM_CHARS} );
}

# Returns the running character count: the number of characters tokenize()
# has consumed since the object was created or resetCount() was last called.
sub getCount {
    my ($self) = @_;
    return( $self->{COUNT} );
}

# Mutator functions
# Sets the escape character.
#
# Parameters:
#   $esc - the new escape character; anything that is not exactly one
#          character long (including undef) is ignored and the previous
#          setting is kept
#
# The return value is not meaningful.
sub setEscapeChar {
    my ($self, $esc) = @_;

    # Only set if it is just one character
    if( defined($esc) && length($esc) == 1 ) {
        $self->{ESCAPE_CHAR} = $esc;
    }
}

# Stores the escape mapping used to translate escape sequences.
#
# Parameters:
#   $mapping - reference to a hash whose keys are complete escape sequences
#              (escape character plus the following character) and whose
#              values are the replacement strings
sub setEscapeMapping {
    my ($self, $mapping) = @_;
    $self->{ESCAPE_MAPPING} = $mapping;
}

# Stores the delimiter characters.
#
# Parameters:
#   $delims - a string; each of its characters acts as a delimiter that
#             tokenize() returns as a separate token
sub setDelimiters {
    my ($self, $delims) = @_;
    $self->{DELIM_CHARS} = $delims;
}

# Resets the running character count to zero, so that the positions of
# tokens produced by the next tokenize() call start at 1 again.
sub resetCount {
    my ($self) = @_;
    $self->{COUNT} = 0;
}

#####################################################################
# Methods not intended for use by user                              #
#####################################################################
    
# Translates an escape sequence into its replacement.
#
# Parameters:
#   $c      - the complete escape sequence (escape character followed by the
#             escaped character)
#   $mapref - reference to a hash mapping escape sequences to replacements
#
# Returns:
#   the replacement stored for $c, or the empty string when $c has no entry
#   or $mapref is not a hash reference
sub fromEscapeChar {
    my ($c, $mapref) = @_;

    # No usable mapping (e.g. none was ever configured) means no translation.
    return("") unless defined($c) && ref($mapref) eq 'HASH';

    # Direct lookup; there is no need to copy the hash and scan its keys.
    return("") unless exists($mapref->{$c});
    return($mapref->{$c});
}

# Tells whether a character is one of the delimiter characters.
#
# Parameters:
#   $c      - the character to test
#   $delims - reference to an array of single delimiter characters
#
# Returns:
#   1 if $c equals one of the delimiters, 0 otherwise (also when $c is
#   undef or $delims is not an array reference)
sub isDelim {
    my ($c, $delims) = @_;

    return(0) unless defined($c) && ref($delims) eq 'ARRAY';

    foreach my $d (@$delims) {
        return(1) if $d eq $c;
    }

    return(0);
}

#####################################################################
# Token.pm
# 
# A basic class that represents a token.  Information includes a string 
# and a number where the string was started.
#####################################################################

package VMware::Token;
use strict;

# Creates a token.
#
# May be called on a class name or on an existing object; the new token is
# blessed into the invocant's class.
#
# Parameters:
#   $tok - the token string
#   $pos - the position recorded for the token
#
# Returns:
#   the new token object
sub new {
    my ($proto, $tok, $pos) = @_;
    my $class = ref($proto) || $proto;

    my $self = {
        TOK => $tok,
        POS => $pos,
    };

    return(bless($self, $class));
}

# Combined accessor for the token string: with an argument it replaces the
# string first; in either case the current string is returned.
sub token {
    my ($self, @new_value) = @_;

    $self->{TOK} = $new_value[0] if @new_value;

    return($self->{TOK});
}

# Appends text to the end of the token string.
#
# Parameters:
#   $text - the text to append; undef is ignored
#
# Returns:
#   the token string after the append
sub append {
    my ($self, $text) = @_;

    # Skipping undef keeps the string unchanged without an
    # "uninitialized value" warning.
    $self->{TOK} .= $text if defined($text);

    return($self->{TOK});
}

# Combined accessor for the token position: with an argument it replaces
# the position first; in either case the current position is returned.
#
# The name matches Perl's built-in pos(), so always call it as a method
# ($token->pos()).
sub pos {
    my $self = shift;
    if( @_ ) { $self->{POS} = shift; }
    return($self->{POS});
}

# Returns a printable form of the token: the token string in single quotes
# followed by its position in parentheses, e.g. 'foo'(12).
sub toString {
    my ($self) = @_;
    return("'" . $self->token() . "'" . "(" . $self->pos() . ")");
}

1;
