#!/usr/bin/perl
#-------------------------------------------------------------------------------
# Deterministic finite state parser from regular expression
# Philip R Brenan at gmail dot com, Appa Apps Ltd Inc., 2018
#-------------------------------------------------------------------------------

package Data::DFA;
our $VERSION = "20180323";
require v5.16;
use warnings FATAL => qw(all);
use strict;
use Carp qw(confess);
use Data::Dump qw(dump);
use Data::Table::Text qw(:all);
use utf8;

sub Transitions{0}                                                              # Constants describing a state of the finite state automaton: [{transition label=>new state}, {jump target=>1}, final state if true]
sub Jumps{1}
sub Final{2}

sub Element   {q(element)}                                                      # Components of an expression
sub Sequence  {q(sequence)}
sub Optional  {q(optional)}
sub ZeroOrMore{q(zeroOrMore)}
sub OneOrMore {q(OneOrMore)}
sub Choice    {q(choice)}

#1 Construct regular expression                                                 # Construct a regular expression that defines the language to be parsed using the following combining operations which can all be imported:

my $counter;                                                                    # Counter used to create names for extension states
sub counter{++$counter}

sub element($)                                                                  #S One element.
 {my ($label) = @_;                                                             # Transition label
  [Element, @_]
 }

sub sequence(@)                                                                 #S Sequence of elements.
 {my (@elements) = @_;                                                          # Elements
  [Sequence, @elements]
 }

sub optional($)                                                                 #S Zero or one of an element.
 {my ($element) = @_;                                                           # Element
  [Optional, $element]
 }

sub zeroOrMore($)                                                               #S Zero or more repetitions of an element.
 {my ($element) = @_;                                                           # Element to be repeated
  [ZeroOrMore, $element]
 }

sub oneOrMore($)                                                                #S One or more repetitions of an element.
 {my ($element) = @_;                                                           # Element to be repeated
  [OneOrMore, $element]
 }

sub choice(@)                                                                   #S Choice from amongst one or more elements.
 {my (@elements) = @_;                                                          # Elements to be chosen from
  [Choice, @elements]
 }

#1 Deterministic finite state parser                                            # Create a deterministic finite state automaton to parse sequences of symbols in the language defined by a regular expression.

sub nfaFromExpr2($$)                                                            #P Create a DFA from an expression by pushing it on to the array of state transitions and connecting it up to existing states with jumps.
 {my ($states, $expr) = @_;                                                     # States, Expression to convert to a DFA
  $states       //= {};
  my $next        = sub{scalar keys %$states};                                  # Next state name
  my $last        = sub{&$next - 1};                                            # Last state created
  my $save        = sub{$states->{&$next} = [@_]};                              # Create a new transition
  my $jump        = sub                                                         # Add jumps
   {my ($from, @to) = @_;
    $states->{$from}->[Jumps]->{$_}++ for @to
   };
  my $start       = &$next;
  my ($structure) = @$expr;
  if ($structure eq Element)                                                    # Element
   {my (undef, $label, $data) = @$expr;
    &$save({$label=>$start+1}, undef);
   }
  elsif ($structure eq Sequence)                                                # Sequence of elements
   {my (undef, @elements) = @$expr;
    $states->nfaFromExpr2($_) for @elements;
   }
  elsif ($structure eq Optional)                                                # Optional element
   {my (undef, $element) = @$expr;
    $states->nfaFromExpr2($element);
    &$jump($start, &$next);                                                     # Optional so we have the option of jumping over it
   }
  elsif ($structure eq ZeroOrMore)                                              # Zero or more
   {my (undef, $element) = @$expr;
    $states->nfaFromExpr2($element);
    &$jump($start, &$next+1);                                                   # Optional so we have the option of jumping over it
    &$save(undef, {$start=>1});                                                 # Repeated so we have the option of redoing it
   }
  elsif ($structure eq OneOrMore)                                               # One or more
   {my (undef, @elements) = @$expr;
    $states->nfaFromExpr2($_) for @elements;
    my $N = &$next;
    &$jump($N, $start, $N+1);                                                   # Do it again or move on
   }
  elsif ($structure eq Choice)                                                  # Choice
   {my (undef, @elements) = @$expr;
    my @fix;
    for my $i(keys @elements)                                                   # Each element index
     {my $element = $elements[$i];                                              # Each element separate by a gap so we can not jump in then jump out
      &$jump($start, &$next) if $i;
      $states->nfaFromExpr2($element);                                          # Choice
      if ($i < $#elements)
       {push @fix, &$next;
        &$save();                                                               # Fixed later to jump over subsequent choices
       }
     }
    my $N = &$next;                                                             # Fix intermediates
    &$jump($_, $N) for @fix;
   }
  else                                                                          # Unknown request
   {confess "Unknown structuring operation: $structure";
   }
  $states
 }

sub nfaFromExpr(@)                                                              #PS Create an NFA from an expression.
 {my (@expr) = @_;                                                              # Expressions
  my $states = bless {};
  $states->nfaFromExpr2($_) for @expr;                                          # Create state transitions
  $states->{keys %$states} = [undef, undef, 1];                                 # End state
  $states
 }

sub printNfa($$;$)                                                              #P Print the current state of a NFA.
 {my ($states, $title, $print) = @_;                                            # States, title, print to STDERR if 2 or to STDOUT if 1
  my @o;
  push @o, [qw(Location  F Transitions Jumps)];
  for(sort{$a <=> $b} keys %$states)
   {my @d = @{$states->{$_}};
    my @j = sort {$a <=> $b} keys %{$d[Jumps]};
    push @o, [sprintf("%4d", $_), $d[2]//0,
              dump($d[Transitions]), dump(@j ? [@j]:undef)];
   }
  my $s = "$title\n". formatTableBasic([@o]);
  say STDERR $s if $print and $print == 2;
  say STDOUT $s if $print and $print == 1;
  nws $s
 }

sub printDfa($$;$)                                                              #P Print the current state of a DFA.
 {my ($states, $title, $print) = @_;                                            # States, title, print to STDERR if 2 or to STDOUT if 1
  my @o;
  push @o, [qw(Location  F Transitions)];
  for(sort{$a <=> $b} keys %$states)
   {my @d = @{$states->{$_}};
    push @o, [sprintf("%4d", $_), $d[2]//0, dump($d[Transitions])];
   }
  my $s = "$title\n". formatTableBasic([@o]);
  say STDERR $s if $print and $print == 2;
  say STDOUT $s if $print and $print == 1;
  nws $s
 }

sub print($$;$)                                                                 # Print the current state of the finite automaton. If it is non deterministic, the non deterministic jumps will be shown as well as the transitions table. If deterministic, only the transitions table will be shown.
 {my ($states, $title, $print) = @_;                                            # States, title, print to STDERR if 2 or to STDOUT if 1
  my $j = 0;                                                                    # Number of non deterministic jumps encountered
  for(sort{$a <=> $b} keys %$states)
   {my @d = @{$states->{$_}};
    my @j = sort keys %{$d[Jumps]};
    ++$j if @j > 0;
   }
  if ($j) {&printNfa(@_)} else {&printDfa(@_)}
 }

sub symbols($)                                                                  #P Return an array of all the transition symbols.
 {my ($states) = @_;                                                            # States
  my %s;
  for(keys %$states)
   {my @d = @{$states->{$_}};
    $s{$_}++ for keys %{$d[0]};
   }
  sort keys %s
 }

sub reachableStates($$$;$)                                                      #P Find the names of all the states that can be reached from a specified state via a specified symbol and all the jumps available.
 {my ($states, $stateName, $symbol, $targets) = @_;                             # States, name of start state, symbol, optional array reference of reachable targets so far
  my $state = $$states{$stateName};
  confess "No such state: $stateName" unless $state;
  my ($transitions, $jumps, $final) = @$state;
  $targets //= [];

  if (my $t = $$transitions{$symbol})                                           # Transition on the symbol
   {push @$targets, $t;
   }
  for my $jump(sort keys %$jumps)                                               # Make a jump and try again
   {$states->reachableStates($jump, $symbol, $targets);
   }

  $targets
 }

sub removeJumpsFromState($$)                                                    #P Remove the jumps from a state
 {my ($states, $stateName) = @_;                                                # States, name of the state to be dejumped.
  my $state = $$states{$stateName};
  return unless $state;
  my ($transitions, $jumps, $final) = @$state;
  my @symbol = $states->symbols;

  my @fix;
  for my $symbol(@symbol)
   {my $reach = $states->reachableStates($stateName, $symbol);
    if (0) {}
    elsif (@$reach == 0) {}
    elsif (@$reach == 1)
     {$$states{$stateName}->[Transitions]->{$symbol} = $$reach[0];              # Transition to single target
     }
    else
     {my $name = -&counter;                                                     # New state name
      push @fix, $name;
      $$states{$name} = [undef, {map {$_=>1} @$reach}];                         # Create new named state with a jump to each target
      $$states{$stateName}->[Transitions]->{$symbol} = $name;                   # Transition to new state
      for my $s(sort @$reach)
       {$$states{$name}->[Final] = 1 if $$states{$s}->[Final];                  # Mark the new state as final if it can reach a final state
       }
     }
   }
  $$states{$stateName}->[Jumps] = undef;                                        # Remove jumps
  $states->removeJumpsFromState($_) for sort @fix;                              # Remove jumps from newly created states
 }

sub reachableFrom($$;$)                                                         #P Find the names of all the states that can be reached from a specified state using any symbol.
 {my ($states, $stateName, $targets) = @_;                                      # States, name of start state, optional hash reference of reachable targets so far
  my $state = $$states{$stateName};
  confess "No such state: $stateName" unless $state;
  my ($transitions, $jumps, $final) = @$state;
  $targets //= {0=>1};

  for my $transition(sort keys %$transitions)
   {my $s = $$transitions{$transition};
    $states->reachableFrom($s, $targets) if !$$targets{$s}++;
   }

  $targets
 }

sub removeJumpsFromAllStates($)                                                 #P Remove the jumps from every state.
 {my ($states) = @_;                                                            # States
  $states->removeJumpsFromState($_) for sort keys %$states;
  my $r = $states->reachableFrom(0);
  for my $state(sort keys %$states)
   {delete $$states{$state} unless $$r{$state};
   }
 }

sub removeDuplicateStates($)                                                    #P Remove any states with duplicate transition sets redirecting transitions to the surviving state.
 {my ($states) = @_;                                                            # States
  my %d;                                                                        # Duplicates
  for my $state(sort keys %$states)                                             # Find states with duplicate transitions
   {my @s = @{$$states{$state}};                                                # Details of state
    push @{$d{dump($s[Final], $s[Transitions])}}, $state;                       # Finality and transitions must match for two states to be identical
   }
  for my $d(sort keys %d)                                                       # Each duplicated transition set
   {my ($s, @s) = @{$d{$d}};
    my %s = map {$_=>1} @s;
    delete $$states{$_} for @s;                                                 # Delete duplicates
    for my $state(sort keys %$states)                                           # Each state
     {my $transitions = $$states{$state}->[Transitions];                        # Transitions in state
      for my $transition(sort keys %$transitions)                               # Each  transition in the state
       {my $t = $$transitions{$transition};                                     # Target of transition
        $$transitions{$transition} = $s if $s{$t};                              # Replace references to duplicate transitions with a reference to the surviving state
       }
     }
   }
 }

sub dfaFromExpr(@)                                                              #S Create a DFA from a regular expression.
 {my (@expr) = @_;                                                              # Expression
  my $nfa = nfaFromExpr(@expr);
  $nfa->removeJumpsFromAllStates;
  $nfa->removeDuplicateStates;
  $nfa
 }

sub parser(@)                                                                   #S Create a parser from a deterministic finite state automaton constructed from a regular expression.
 {my ($dfa) = @_;                                                               # Deterministic finite state automaton generated from an expression
  my $state = 0;
  my @processed;                                                                # Parser
   (
#2 Parser methods                                                               # The following subs accept the input sequence of symbols to be validated and describe the current state of the parse. They are returned as an array of subs by L<parser|/parser> when a new parser is constructed.
    sub ($)                                                                     # Accept the next symbol drawn from the symbol set if possible by moving to a new state otherwise confessing with a helpful message
     {my ($symbol) = @_;                                                        # Next symbol to be processed by the finite state automaton
      my $transitions = $dfa->{$state}->[Transitions];
      if (my $nextState = $transitions->{$symbol})
       {$state = $nextState;
        push @processed, $symbol;
        return 0;
       }
      my @next = join ' ', sort keys %$transitions;
      confess join "\n",
       "Already processed: ". join(' ', @processed),
       "Expected one of  : ". join(' ', @next),
       "But was given    : $symbol",
       '';
      },
    sub                                                                         # Returns whether we are currently in a final state or not
     {$dfa->{$state}->[Final] ? 1 : 0;
     },
    sub                                                                         # Returns an array of symbols that would be accepted in the current state
     {my $transitions = $dfa->{$state}->[Transitions];
      sort keys %$transitions
     },
    sub                                                                         # Returns the array of symbols processed so far by this parser
     {@processed
     },
   )
 }

#-------------------------------------------------------------------------------
# Export
#-------------------------------------------------------------------------------

use Exporter qw(import);

use vars qw(@ISA @EXPORT @EXPORT_OK %EXPORT_TAGS);

@ISA          = qw(Exporter);
@EXPORT_OK    = qw(
choice
dfaFromExpr
element
oneOrMore optional
parser
print
sequence
zeroOrMore
);
%EXPORT_TAGS  = (all=>[@EXPORT, @EXPORT_OK]);

# podDocumentation

=pod

=encoding utf-8

=head1 Name

Data::DFA - Deterministic finite state parser from regular expression

=head1 Synopsis

Create a deterministic finite state parser to recognize sequences of symbols
that match a given regular expression.

For example: to recognize sequences of symbols drawn from B<'a'..'e'> that match
the regular expression: B<a (b|c)+ d? e> proceed as follows:

# Construct a deterministic finite state automaton from the regular expression:

  use Data::DFA qw(:all);
  use Data::Table::Text qw(:all);
  use Test::More qw(no_plan);

  my $dfa = dfaFromExpr
   (element("a"),
    oneOrMore(choice(element("b"), element("c"))),
    optional(element("d")),
    element("e")
   );

# Print the symbols used and the transitions table:

  is_deeply ['a'..'e'], [$dfa->symbols];

  ok $dfa->print("Dfa for a(b|c)+d?e :") eq nws <<END;
Dfa for a(b|c)+d?e :
Location  F  Transitions
       0  0  { a => 1 }
       1  0  { b => 2, c => 2 }
       2  0  { b => 2, c => 2, d => 6, e => 7 }
       6  0  { e => 7 }
       7  1  undef
END

# Create a parser and parse a sequence of symbols with the returned sub:

  my ($parser, $end, $next, $processed) = $dfa->parser;                         # New parser

  eval { &$parser($_) } for(qw(a b a));                                         # Try to parse a b a

  say STDERR $@;                                                                # Error message
#   Already processed: a b
#   Expected one of  : b c d e
#   But was given    : a

  is_deeply [&$next],      [qw(b c d e)];                                       # Next acceptable symbol
  is_deeply [&$processed], [qw(a b)];                                           # Symbols processed
  ok !&$end;                                                                    # Not at the end

=head1 Description

The following sections describe the methods in each functional area of this
module.  For an alphabetic listing of all methods by name see L<Index|/Index>.



=head1 Construct regular expression

Construct a regular expression that defines the language to be parsed using the following combining operations which can all be imported:

=head2 element($)

One element.

     Parameter  Description
  1  $label     Transition label

Example:


  ok dfaFromExpr(element("a"))->print("Element: a") eq nws <<END;
  Element: a
  Location  F  Transitions
         0  0  { a => 1 }
         1  1  undef
  END


This is a static method and so should be invoked as:

  Data::DFA::element


=head2 sequence(@)

Sequence of elements.

     Parameter  Description
  1  @elements  Elements

Example:


  ok dfaFromExpr(sequence(element("a"), element("b")))

  ->print("Sequence: ab") eq nws <<END;
  Sequence: ab
  Location  F  Transitions
         0  0  { a => 1 }
         1  0  { b => 2 }
         2  1  undef
  END


This is a static method and so should be invoked as:

  Data::DFA::sequence


=head2 optional($)

Zero or one of an element.

     Parameter  Description
  1  $element   Element

Example:


  ok dfaFromExpr(element("a"), optional(element("b")), element("c"))

  ->print("Optional: ab?c") eq nws <<END;
  Optional: ab?c
  Location  F  Transitions
         0  0  { a => 1 }
         1  0  { b => 2, c => 3 }
         2  0  { c => 3 }
         3  1  undef
  END

  my $dfa = dfaFromExpr

  (element("a"),

  oneOrMore(choice(element("b"), element("c"))),

  optional(element("d")),

  element("e")

  );

  ok $dfa->print("Dfa for a(b|c)+d?e :") eq nws <<END;
  Dfa for a(b|c)+d?e :
  Location  F  Transitions
         0  0  { a => 1 }
         1  0  { b => 2, c => 2 }
         2  0  { b => 2, c => 2, d => 6, e => 7 }
         6  0  { e => 7 }
         7  1  undef
  END


This is a static method and so should be invoked as:

  Data::DFA::optional


=head2 zeroOrMore($)

Zero or more repetitions of an element.

     Parameter  Description
  1  $element   Element to be repeated

Example:


  ok dfaFromExpr(element("a"), zeroOrMore(element("b")), element("c"))

  ->print("Zero Or More: ab*c") eq nws <<END;
  Zero Or More: ab*c
  Location  F  Transitions
         0  0  { a => 1 }
         1  0  { b => 1, c => 4 }
         4  1  undef
  END


This is a static method and so should be invoked as:

  Data::DFA::zeroOrMore


=head2 oneOrMore($)

One or more repetitions of an element.

     Parameter  Description
  1  $element   Element to be repeated

Example:


  my $dfa = dfaFromExpr(element("a"), oneOrMore(element("b")), element("c"));

  ok $dfa->print("One or More: ab+c") eq nws <<END;
  One or More: ab+c
  Location  F  Transitions
         0  0  { a => 1 }
         1  0  { b => 2 }
         2  0  { b => 2, c => 4 }
         4  1  undef
  END


This is a static method and so should be invoked as:

  Data::DFA::oneOrMore


=head2 choice(@)

Choice from amongst one or more elements.

     Parameter  Description
  1  @elements  Elements to be chosen from

Example:


  my $dfa = dfaFromExpr(element("a"),

  choice(element("b"), element("c")),

  element("d"));

  ok $dfa->print("Choice: (a(b|c)d") eq nws <<END;
  Choice: (a(b|c)d
  Location  F  Transitions
         0  0  { a => 1 }
         1  0  { b => 2, c => 2 }
         2  0  { d => 5 }
         5  1  undef
  END


This is a static method and so should be invoked as:

  Data::DFA::choice


=head1 Deterministic finite state parser

Create a deterministic finite state automaton to parse sequences of symbols in the language defined by a regular expression.

=head2 print($$$)

Print the current state of the finite automaton. If it is non deterministic, the non deterministic jumps will be shown as well as the transitions table. If deterministic, only the transitions table will be shown.

     Parameter  Description
  1  $states    States
  2  $title     Title
  3  $print     Print to STDERR if 2 or to STDOUT if 1

Example:


  my $dfa = dfaFromExpr

  (element("a"),

  oneOrMore(choice(element("b"), element("c"))),

  optional(element("d")),

  element("e")

  );

  ok $dfa->print("Dfa for a(b|c)+d?e :") eq nws <<END;
  Dfa for a(b|c)+d?e :
  Location  F  Transitions
         0  0  { a => 1 }
         1  0  { b => 2, c => 2 }
         2  0  { b => 2, c => 2, d => 6, e => 7 }
         6  0  { e => 7 }
         7  1  undef
  END


=head2 dfaFromExpr(@)

Create a DFA from a regular expression.

     Parameter  Description
  1  @expr      Expression

Example:


  my $dfa = dfaFromExpr

  (element("a"),

  oneOrMore(choice(element("b"), element("c"))),

  optional(element("d")),

  element("e")

  );

  ok $dfa->print("Dfa for a(b|c)+d?e :") eq nws <<END;
  Dfa for a(b|c)+d?e :
  Location  F  Transitions
         0  0  { a => 1 }
         1  0  { b => 2, c => 2 }
         2  0  { b => 2, c => 2, d => 6, e => 7 }
         6  0  { e => 7 }
         7  1  undef
  END


This is a static method and so should be invoked as:

  Data::DFA::dfaFromExpr


=head2 parser(@)

Create a parser from a deterministic finite state automaton constructed from a regular expression.

     Parameter  Description
  1  $dfa       Deterministic finite state automaton generated from an expression

Example:


  my $dfa = dfaFromExpr

  (element("a"),

  oneOrMore(choice(element("b"), element("c"))),

  optional(element("d")),

  element("e")

  );

  ok $dfa->print("Dfa for a(b|c)+d?e :") eq nws <<END;
  Dfa for a(b|c)+d?e :
  Location  F  Transitions
         0  0  { a => 1 }
         1  0  { b => 2, c => 2 }
         2  0  { b => 2, c => 2, d => 6, e => 7 }
         6  0  { e => 7 }
         7  1  undef
  END

  my ($parser, $end) = $dfa->parser;

  &$parser($_) for qw(a b);

  ok !&$end;

  my ($parser, $end) = $dfa->parser;

  &$parser($_) for qw(a b b c e);

  ok &$end;

  my ($parser, $end, $next, $processed) = $dfa->parser;

  eval{&$parser($_)} for(qw(a b a));

  ok !index(nws($@), nws <<END);
  Already processed: a b
  Expected one of  : b c d e
  But was given    : a
  END

  is_deeply [&$next],      [qw(b c d e)];

  is_deeply [&$processed], [qw(a b)];

  is_deeply [&$processed], [qw(a b)];


This is a static method and so should be invoked as:

  Data::DFA::parser


=head2 Parser methods

The following subs accept the input sequence of symbols to be validated and describe the current state of the parse. They are returned by L<parser|/parser> when a new parser is constructed.

=head3 ($)

Accept the next symbol drawn from the symbol set if possible by moving to a new state otherwise confessing with a helpful message

     Parameter  Description
  1  $symbol    Next symbol to be processed by the finite state automaton

=head3 ()

Returns whether we are currently in a final state or not


=head3 ()

Returns an array of symbols that would be accepted in the current state


=head3 ()

Returns the array of symbols processed so far by this parser



=head1 Private Methods

=head2 nfaFromExpr2($$)

Create a DFA from an expression by pushing it on to the array of state transitions and connecting it up to existing states with jumps.

     Parameter  Description
  1  $states    States
  2  $expr      Expression to convert to a DFA

=head2 nfaFromExpr(@)

Create an NFA from an expression.

     Parameter  Description
  1  @expr      Expressions

This is a static method and so should be invoked as:

  Data::DFA::nfaFromExpr


=head2 printNfa($$$)

Print the current state of a NFA.

     Parameter  Description
  1  $states    States
  2  $title     Title
  3  $print     Print to STDERR if 2 or to STDOUT if 1

=head2 printDfa($$$)

Print the current state of a DFA.

     Parameter  Description
  1  $states    States
  2  $title     Title
  3  $print     Print to STDERR if 2 or to STDOUT if 1

=head2 symbols($)

Return an array of all the transition symbols.

     Parameter  Description
  1  $states    States

Example:


  my $dfa = dfaFromExpr

  (element("a"),

  oneOrMore(choice(element("b"), element("c"))),

  optional(element("d")),

  element("e")

  );

  ok $dfa->print("Dfa for a(b|c)+d?e :") eq nws <<END;
  Dfa for a(b|c)+d?e :
  Location  F  Transitions
         0  0  { a => 1 }
         1  0  { b => 2, c => 2 }
         2  0  { b => 2, c => 2, d => 6, e => 7 }
         6  0  { e => 7 }
         7  1  undef
  END

  is_deeply ['a'..'e'], [$dfa->symbols];


=head2 reachableStates($$$$)

Find the names of all the states that can be reached from a specified state via a specified symbol and all the jumps available.

     Parameter   Description
  1  $states     States
  2  $stateName  Name of start state
  3  $symbol     Symbol
  4  $targets    Optional array reference of reachable targets so far

=head2 removeJumpsFromState($$)

Remove the jumps from a state

     Parameter   Description
  1  $states     States
  2  $stateName  Name of the state to be dejumped.

=head2 reachableFrom($$$)

Find the names of all the states that can be reached from a specified state using any symbol.

     Parameter   Description
  1  $states     States
  2  $stateName  Name of start state
  3  $targets    Optional hash reference of reachable targets so far

=head2 removeJumpsFromAllStates($)

Remove the jumps from every state.

     Parameter  Description
  1  $states    States

=head2 removeDuplicateStates($)

Remove any states with duplicate transition sets redirecting transitions to the surviving state.

     Parameter  Description
  1  $states    States


=head1 Index


1 L<|/>

2 L<choice|/choice>

3 L<dfaFromExpr|/dfaFromExpr>

4 L<element|/element>

5 L<nfaFromExpr|/nfaFromExpr>

6 L<nfaFromExpr2|/nfaFromExpr2>

7 L<oneOrMore|/oneOrMore>

8 L<optional|/optional>

9 L<parser|/parser>

10 L<print|/print>

11 L<printDfa|/printDfa>

12 L<printNfa|/printNfa>

13 L<reachableFrom|/reachableFrom>

14 L<reachableStates|/reachableStates>

15 L<removeDuplicateStates|/removeDuplicateStates>

16 L<removeJumpsFromAllStates|/removeJumpsFromAllStates>

17 L<removeJumpsFromState|/removeJumpsFromState>

18 L<sequence|/sequence>

19 L<symbols|/symbols>

20 L<zeroOrMore|/zeroOrMore>

=head1 Installation

This module is written in 100% Pure Perl and, thus, it is easy to read, use,
modify and install.

Standard L<Module::Build> process for building and installing modules:

  perl Build.PL
  ./Build
  ./Build test
  ./Build install

=head1 Author

L<philiprbrenan@gmail.com|mailto:philiprbrenan@gmail.com>

L<http://www.appaapps.com|http://www.appaapps.com>

=head1 Copyright

Copyright (c) 2016-2018 Philip R Brenan.

This module is free software. It may be used, redistributed and/or modified
under the same terms as Perl itself.

=cut



# Tests and documentation

sub test
 {my $p = __PACKAGE__;
  binmode($_, ":utf8") for *STDOUT, *STDERR;
  return if eval "eof(${p}::DATA)";
  my $s = eval "join('', <${p}::DATA>)";
  $@ and die $@;
  eval $s;
  $@ and die $@;
 }

test unless caller;

1;
# podDocumentation
__DATA__
use warnings FATAL=>qw(all);
use strict;
use Test::More tests=>25;

ok dfaFromExpr(element("a"))->print("Element: a") eq nws <<END;                 #Telement
Element: a
Location  F  Transitions
       0  0  { a => 1 }
       1  1  undef
END

ok dfaFromExpr(sequence(element("a"), element("b")))                            #Tsequence
 ->print("Sequence: ab") eq nws <<END;                                          #Tsequence
Sequence: ab
Location  F  Transitions
       0  0  { a => 1 }
       1  0  { b => 2 }
       2  1  undef
END

ok dfaFromExpr(element("a"), optional(element("b")), element("c"))              #Toptional
  ->print("Optional: ab?c") eq nws <<END;                                       #Toptional
Optional: ab?c
Location  F  Transitions
       0  0  { a => 1 }
       1  0  { b => 2, c => 3 }
       2  0  { c => 3 }
       3  1  undef
END

ok dfaFromExpr(element("a"), zeroOrMore(element("b")), element("c"))            #TzeroOrMore
  ->print("Zero Or More: ab*c") eq nws <<END;                                   #TzeroOrMore
Zero Or More: ab*c
Location  F  Transitions
       0  0  { a => 1 }
       1  0  { b => 1, c => 4 }
       4  1  undef
END

if (1)
 {my $dfa = dfaFromExpr(element("a"), oneOrMore(element("b")), element("c"));   #ToneOrMore
  ok $dfa->print("One or More: ab+c") eq nws <<END;                             #ToneOrMore
One or More: ab+c
Location  F  Transitions
       0  0  { a => 1 }
       1  0  { b => 2 }
       2  0  { b => 2, c => 4 }
       4  1  undef
END

  is_deeply [4], $dfa->reachableStates(2, "c");
  is_deeply [2], $dfa->reachableStates(2, "b");
 }

if (1)
 {my $dfa = dfaFromExpr(element("a"),                                           #Tchoice
                        choice(element("b"), element("c")),                     #Tchoice
                        element("d"));                                          #Tchoice
  ok $dfa->print("Choice: (a(b|c)d") eq nws <<END;                              #Tchoice
Choice: (a(b|c)d
Location  F  Transitions
       0  0  { a => 1 }
       1  0  { b => 2, c => 2 }
       2  0  { d => 5 }
       5  1  undef
END
  is_deeply [2], $dfa->reachableStates(1, "c");
  is_deeply [2], $dfa->reachableStates(1, "b");
  is_deeply ['a'..'d'], [$dfa->symbols];
 }

if (1)
 {my $nfa = nfaFromExpr(element("a"), zeroOrMore(choice(element("a"), element("a"))), element("a"));
  ok $nfa->print("aChoice: (a(a|a)*a") eq nws <<END;
aChoice: (a(a|a)*a
Location  F  Transitions  Jumps
       0  0  { a => 1 }   undef
       1  0  { a => 2 }   [3, 5]
       2  0  undef        [4]
       3  0  { a => 4 }   undef
       4  0  undef        [1]
       5  0  { a => 6 }   undef
       6  1  undef        undef
END
  is_deeply [2,4,6], $nfa->reachableStates(1, "a");
  $nfa->removeJumpsFromState(1);
  ok $nfa->print("Jumps removed 1") eq nws <<END;
Jumps removed 1
Location  F  Transitions  Jumps
      -2  1  { a => -2 }  undef
      -1  1  { a => -2 }  undef
       0  0  { a => 1 }   undef
       1  0  { a => -1 }  undef
       2  0  undef        [4]
       3  0  { a => 4 }   undef
       4  0  undef        [1]
       5  0  { a => 6 }   undef
       6  1  undef        undef
END
  $nfa->removeJumpsFromState(-1);
  ok $nfa->print("Jumps removed 2") eq nws <<END;
Jumps removed 2
Location  F  Transitions  Jumps
      -2  1  { a => -2 }  undef
      -1  1  { a => -2 }  undef
       0  0  { a => 1 }   undef
       1  0  { a => -1 }  undef
       2  0  undef        [4]
       3  0  { a => 4 }   undef
       4  0  undef        [1]
       5  0  { a => 6 }   undef
       6  1  undef        undef
END
  $nfa->removeJumpsFromState(4);
  ok $nfa->print("Jumps removed 4") eq nws <<END;
Jumps removed 4
Location  F  Transitions  Jumps
      -2  1  { a => -2 }  undef
      -1  1  { a => -2 }  undef
       0  0  { a => 1 }   undef
       1  0  { a => -1 }  undef
       2  0  undef        [4]
       3  0  { a => 4 }   undef
       4  0  { a => -1 }  undef
       5  0  { a => 6 }   undef
       6  1  undef        undef
END
 }

if (1)
 {my $dfa = dfaFromExpr(element("a"),
            zeroOrMore(choice(element("a"), element("a"))),
            element("a"));
  ok $dfa->print("Dfa for a(a|a)*a :") eq nws <<END;
Dfa for a(a|a)*a :
Location  F  Transitions
      -3  1  { a => -3 }
       0  0  { a => 1 }
       1  0  { a => -3 }
END
 }

if (1)
 {my $dfa = dfaFromExpr                                                         #TdfaFromExpr #Toptional #Tprint #Tsymbols  #Tparser
   (element("a"),                                                               #TdfaFromExpr #Toptional #Tprint #Tsymbols  #Tparser
    oneOrMore(choice(element("b"), element("c"))),                              #TdfaFromExpr #Toptional #Tprint #Tsymbols  #Tparser
    optional(element("d")),                                                     #TdfaFromExpr #Toptional #Tprint #Tsymbols  #Tparser
    element("e")                                                                #TdfaFromExpr #Toptional #Tprint #Tsymbols  #Tparser
   );                                                                           #TdfaFromExpr #Toptional #Tprint #Tsymbols  #Tparser

  ok $dfa->print("Dfa for a(b|c)+d?e :") eq nws <<END;                          #TdfaFromExpr #Toptional #Tprint #Tsymbols  #Tparser
Dfa for a(b|c)+d?e :
Location  F  Transitions
       0  0  { a => 1 }
       1  0  { b => 2, c => 2 }
       2  0  { b => 2, c => 2, d => 6, e => 7 }
       6  0  { e => 7 }
       7  1  undef
END

  is_deeply ['a'..'e'], [$dfa->symbols];                                        #Tsymbols

  if (1)                                                                        # Incomplete parse
   {my ($parser, $end) = $dfa->parser;                                          #Tparser
    &$parser($_) for qw(a b);                                                   #Tparser
    ok !&$end;                                                                  #Tparser
   }

  if (1)                                                                        # Completed parse
   {my ($parser, $end) = $dfa->parser;                                          #Tparser
    &$parser($_) for qw(a b b c e);                                             #Tparser
    ok &$end;                                                                   #Tparser
   }

  if (1)                                                                        # Parse progress message
   {my ($parser, $end, $next, $processed) = $dfa->parser;                       #Tparser
    eval{&$parser($_)} for(qw(a b a));                                          #Tparser
    ok !index(nws($@), nws <<END);                                              #Tparser
Already processed: a b
Expected one of  : b c d e
But was given    : a
END

    is_deeply [&$next],      [qw(b c d e)];                                     #Tparser
    is_deeply [&$processed], [qw(a b)];                                         #Tparser
    is_deeply [&$processed], [qw(a b)];                                         #Tparser
   }
 }
