<?php
/**
 * @author    Oliver Schieche <lispian@schieche.email>
 * @copyright 2018 Oliver Schieche
 */
namespace Ghoti\Tools\Lispian\Lexical;

use Ghoti\Tools\Lispian\Exception\Lexer\InvalidCharacterException;
use Ghoti\Tools\Lispian\Exception\Lexer\UnterminatedRegexException;
use Ghoti\Tools\Lispian\Exception\Lexer\UnterminatedStringException;
use Ghoti\Tools\Lispian\Parser\LexerInterface;
use Ghoti\Tools\Lispian\Parser\ParserDriver;

use function array_key_exists, in_array, preg_match, strlen, strtoupper, substr;

/**
 * Class Lexer
 *
 * Hand-written tokenizer that walks a source string from left to right and
 * hands out one token per call to {@see Lexer::lex()}. The cursor is tracked
 * as a byte offset plus a zero-based line/column pair; the public getters
 * expose line and column one-based.
 *
 * All patterns are anchored with `\G` so that they only match exactly at the
 * current cursor position when `preg_match()` is called with an offset.
 *
 * @package Ghoti\Tools\Lispian\Lexical
 */
class Lexer implements LexerInterface
{
    /** @var string[] Single-character tokens that are returned verbatim */
    const SYMBOLS = ['(', ')', '.'];
    /** @var string */
    const PATTERN_CONSTANT = '~\G(?:true|false|null)\b~';
    /** @var string */
    const PATTERN_IDENTIFIER = '~\G[a-z_][a-z\d_]*~i';
    /** @var string */
    const PATTERN_NUMBER = '~\G-?\d+[.]\d+~';
    /** @var string */
    const PATTERN_OPERATORS_ARITHMETIC = '@\G[~*/+-]@';
    /** @var string Word boundary keeps identifiers such as "index" or "next" intact */
    const PATTERN_OPERATORS_ARY = '~\G(?:eq|ne|i[ns]|lte?|gte?)\b~';
    /** @var string */
    const PATTERN_OPERATORS_BITWISE = '~\G(?:<<|>>)~';
    /** @var string Word boundary keeps identifiers such as "order" or "android" intact */
    const PATTERN_OPERATORS_BOOL = '~\G(?:and|or)\b~';
    /** @var string Word boundary keeps identifiers such as "nothing" intact */
    const PATTERN_OPERATORS_UNARY = '~\G(?:not)\b~';
    /** @var string */
    const PATTERN_POST_INCREMENT = '~\G(?:inc|dec)\b~';
    /** @var string */
    const PATTERN_PRE_INCREMENT = '~\G(?:[+]{2}|--)~';
    /** @var string */
    const PATTERN_REGEX_START = '@\Gm(?<delimiter>[~{/:(])@';
    /** @var string */
    const PATTERN_RESERVED_WORD = '~\G(?:break|defun|do|if|for|let|nil|while)\b~';
    /** @var string */
    const PATTERN_INTEGER = '~\G-?\d+~';
    /** @var string */
    const PATTERN_VARIABLE = '~\G[$][a-z_][a-z\d_]*~i';
    /** @var string */
    const PATTERN_WHITESPACE = '~\G\s+~';

    /** @var string[] Maps an opening regex delimiter to the delimiter that closes it */
    const REGEX_CLOSING_DELIMITERS = [
        '~' => '~',
        '/' => '/',
        ':' => ':',
        '{' => '}',
        '(' => ')'
    ];

    /** @var int Zero-based column of the cursor */
    protected $column;
    /** @var int Zero-based line of the cursor */
    protected $line;
    /** @var int Byte offset of the cursor into the source */
    protected $offset;
    /** @var string Complete source text being tokenized */
    protected $source;

    /**
     * Lexer constructor.
     *
     * Places the cursor at the very beginning and stores the source.
     *
     * @param string $source
     */
    public function __construct(string $source)
    {
        $this->column = $this->line = $this->offset = 0;
        $this->setSource($source);
    }

    /**
     * @return int One-based column of the cursor
     */
    public function getColumn(): int
    {
        return $this->column + 1;
    }

    /**
     * @return int One-based line of the cursor
     */
    public function getLine(): int
    {
        return $this->line + 1;
    }

    /**
     * Reads the next token at the cursor and advances past it.
     *
     * Leading whitespace and `#` line comments are skipped first. The order
     * of the checks below is significant: keyword-like tokens are tried
     * before the generic identifier pattern, the regex literal (`m` followed
     * by a delimiter) before identifiers, `--`/`++` and numbers before the
     * single-character arithmetic operators.
     *
     * @param ParserDriver $parser Not used by this implementation
     * @return array Pair of [token name, token value]; ['', null] at end of input
     * @throws InvalidCharacterException When nothing matches at the cursor
     * @throws UnterminatedStringException
     * @throws UnterminatedRegexException
     */
    public function lex(ParserDriver $parser): array
    {
        $source = $this->getSource();
        $sourceLength = strlen($source);
        $firstChar = null;

        while (true) {
            $this->checkPattern(static::PATTERN_WHITESPACE, $whiteSpace, true);

            if ($this->offset >= $sourceLength) {
                return ['', null];
            }

            $firstChar = $source[$this->offset];

            if ('#' !== $firstChar) {
                break;
            }

            // Comment runs up to (not including) the line break; the next
            // whitespace pass consumes the break and updates the line count.
            preg_match('~\G#[^\r\n]*~', $source, $match, 0, $this->offset);
            $this->advanceCursor($match[0]);
        }

        if (in_array($firstChar, static::SYMBOLS, true)) {
            // Advance by the symbol that was actually matched
            $this->advanceCursor($firstChar);
            return [$firstChar, $firstChar];
        }

        if ('"' === $firstChar && $this->checkString($token)) {
            return ['T_STRING', $token];
        }

        if ('$' === $firstChar && $this->checkPattern(static::PATTERN_VARIABLE, $token)) {
            // Token value is the variable name without the leading '$'
            return ['T_VARIABLE', substr($token, 1)];
        }

        if ('-' === $firstChar && '>' === ($source[$this->offset + 1] ?? null)) {
            $this->advanceCursor('->');
            return ['->', '->'];
        }

        // Operators whose token name equals their text
        $literalOperators = [
            static::PATTERN_OPERATORS_ARY,
            static::PATTERN_OPERATORS_BITWISE,
            static::PATTERN_OPERATORS_BOOL,
            static::PATTERN_OPERATORS_UNARY,
        ];

        foreach ($literalOperators as $pattern) {
            if ($this->checkPattern($pattern, $token)) {
                return [$token, $token];
            }
        }

        if ($this->checkPattern(static::PATTERN_CONSTANT, $token)) {
            return ['T_CONSTANT', $token];
        }

        if ($this->checkPattern(static::PATTERN_POST_INCREMENT, $token)) {
            return ['T_POST_INCREMENT', $token];
        }

        if ($this->checkPattern(static::PATTERN_PRE_INCREMENT, $token)) {
            return ['T_PRE_INCREMENT', $token];
        }

        if ($this->checkPattern(static::PATTERN_RESERVED_WORD, $token)) {
            // e.g. "while" becomes T_WHILE
            return ['T_' . strtoupper($token), $token];
        }

        if ($this->checkRegexStart($token)) {
            return ['T_REGEX', $token];
        }

        if ($this->checkPattern(static::PATTERN_IDENTIFIER, $token)) {
            return ['T_IDENTIFIER', $token];
        }

        // Decimal numbers must be tried before integers, otherwise the
        // integer pattern would consume the part before the dot.
        if ($this->checkPattern(static::PATTERN_NUMBER, $token)) {
            return ['T_NUMBER', (float) $token];
        }

        if ($this->checkPattern(static::PATTERN_INTEGER, $token)) {
            return ['T_INTEGER', (int) $token];
        }

        if ('*' === $firstChar && '*' === ($source[$this->offset + 1] ?? null)) {
            $this->advanceCursor('**');
            return ['**', '**'];
        }

        if ($this->checkPattern(static::PATTERN_OPERATORS_ARITHMETIC, $token)) {
            return [$token, $token];
        }

        throw new InvalidCharacterException($this, $this->getSourceOffset());
    }

    /**
     * @return string The complete source text
     */
    public function getSource(): string
    {
        return $this->source;
    }

    /**
     * @return string The part of the source from the cursor to the end
     */
    public function getSourceOffset(): string
    {
        return substr($this->source, $this->offset);
    }

    /**
     * Replaces the source text. The cursor (offset, line, column) is left
     * untouched.
     *
     * @param string $source
     * @return Lexer
     */
    public function setSource(string $source): Lexer
    {
        $this->source = $source;
        return $this;
    }

    /**
     * Moves the cursor past the given, already matched, text.
     *
     * Without $scanWhitespace the text is assumed to contain no line breaks
     * and only offset and column are increased. With $scanWhitespace every
     * "\n" in the text increases the line and resets the column.
     *
     * @param string $token Text that was consumed
     * @param bool $scanWhitespace Whether to look for line breaks in $token
     * @return Lexer
     */
    protected function advanceCursor(string $token, bool $scanWhitespace = false): self
    {
        $length = strlen($token);
        $this->offset += $length;

        if (!$scanWhitespace) {
            $this->column += $length;
            return $this;
        }

        for ($p = 0; $p < $length; ++$p) {
            if ("\n" === $token[$p]) {
                ++$this->line;
                $this->column = 0;
            } else {
                ++$this->column;
            }
        }

        return $this;
    }

    /**
     * Tries to match a pattern at the cursor and consumes the match.
     *
     * @param string $pattern PCRE pattern, expected to be anchored with \G
     * @param string $result Receives the matched text on success
     * @param bool $scanWhitespace Passed on to advanceCursor()
     * @return bool Whether the pattern matched
     */
    protected function checkPattern(string $pattern, string &$result = null, bool $scanWhitespace = false): bool
    {
        if (!preg_match($pattern, $this->getSource(), $match, 0, $this->offset)) {
            return false;
        }

        $result = $match[0];
        $this->advanceCursor($result, $scanWhitespace);
        return true;
    }

    /**
     * Reads a quoted string literal that starts at the cursor.
     *
     * The character at the cursor is taken as the delimiter. A backslash
     * escapes the following character: the letters listed in $escapes are
     * translated to their control character, any other character (including
     * the delimiter and the backslash itself) is taken literally.
     *
     * @param string $token Receives the unescaped string contents
     * @return bool Always true; an unterminated string throws instead
     * @throws UnterminatedStringException
     */
    protected function checkString(string &$token = null): bool
    {
        static $escapes = [
            'e' => "\e",
            'f' => "\f",
            'n' => "\n",
            'r' => "\r",
            't' => "\t",
            'v' => "\v",
        ];

        $source = $this->getSourceOffset();
        $delimiter = $source[0];

        $token = '';

        for ($p = 1, $length = strlen($source); $p < $length; $p++) {
            $char = $source[$p];

            if ('\\' === $char) {
                if (++$p >= $length) {
                    // Backslash is the last byte of the input: there is
                    // nothing to escape and the string cannot be terminated.
                    break;
                }

                $escaped = $source[$p];
                $token .= array_key_exists($escaped, $escapes) ? $escapes[$escaped] : $escaped;
                continue;
            }

            if ($delimiter === $char) {
                // Strings may span lines, so let the cursor count line breaks
                $this->advanceCursor(substr($source, 0, $p + 1), true);
                return true;
            }

            $token .= $char;
        }

        throw new UnterminatedStringException($this, $source);
    }

    /**
     * Reads a regex literal (`m`, delimiter, body, closing delimiter and
     * optional flags) that starts at the cursor.
     *
     * On success $token is an array with the keys
     *  - 'del': the opening delimiter,
     *  - 'rx':  the expression including its opening and closing delimiter,
     *  - 'fl':  the flags, only present when at least one flag was given.
     *
     * A backslash skips the following character. Bracket-style delimiters
     * are not nesting-aware: the first unescaped closing delimiter ends the
     * literal.
     *
     * @param mixed|null $token Receives the token data on success
     * @return bool False when no regex literal starts at the cursor
     * @throws InvalidCharacterException
     * @throws UnterminatedRegexException
     */
    protected function checkRegexStart(&$token = null): bool
    {
        $source = $this->getSource();

        if (!preg_match(static::PATTERN_REGEX_START, $source, $match, 0, $this->offset)) {
            return false;
        }

        $delimiter = $match['delimiter'];
        $expect = static::REGEX_CLOSING_DELIMITERS[$delimiter] ?? null;

        if (null === $expect) {
            throw new InvalidCharacterException($this, $this->getSourceOffset());
        }

        $start = $this->offset;
        $length = strlen($source);

        // Begin right after the 'm' and the opening delimiter; starting any
        // earlier would mistake the opening delimiter (or unrelated text in
        // front of the literal) for the closing one.
        for ($p = $start + 2; $p < $length; $p++) {
            if ('\\' === $source[$p]) {
                ++$p;
                continue;
            }

            if ($expect !== $source[$p]) {
                continue;
            }

            $token = [
                'del' => $delimiter,
                // From the opening delimiter up to and including the closing one
                'rx' => substr($source, $start + 1, $p - $start),
            ];
            ++$p;

            if (preg_match('~\G(?<flags>[imsxU]+)~', $source, $match, 0, $p)) {
                $p += strlen($match['flags']);
                $token['fl'] = $match['flags'];
            }

            // The literal may span lines, so let the cursor count line breaks
            $this->advanceCursor(substr($source, $start, $p - $start), true);
            return true;
        }

        // Same convention as the other lexer exceptions: pass the source
        // from the cursor onwards.
        throw new UnterminatedRegexException($this, $this->getSourceOffset());
    }
}
