#!/usr/bin/env php
<?php

/*
 * This is one big nasty file, not designed to really be maintained,
 * just to speed up the process of building the models
 * for each request.
 *
 * It is disgusting.
 *
 * It was a lot faster than going through each page in the docs.
 */

use Symfony\Component\DomCrawler\Crawler;

// Directory that contains this generator script; every other path is derived from it.
define('GEN_ROOT', __DIR__);
// Destination directory for the generated model classes.
define('GEN_OUTPUT', GEN_ROOT.'/../src/XeroPHP/Models');

require GEN_ROOT.'/../vendor/autoload.php';
require GEN_ROOT.'/objects/ParsedObjectInterface.php';
require GEN_ROOT.'/objects/API.php';
require GEN_ROOT.'/objects/Model.php';
require GEN_ROOT.'/objects/Property.php';
require GEN_ROOT.'/objects/Enum.php';

// Root of the documentation site; API and page slugs below are appended to it.
$documentation_base = 'http://developer.xero.com/documentation';

/*
 * One entry per API to scrape.  Keys:
 *   name              - human readable API name, passed to the API object
 *   uri               - path segment of the API under $documentation_base
 *   types             - slug of the page listing the API's types/codes
 *   namespace         - namespace passed to the API object
 *   api_stem_constant - name of the stem constant passed to the API object
 *   model_uris        - slugs of the individual model pages to parse
 */
$scrape_apis = [];

$scrape_apis[] = [
    'name' => 'Accounting API',
    'uri' => 'api',
    'types' => 'types',
    'namespace' => 'Accounting',
    'api_stem_constant' => 'API_CORE',
    'model_uris' => [
        //'attachments',
        'accounts',
        //'bankstatements',
        'banktransactions',
        'bank-transfers',
        'branding-themes',
        'contacts',
        'contactgroups',
        'credit-notes',
        'currencies',
        'employees',
        'expense-claims',
        'invoices',
        'items',
        'journals',
        'linked-transactions',
        'manual-journals',
        'organisation',
        'overpayments',
        'payments',
        'prepayments',
        'purchase-orders',
        'quotes',
        'receipts',
        'repeating-invoices',
        //'reports', - this is in a way different format
        'tax-rates',
        'tracking-categories',
        'users',
    ],
];

$scrape_apis[] = [
    'name' => 'Payroll - AU',
    'uri' => 'payroll-api',
    'types' => 'types-and-codes',
    'namespace' => 'PayrollAU',
    'api_stem_constant' => 'API_PAYROLL',
    'model_uris' => [
        'employees',
        'leaveapplications',
        'payitems',
        'payrollcalendars',
        'payruns',
        'payslip',
        'settings',
        'superfunds',
        'superfundproducts',
        'timesheets',
    ],
];

$scrape_apis[] = [
    'name' => 'Payroll - US',
    'uri' => 'payroll-api-us',
    'types' => 'types-codes',
    'namespace' => 'PayrollUS',
    'api_stem_constant' => 'API_PAYROLL',
    'model_uris' => [
        'employees',
        'pay-items',
        'pay-runs',
        'pay-schedules',
        'pay-stubs',
        'settings',
        'timesheets',
        'work-locations',
    ],
];

$scrape_apis[] = [
    'name' => 'Files API',
    'uri' => 'files-api',
    'types' => 'types',
    'namespace' => 'Files',
    'api_stem_constant' => 'API_FILE',
    'model_uris' => [
        'files',
        'folders',
        'associations',
    ],
];

$scrape_apis[] = [
    'name' => 'Assets API',
    'uri' => 'assets-api',
    'types' => 'types',
    'namespace' => 'Assets',
    'api_stem_constant' => 'API_ASSET',
    'model_uris' => [
        'asset',
        'asset-types',
        'settings',
    ],
];



//Collects one populated API object per entry in $scrape_apis.
$apis = [];

$client = new Goutte\Client();

/*
 * For every configured API:
 *   1. fetch its "types" page and turn each heading/paragraph/table group into a Model or Enum,
 *   2. fetch each model page and read the model's URL, methods and properties from its tables.
 */
foreach ($scrape_apis as $scrape_api) {
    $api = new API($scrape_api['name'], $scrape_api['namespace'], $scrape_api['api_stem_constant']);

    //Get the types
    $full_uri = sprintf('%s/%s/%s/', $documentation_base, $scrape_api['uri'], $scrape_api['types']);
    $crawler = $client->request('GET', $full_uri);

    //Walk the direct children of the content container in document order.
    $crawler->filter('.entry-content')->children()->each(function(Crawler $node) use($api) {
        //keep static in closure.
        /** @var Enum|Model $current_object */
        static $section_name, $current_object;

        $current_tag = $node->getNode(0)->tagName;

        //don't start till there's a section
        if (!isset($section_name) && !in_array($current_tag, ['h3', 'h4'], true)) {
            return false;
        }
        switch ($current_tag) {
            case 'h3':
            case 'h4':
                //Headings name the section; spaces are dropped so it can be compared with the paragraph text.
                $section_name = str_replace(' ', '', $node->text());
                break;
            case 'p':
                //Skip empty sections
                if ('' == $subsection_name = preg_replace('/\([\s\w]+\)/', '', str_replace(' ', '', $node->text()))) {
                    return null;
                }
                //Take the first line only.
                list($subsection_name) = explode("\n", $subsection_name);

                //Try record the anchor on page for searching
                //(if the paragraph holds several links, the last one wins)
                $anchor = null;
                $node->filter('a')->each(function(Crawler $node) use(&$anchor) {
                    $anchor = $node->attr('name');
                });

                //Fix for doc update for BC.
                //todo - reassess if these cases should belong in 'TaxRates' class
                //from 2.x (maybe Xero will have a machine-readable spec by then)
                //$anchor stays null when the paragraph has no named link, so guard before searching it.
                if ($anchor !== null && strpos($anchor, 'TaxTypes') !== false) {
                    $section_name = $anchor;
                }

                //A paragraph repeating the section name introduces a model; anything else is an enum.
                if ($section_name === $subsection_name) {
                    $current_object = new Model();
                    $current_object->setName($section_name);
                } else {
                    $current_object = new Enum($section_name, $subsection_name, $anchor, $node->text());
                }

                break;
            case 'table':
                //A table that appears before any paragraph has no object to populate; skip it
                //instead of calling methods on null.
                if ($current_object === null) {
                    break;
                }

                $possible_name_override = null;
                $node->filter('tr')->each(function(Crawler $node, $row_index) use($current_object, &$possible_name_override) {

                    //Why is this table different to every other one in the docs?
                    $swap_name_description = $current_object->getName() == 'SystemAccounts';
                    $skip_first_row = $current_object->getName() == 'SystemAccounts';

                    if ($skip_first_row && 0 == $row_index) {
                        return false;
                    }

                    $children = $node->children();
                    $has_description = count($children) > 1;

                    $name = $children->eq(0)->text();
                    $description =  $has_description ? $children->eq(1)->text() : null;

                    if ($swap_name_description) {
                        list($name, $description) = [$description, $name];
                    }
                    if ($current_object instanceof Model) {
                        //if there are commas in the name, it needs splitting.  eg. AddressLine 1,2,3,4
                        if (false !== strpos($name, ',')) {
                            list($name, $suffixes) = explode(' ', $name);
                            foreach (explode(',', $suffixes) as $suffix) {
                                $current_object->addProperty(new Property($name.$suffix, $description));
                            }
                        } else {
                            //this is the normal case, where there's only one property
                            $current_object->addProperty(new Property($name, $description));
                        }

                    } elseif ($current_object instanceof Enum) {

                        //Override the enum name if it is bold only in the first row (and it's a 2 column table)
                        if ($row_index === 0 &&
                            $children->count() === 2 &&
                            $children->eq(1)->filter('strong')->count() !== 0
                        ) {
                            //Remember the candidate; it is only applied once row 1 turns out not to be bold.
                            $possible_name_override = $children->eq(0)->text();
                        } elseif ($row_index === 1 && $children->eq(1)->filter('strong')->count() === 0 && $possible_name_override !== null) {
                            $current_object->setRawName($possible_name_override);
                            $current_object->removeValue($possible_name_override);
                        }

                        if ($children->eq(0)->filter('em')->count()) {
                            //Skip if the first child has an em in it
                        } else {
                            $current_object->addValue($name, $description);
                        }
                    }

                    return null;
                });

                //Add here
                if ($current_object instanceof Model) {
                    $api->addModel($current_object);
                    //debug
                    /** @var Model $current_object */
                    $current_object->printPropertyTable();
                } elseif ($current_object instanceof Enum) {
                    $api->addEnum($current_object);
                }

                break;
        }

        return null;
    });

    //Parse actual classes
    foreach ($scrape_api['model_uris'] as $uri) {

        $full_uri = sprintf('%s/%s/%s/', $documentation_base, $scrape_api['uri'], $uri);
        $crawler = $client->request('GET', $full_uri);

        /** @var Model $current_model */
        $current_model = null;

        //Nothing to parse when the page has no content container.
        if ($crawler->filter('.entry-content')->count() === 0) {
            continue;
        }

        //Remembered so the closure below can tell when it has reached the final element.
        $last = $crawler->filter('.entry-content')->children()->last();

        $crawler->filter('.entry-content')->children()->each(function (Crawler $node) use ($api, &$current_model, $crawler, $last) {
            //keep static in closure.
            /** @var Model $primary_model */
            static $page_heading, $section_name, $primary_model;

            $current_tag = $node->getNode(0)->tagName;

            switch ($current_tag) {
                case 'h1':
                    //Only the first line of the page title is used as the heading.
                    $h1_parts = explode("\n", $node->text());
                    $page_heading = trim($h1_parts[0]);
                    break;
                case 'h3':
                case 'h4':
                    //Headings mentioning "optional" do not replace the current section name.
                    if (false === stripos($node->text(), 'optional')) {
                        $section_name = str_replace("\xc2\xa0", ' ', $node->text()); //remove &nbsp;
                    }
                    break;
                case 'p':
                    //Bold text that is not directly inside an <em> also acts as a section name.
                    $node->filter('strong')->each(function(Crawler $node) use (&$section_name) {
                        if ($node->parents()->getNode(0)->tagName != 'em') {
                            $section_name = $node->text();
                        }
                    });

                    //filtering for properties that have special PUT/DELETE URI
                    if (preg_match('/PUT\s\/?(?<primary_model>[a-z]+)\/{?[a-z0-9\-]+}?\/(?<secondary_model>[a-z]+)/i', $node->text(), $matches)) {
                        /**
                         * @var Model $primary_model
                         */
                        //$primary_model is only created by the overview table, so it can still be
                        //null if such a paragraph comes first.
                        if ($primary_model instanceof Model &&
                            $matches['primary_model'] === $primary_model->getName() &&
                            $primary_model->hasProperty($matches['secondary_model'])
                        ) {
                            $primary_model->getProperty($matches['secondary_model'])->save_directly = true;
                        }
                    }

                    break;
                case 'table':
                    //If no section, we're in the overview table.
                    if ($current_model === null) {
                        //The primary model
                        $primary_model = new Model();
                        $current_model = $primary_model;

                        $node->filter('tr')->each(function(Crawler $node) use($primary_model) {
                            $columns = $node->children();
                            if (count($columns) == 0) {
                                return;
                            }
                            //The first column labels the row; only the URL and methods rows are used.
                            switch (strtolower($columns->eq(0)->text())) {
                                case 'url':
                                    $primary_model->setUrl($columns->eq(1)->text());
                                    break;
                                case 'methods supported':
                                    $primary_model->setMethods($columns->eq(1)->text());
                                    break;
                            }
                        });
                        return;
                    }

                    //Tables under an "Example..." section are not property listings.
                    if (strpos($section_name, 'Example') === 0) {
                        return;
                    }

                    //try to work out what's next
                    if (preg_match('/(xml )?elements( returned)? for( adding)?( an| get| a)? (?<model_name>[\w\s]+)/i', $section_name, $matches)) {

                        //too messy to add to the above regex - just override model name.  This will pick off any lower case words preceding the actual name.
                        if (preg_match('/^[a-z\s]+(?<model_name>[A-Z][\w\s]+)/', $matches['model_name'], $uc_words_matches)) {
                            $matches = $uc_words_matches;
                        }

                        //"A and B" / "A or B" headings: the first name is the model, the second becomes an alias.
                        $exploded_name = preg_split('/\b(and|or)\b/', $matches['model_name']);
                        $model_name = str_replace(' ', '', ucwords($exploded_name[0]));

                        //Kept for the last-node pass, which searches the page text for an ID and for PDF support.
                        $current_model->rawHTML = $crawler->filter('body')->text();

                        //if it hasn't been set, set it.
                        if ($primary_model->getName() === null) {
                            $primary_model_name = str_replace(' ', '', $page_heading);
                            $primary_model->setName($primary_model_name);
                            $api->addModel($primary_model);
                        } elseif ($current_model->getName() !== $model_name) {
                            //If its not the same, we've reached a new one
                            //create the sub one and add its parent
                            $current_model = new Model();
                            $current_model->setName($model_name);
                            $api->addModel($current_model);

                            $current_model->setParentModel($primary_model);

                            if (isset($exploded_name[1])) {
                                //there are two models with the same format.
                                //only allow for sub models
                                $api->addModelAlias($current_model, str_replace(' ', '', ucwords($exploded_name[1])));
                            }
                        }
                    }

                    //Under a "GET ..." section every row is skipped by the closure below (1 on match, 0 otherwise).
                    $finished_scraping = preg_match('/^GET (?<model_name>[\w\s]+)/i', $section_name, $matches);

                    //At this point, we will have a $current_model, so parse properties
                    $node->filter('tr')->each(function(Crawler $node) use ($current_model, $primary_model, $finished_scraping) {
                        //Flags set by single-cell divider rows and applied to the property rows that follow.
                        static $mandatory, $read_only, $get_only, $skip_next;

                        //this is for skipping rowspans
                        if (isset($skip_next) && $skip_next) {
                            $skip_next = false;
                            return;
                        }

                        $children = $node->children();
                        $num_children = count($children);

                        //Breaks in the table with colspans
                        if ($num_children === 1) {
                            $get_only = false;
                            //best that can be done really..
                            if (preg_match('/at least (one|two)/i', $children->eq(0)->text())) {
                                $mandatory = false;
                                $read_only = false;
                            } elseif (preg_match('/^Either/i', $children->eq(0)->text())) {
                                $mandatory = false;
                                $read_only = false;
                            } elseif (preg_match('/(required|mandatory)/i', $children->eq(0)->text())) {
                                $mandatory = true;
                                $read_only = false;
                            }

                            //Evaluated after the chain above, so these wordings override what it decided.
                            if (preg_match('/(optional|recommended)/i', $children->eq(0)->text())) {
                                $mandatory = false;
                                $read_only = false;
                            } elseif (preg_match('/(updatable)/i', $children->eq(0)->text())) {
                                $read_only = false;
                                $mandatory = false;
                            } elseif (preg_match('/(only )?returned on (a )?GET requests?( only)?\.?$/i', $children->eq(0)->text())) {
                                $read_only = true;
                                $mandatory = false;
                                $get_only = true;
                            }
                            //Any mention of PUT/POST makes the following rows writable again.
                            if (preg_match('/(PUT|POST)/i', $children->eq(0)->text())) {
                                $read_only = false;
                            }
                        } else {
                            if (!isset($mandatory)) {
                                $mandatory = false;
                            }

                            if (!isset($read_only)) {
                                $read_only = false;
                            }

                            if (!isset($get_only)) {
                                $get_only = false;
                            }

                            //Handle tables with rowspans
                            if (2 === $num_children) {
                                $column_name = trim($children->eq(0)->text(), "<> \n");
                                $column_description = trim($children->eq(1)->text());
                            } elseif (0 === $num_children) {
                                return;
                            } else {
                                //at least 3 children in here
                                $skip_next = true;
                                return;
                            }

                            //@todo Here should handle making these methods available on the models
                            //Query parameters, rows under a "GET ..." section and names containing a dash are not properties.
                            if (preg_match('/^(?<special_function>where|order|sort|filter|page|offset|pagesize|modified( after)?|record filter|include ?archived)$/i',
                                    $column_name, $matches) || $finished_scraping ||
                                strpos($column_name, '–') !== false
                            ) {
                                if (isset($matches['special_function'])) {
                                    switch ($matches['special_function']) {
                                        case 'page':
                                            $primary_model->setIsPagable(true);
                                            break;
                                    }
                                }
                                return;
                            }

                            $property = null;
                            //One cell may list several names ("<A> or <B>", "A & B"); each becomes its own property.
                            foreach (preg_split('/(>\s*or\s*<|\s&\s)/', $column_name) as $column_name) {
                                //make it into another param
                                $column_name = str_replace(' ', '', $column_name);
                                //The overrides below apply to this property only; the shared flag is restored afterwards.
                                $tmp_ro = $read_only;
                                // Don't make Primary GUID readonly
                                if ($column_name === $current_model->getClassName() . 'ID') {
                                    $read_only = false;
                                } elseif (stripos($column_description, 'cannot be set') !== false) {
                                    $read_only = true;
                                } elseif (stripos($column_description, 'has an attachment') !== false) {
                                    $read_only = true;
                                }
                                $property = new Property($column_name, $column_description, $mandatory, $read_only);
                                $current_model->addProperty($property, null, $get_only);
                                $read_only = $tmp_ro;
                            }

                            //add links to property (for parsing types)
                            //(only the last property created from this row receives them)
                            $children->eq(1)->filter('a')->each(function(Crawler $node) use($property) {
                                $property->addLink($node->text(), $node->attr('href'));
                            });

                        }
                    });

                //NOTE(review): per the PHP manual, unset() on a static variable only removes it for the
                //rest of this call; the next call sees the previous value again.  The section name is
                //therefore NOT cleared between tables.  Left as is because later tables may rely on it.
                unset($section_name);
                break;
                //END HANDLING FOR TABLE ELEMENT
            }

            // Do some processing once we are at the last node.
            //NOTE(review): nodes are matched by text content, not identity, so an earlier element with
            //the same text as the last one would also trigger this pass - confirm whether that is intended.
            if ($node->getNode(0)->nodeValue === $last->getNode(0)->nodeValue && $primary_model instanceof ParsedObjectInterface) {
                $models = [$primary_model];
                if  ($primary_model->getName() !== $current_model->getName()) {
                    $models[] = $current_model;
                }
                foreach ($models as $model) {
                    /** @var Model $model */
                    // If we don't have a GUID make a last ditch effort to get one...
                    if ($model->hasProperty($model->getClassName() . 'ID') === false && isset($model->rawHTML)) {
                        if (preg_match('/' . $model->getClassName() . 'ID/', $model->rawHTML)) {
                            $property = new Property($model->getClassName() . 'ID',
                                'Xero identifier',
                                false,
                                false,
                                false
                            );
                            $model->addProperty($property, 0);
                        }
                    }

                    //Debug
                    $model->printPropertyTable();
                }

                //rawHTML is only assigned once a section heading has matched, so check it exists first.
                if (isset($primary_model->rawHTML) && strpos($primary_model->rawHTML, 'returned as PDF') !== false) {
                    //Assume that it does support it.
                    $primary_model->setSupportsPDF(true);
                }
            }

        });
    }
    $apis[] = $api;
}

//Prefer the templates directory next to this script so the generator works from any
//working directory; fall back to the original cwd-relative path if it is not there.
$template_dir = is_dir(GEN_ROOT.'/templates') ? GEN_ROOT.'/templates/' : 'generator/templates/';

$loader = new Twig_Loader_Filesystem($template_dir);
$twig = new Twig_Environment($loader, []);
//Expose PHP's wordwrap() and addslashes() to the templates as filters of the same name.
$twig->addFilter(new Twig_SimpleFilter('wordwrap', 'wordwrap'));
$twig->addFilter(new Twig_SimpleFilter('addslashes', 'addslashes'));

//print_r($apis[0]->getSearchKeys());
//exit;

//Render every scraped model and every stray enum class to a PHP file under GEN_OUTPUT.
foreach ($apis as $api) {
    /* @var API $api */
    foreach ($api->getModels() as $model) {

        //If a class for this model already exists, carry over properties the docs no longer list.
        $model_class = sprintf('\\XeroPHP\\Models\\%s', $model->getClassName(true));
        if (class_exists($model_class) && method_exists($model_class, 'getProperties')) {
            $property_position = 0;
            /** @var XeroPHP\Remote\ObjectInterface $model_class */
            foreach ($model_class::getProperties() as $name => $property_meta) {
                //At this point, it's been removed from the API (and not deprecated).  Thanks Xero!
                if (!$model->hasProperty($name)) {
                    //Make it and deprecate it.
                    $property = new Property($name, 'This property has been removed from the Xero API', $property_meta[0], false, true);
                    //Re-insert at the position it holds in the existing class.
                    $model->addProperty($property, $property_position);
                }
                $property_position++;
            }
        }

        //The namespace becomes a relative path, which can be more than one level deep.
        $dir = sprintf('%s/%s', GEN_OUTPUT, strtr($model->getNamespace(), ['\\' => '/']));
        if (!is_dir($dir)) {
            //recursive, so missing parent directories are created as well
            mkdir($dir, 0777, true);
        }

        $template = $twig->render('model.twig', [
            'model' => $model,
        ]);
        $filename = sprintf('%s/%s.php', $dir, $model->getClassName());
        file_put_contents($filename, $template);
    }

    foreach ($api->getStrayEnums() as $class_name => $enums) {
        $dir = sprintf('%s/%s', GEN_OUTPUT, strtr($api->getNamespace(), ['\\' => '/']));
        //The models loop may not have created this directory (e.g. an API with enums but no models).
        if (!is_dir($dir)) {
            mkdir($dir, 0777, true);
        }

        $template = $twig->render('enum.twig', [
            'class_name' => $class_name,
            'enums' => $enums,
            'api' => $api
        ]);
        $filename = sprintf('%s/%s.php', $dir, $class_name);
        file_put_contents($filename, $template);

    }
}
