battis / data-utilities Goto Github PK

View Code? Open in Web Editor NEW

1.0 4.0 1.0 764 KB

A handful of useful helper functions to process data

Home Page: http://battis.github.io/data-utilities/

License: GNU General Public License v3.0

PHP 100.00%

data-utilities's Introduction

DataUtilities

A handful of useful helper functions to process data

data-utilities's People

Contributors

Stargazers

Watchers

Forkers

hanleybrand

data-utilities's Issues

superscripts for 1st, 2nd, 3rd

data-utilities/src/DataUtilities.php

Line 78 in 0907e80

// TODO superscripts for 1st, 2nd, 3rd

        /* add a space after each piece of punctuation */
        $title = preg_replace('/([^a-z0-9])\s*/i', '$1 ', strtolower($title));

        // TODO smart em- and en-dashes
        // TODO superscripts for 1st, 2nd, 3rd

        /* Split the string into separate words */
        $words = preg_split('/[' . implode('', $spaceEquivalents) . ']+/', $title, -1, PREG_SPLIT_NO_EMPTY);

dry out this code somehow

data-utilities/src/Text.php

Line 210 in 74d4262

return "{$singular}ES"; // TODO dry out this code somehow

            case "S":
            case "X":
            case "Z":
                return "{$singular}ES"; // TODO dry out this code somehow
            case "y":
                if (self::isVowel(substr($singular, -2, 1))) {
                    return "{$singular}s";
                }
                return substr($singular, 0, strlen($singular) - 1) . "ies";
            case "Y":
                if (self::isVowel(substr($singular, -2, 1))) {
                    return "{$singular}S";
                }
                return substr($singular, 0, strlen($singular) - 1) . "IES";
            default:
                switch (substr($singular, -2)) {

Noisy about missing CONTEXT_PREFIX and CONTEXT_DOCUMENT_ROOT

The library probably shouldn't write noisy entries to the PHP error log about these server variables (`CONTEXT_PREFIX` and `CONTEXT_DOCUMENT_ROOT`) when they are not present.

document pluralize

@param string $singular

@return string

data-utilities/src/Text.php

Line 153 in ea3c553

* TODO document pluralize

        return strtolower($snake_case);
    }

    /**
     * TODO document pluralize
     * @param string $singular
     * @return string
     */
    public static function pluralize(string $singular): string
    {
        switch (substr($singular, -1)) {

does DIRECTORY_SEPARATOR need to be escaped in regex?

https://github.com/battis/data-utilities/blob/a5d7dd650f609523c713628191dafd5e2bef1400/src/PHPUnit/FixturePath.php#L7

use ReflectionClass;

// TODO does DIRECTORY_SEPARATOR need to be escaped in regex?
trait FixturePath
{
    private ?string $_fixturePath = null;

smart em- and en-dashes

data-utilities/src/DataUtilities.php

Line 77 in 0907e80

// TODO smart em- and en-dashes

        /* add a space after each piece of punctuation */
        $title = preg_replace('/([^a-z0-9])\s*/i', '$1 ', strtolower($title));

        // TODO smart em- and en-dashes
        // TODO superscripts for 1st, 2nd, 3rd

        /* Split the string into separate words */
        $words = preg_split('/[' . implode('', $spaceEquivalents) . ']+/', $title, -1, PREG_SPLIT_NO_EMPTY);

More forgiving CSV parsing

Allow blank lines, maybe even incorrect line breaks. Per this issue.

smart em- and en-dashes

data-utilities/src/Text.php

Line 57 in f3ac7e7

// TODO smart em- and en-dashes

<?php

namespace Battis\DataUtilities;

use Battis\Hydratable\Hydrate;

class Text
{
    /**
     * Converts $title to Title Case, and returns the result
     *
     * The title is lower-cased first, a single space is inserted after every
     * non-alphanumeric character, and the result is split into words on the
     * space-equivalent characters. Each word is then upper-cased, replaced by
     * its camel-case spelling, left lower-case, or capitalized, in that order
     * of precedence. The words are re-joined with single spaces.
     *
     * Because matching happens after lower-casing, the entries of
     * `allCapsWords` and `lowerCaseWords` and the keys of `camelCaseWords`
     * must be given in lower case to match anything.
     *
     * @param string $title
     * @param array $params An associative array of additional special cases,
     *                      e.g. (with any subset of the keys below)
     *                      ```
     * [
     *     'lowerCaseWords' => ['foo', 'bar'],
     *     'allCapsWords' => ['baz'],
     *     'camelCaseWords' => ['foobarbaz' => 'fooBarBaz'],
     *     'spaceEquivalents' => ['\t']
     * ]
     * ```
     *                      NOTE(review): how supplied values are combined
     *                      with the built-in defaults is decided by `Hydrate`,
     *                      which is not visible here -- confirm.
     *
     * @return string
     *
     * @see http://www.sitepoint.com/title-case-in-php/ SitePoint
     **/
    public static function titleCase($title, $params = [])
    {
        $hydrate = new Hydrate();

        /*
         * Our array of 'small words' which shouldn't be capitalized if they
         * aren't the first word.  Add your own words to taste.
         */
        $lowerCaseWords = $hydrate($params['lowerCaseWords'], [
            'of','a','the','and','an','or','nor','but','is','if','then','else',
            'when','at','from','by','on','off','for','in','out','over','to',
            'into','with'
        ]);

        /* Words that are always rendered entirely in upper case */
        $allCapsWords = $hydrate($params['allCapsWords'], [
            'i', 'ii', 'iii', 'iv', 'v', 'vi', 'sis', 'csv', 'php', 'html',
            'lti'
        ]);

        /* Lower-case word => exact spelling to substitute */
        $camelCaseWords = $hydrate($params['camelCaseWords'], [
            'github' => 'GitHub'
        ]);

        /* Regex character-class fragments that are treated as word breaks */
        $spaceEquivalents = $hydrate($params['spaceEquivalents'], [
            '\s', '_'
        ]);

        /* add a space after each piece of punctuation */
        $title = preg_replace('/([^a-z0-9])\s*/i', '$1 ', strtolower($title));

        // TODO smart em- and en-dashes
        // TODO superscripts for 1st, 2nd, 3rd

        /* Split the string into separate words */
        $words = preg_split(
            '/[' . implode('', $spaceEquivalents) . ']+/',
            $title,
            -1,
            PREG_SPLIT_NO_EMPTY
        );

        foreach ($words as $key => $word) {
            /* strict comparison: words are always strings, never juggle types */
            if (in_array($word, $allCapsWords, true)) {
                $words[$key] = strtoupper($word);
            } elseif (array_key_exists($word, $camelCaseWords)) {
                $words[$key] = $camelCaseWords[$word];
            } elseif ($key === 0 || !in_array($word, $lowerCaseWords, true)) {
                /* the first word is capitalized even if it is a 'small word' */
                $words[$key] = ucwords($word);
            }
        }

        /* Join the words back into a string */
        return implode(' ', $words);
    }

    /**
     * What portion of string `$a` and `$b` overlaps?
     *
     * For example if `$a` is `'abcdefg'` and `$b` is `'fgjkli'`, the overlapping
     * portion would be `'fg'`.
     *
     * Starting offsets in `$a` are tried from left to right; the first offset
     * from which `$a` and the start of `$b` agree until one of them runs out
     * wins. If nothing is found and `$swap` is true, the search is repeated
     * once with the arguments exchanged.
     *
     * @param string $a
     * @param string $b
     * @param boolean $swap Attempt to swap `$a` and `$b` to find overlap. (default: `true`)
     * @return string Overlapping portion of `$a` and `$b`, `''` if no overlap
     *                (also `''` if either argument is not a string)
     */
    public static function overlap($a, $b, $swap = true)
    {
        if (!is_string($a) || !is_string($b)) {
            return '';
        }

        $lengthA = strlen($a);
        $lengthB = strlen($b);

        for ($i = 0; $i < $lengthA; $i++) {
            /* advance while the tail of $a keeps matching the head of $b */
            $j = 0;
            while (
                $j < $lengthB &&
                $i + $j < $lengthA &&
                $a[$i + $j] === $b[$j]
            ) {
                $j++;
            }

            /* no mismatch before one of the strings was exhausted */
            if ($j === $lengthB || $i + $j === $lengthA) {
                return substr($a, $i, $j);
            }
        }

        if ($swap) {
            return static::overlap($b, $a, false);
        }
        return '';
    }

    /**
     * Converts a `snake_case` identifier to `PascalCase`
     *
     * The string is split on underscores, the first character of every token
     * is upper-cased (the rest of the token is left untouched), and the tokens
     * are concatenated without a separator.
     *
     * @param string $snake_case
     * @return string
     */
    public static function snake_case_to_PascalCase(string $snake_case): string
    {
        return implode(
            '',
            array_map(
                fn(string $token): string => ucfirst($token),
                explode('_', $snake_case)
            )
        );
    }

    /**
     * Converts a `camelCase` (or `PascalCase`) identifier to `snake_case`
     *
     * Underscores are inserted before digits, between an acronym and the word
     * that follows it, and before every other upper-case letter; runs of
     * underscores between two other characters are then reduced to a single
     * underscore and the result is lower-cased. Leading and trailing
     * underscores are not collapsed.
     *
     * @param string $camelCase
     * @return string
     */
    public static function camelCase_to_snake_case(string $camelCase): string
    {
        $replacements = [
            '/([^0-9])([0-9])/' => '$1_$2', // separate numeric phrases
            '/([A-Z])([A-Z][a-z])/' => '$1_$2', // separate trailing word from acronym
            '/([^A-Z])([A-Z])/' => '$1_$2', // separate end of word from trailing word
            /*
             * minimize underscores -- lookarounds instead of capture groups so
             * that a single character between two runs (`x__Y__Z`) can border
             * both runs; a consuming pattern skips the second run
             */
            '/(?<=[^_])_+(?=[^_])/' => '_',
        ];

        $snake_case = $camelCase;
        foreach ($replacements as $regexp => $replacement) {
            $snake_case = preg_replace($regexp, $replacement, $snake_case);
        }
        return strtolower($snake_case);
    }

    /**
     * Builds a plural form of `$singular` using simple English spelling rules
     *
     * - ends in `s`, `x`, `z`, `sh` or `ch`: append `es`
     * - ends in a `y` that does not follow a vowel: replace the `y` with `ies`
     * - anything else (including vowel + `y`, e.g. `day`): append `s`
     *
     * The suffix is upper-cased when the last character of `$singular` is
     * upper-case, so `BOX` becomes `BOXES`. Irregular plurals are not handled.
     *
     * @param string $singular
     * @return string
     */
    public static function pluralize(string $singular): string
    {
        $length = strlen($singular);
        $last = (string) substr($singular, -1);
        $lowerLast = strtolower($last);
        $lowerTail = strtolower((string) substr($singular, -2));

        $stem = $singular;
        $suffix = 's';

        if (
            in_array($lowerLast, ['s', 'x', 'z'], true) ||
            in_array($lowerTail, ['sh', 'ch'], true)
        ) {
            $suffix = 'es';
        } elseif ($lowerLast === 'y') {
            $preceding = $length > 1 ? $singular[$length - 2] : '';
            /* vowel + y ("day", "key") just takes an "s" */
            if (!self::isVowel($preceding)) {
                $stem = (string) substr($singular, 0, $length - 1);
                $suffix = 'ies';
            }
        }

        /* the suffix follows the case of the word's final character */
        $upperCase = $last !== $lowerLast;
        return $stem . ($upperCase ? strtoupper($suffix) : $suffix);
    }

    /**
     * Is `$char` a single English vowel (`a`, `e`, `i`, `o`, `u`), ignoring case?
     *
     * @param string $char
     * @return bool
     */
    private static function isVowel(string $char): bool
    {
        return strlen($char) === 1 && stripos('aeiou', $char) !== false;
    }
}

document camelCase_to_snake_case

@param string $camelCase

@return string

data-utilities/src/Text.php

Line 131 in ea3c553

* TODO document camelCase_to_snake_case

        );
    }

    /**
     * TODO document camelCase_to_snake_case
     * @param string $camelCase
     * @return string
     */
    public static function camelCase_to_snake_case(string $camelCase): string
    {
        $snake_case = $camelCase;

document snake_case_to_PascalCase

@param string $snake_case

@return string

data-utilities/src/Text.php

Line 115 in ea3c553

* TODO document snake_case_to_PascalCase

        return '';
    }

    /**
     * TODO document snake_case_to_PascalCase
     * @param string $snake_case
     * @return string
     */
    public static function snake_case_to_PascalCase(string $snake_case): string
    {
        return join(

superscripts for 1st, 2nd, 3rd

"/([A-Z])([A-Z][a-z])/", // separate trailing word from acronym

"/([^A-Z])([A-Z])/", // separate end of word from trailing word,

"/([^_])_+([^_])/", // minimize underscores

data-utilities/src/Text.php

Line 58 in f3ac7e7

// TODO superscripts for 1st, 2nd, 3rd

<?php

namespace Battis\DataUtilities;

use Battis\Hydratable\Hydrate;

class Text
{
    /**
     * Converts $title to Title Case, and returns the result
     *
     * The title is lower-cased first, a single space is inserted after every
     * non-alphanumeric character, and the result is split into words on the
     * space-equivalent characters. Each word is then upper-cased, replaced by
     * its camel-case spelling, left lower-case, or capitalized, in that order
     * of precedence. The words are re-joined with single spaces.
     *
     * Because matching happens after lower-casing, the entries of
     * `allCapsWords` and `lowerCaseWords` and the keys of `camelCaseWords`
     * must be given in lower case to match anything.
     *
     * @param string $title
     * @param array $params An associative array of additional special cases,
     *                      e.g. (with any subset of the keys below)
     *                      ```
     * [
     *     'lowerCaseWords' => ['foo', 'bar'],
     *     'allCapsWords' => ['baz'],
     *     'camelCaseWords' => ['foobarbaz' => 'fooBarBaz'],
     *     'spaceEquivalents' => ['\t']
     * ]
     * ```
     *                      NOTE(review): how supplied values are combined
     *                      with the built-in defaults is decided by `Hydrate`,
     *                      which is not visible here -- confirm.
     *
     * @return string
     *
     * @see http://www.sitepoint.com/title-case-in-php/ SitePoint
     **/
    public static function titleCase($title, $params = [])
    {
        $hydrate = new Hydrate();

        /*
         * Our array of 'small words' which shouldn't be capitalized if they
         * aren't the first word.  Add your own words to taste.
         */
        $lowerCaseWords = $hydrate($params['lowerCaseWords'], [
            'of','a','the','and','an','or','nor','but','is','if','then','else',
            'when','at','from','by','on','off','for','in','out','over','to',
            'into','with'
        ]);

        /* Words that are always rendered entirely in upper case */
        $allCapsWords = $hydrate($params['allCapsWords'], [
            'i', 'ii', 'iii', 'iv', 'v', 'vi', 'sis', 'csv', 'php', 'html',
            'lti'
        ]);

        /* Lower-case word => exact spelling to substitute */
        $camelCaseWords = $hydrate($params['camelCaseWords'], [
            'github' => 'GitHub'
        ]);

        /* Regex character-class fragments that are treated as word breaks */
        $spaceEquivalents = $hydrate($params['spaceEquivalents'], [
            '\s', '_'
        ]);

        /* add a space after each piece of punctuation */
        $title = preg_replace('/([^a-z0-9])\s*/i', '$1 ', strtolower($title));

        // TODO smart em- and en-dashes
        // TODO superscripts for 1st, 2nd, 3rd

        /* Split the string into separate words */
        $words = preg_split(
            '/[' . implode('', $spaceEquivalents) . ']+/',
            $title,
            -1,
            PREG_SPLIT_NO_EMPTY
        );

        foreach ($words as $key => $word) {
            /* strict comparison: words are always strings, never juggle types */
            if (in_array($word, $allCapsWords, true)) {
                $words[$key] = strtoupper($word);
            } elseif (array_key_exists($word, $camelCaseWords)) {
                $words[$key] = $camelCaseWords[$word];
            } elseif ($key === 0 || !in_array($word, $lowerCaseWords, true)) {
                /* the first word is capitalized even if it is a 'small word' */
                $words[$key] = ucwords($word);
            }
        }

        /* Join the words back into a string */
        return implode(' ', $words);
    }

    /**
     * What portion of string `$a` and `$b` overlaps?
     *
     * For example if `$a` is `'abcdefg'` and `$b` is `'fgjkli'`, the overlapping
     * portion would be `'fg'`.
     *
     * Starting offsets in `$a` are tried from left to right; the first offset
     * from which `$a` and the start of `$b` agree until one of them runs out
     * wins. If nothing is found and `$swap` is true, the search is repeated
     * once with the arguments exchanged.
     *
     * @param string $a
     * @param string $b
     * @param boolean $swap Attempt to swap `$a` and `$b` to find overlap. (default: `true`)
     * @return string Overlapping portion of `$a` and `$b`, `''` if no overlap
     *                (also `''` if either argument is not a string)
     */
    public static function overlap($a, $b, $swap = true)
    {
        if (!is_string($a) || !is_string($b)) {
            return '';
        }

        $lengthA = strlen($a);
        $lengthB = strlen($b);

        for ($i = 0; $i < $lengthA; $i++) {
            /* advance while the tail of $a keeps matching the head of $b */
            $j = 0;
            while (
                $j < $lengthB &&
                $i + $j < $lengthA &&
                $a[$i + $j] === $b[$j]
            ) {
                $j++;
            }

            /* no mismatch before one of the strings was exhausted */
            if ($j === $lengthB || $i + $j === $lengthA) {
                return substr($a, $i, $j);
            }
        }

        if ($swap) {
            return static::overlap($b, $a, false);
        }
        return '';
    }

    /**
     * Converts a `snake_case` identifier to `PascalCase`
     *
     * The string is split on underscores, the first character of every token
     * is upper-cased (the rest of the token is left untouched), and the tokens
     * are concatenated without a separator.
     *
     * @param string $snake_case
     * @return string
     */
    public static function snake_case_to_PascalCase(string $snake_case): string
    {
        return implode(
            '',
            array_map(
                fn(string $token): string => ucfirst($token),
                explode('_', $snake_case)
            )
        );
    }

    /**
     * Converts a `camelCase` (or `PascalCase`) identifier to `snake_case`
     *
     * Underscores are inserted before digits, between an acronym and the word
     * that follows it, and before every other upper-case letter; runs of
     * underscores between two other characters are then reduced to a single
     * underscore and the result is lower-cased. Leading and trailing
     * underscores are not collapsed.
     *
     * @param string $camelCase
     * @return string
     */
    public static function camelCase_to_snake_case(string $camelCase): string
    {
        $replacements = [
            '/([^0-9])([0-9])/' => '$1_$2', // separate numeric phrases
            '/([A-Z])([A-Z][a-z])/' => '$1_$2', // separate trailing word from acronym
            '/([^A-Z])([A-Z])/' => '$1_$2', // separate end of word from trailing word
            /*
             * minimize underscores -- lookarounds instead of capture groups so
             * that a single character between two runs (`x__Y__Z`) can border
             * both runs; a consuming pattern skips the second run
             */
            '/(?<=[^_])_+(?=[^_])/' => '_',
        ];

        $snake_case = $camelCase;
        foreach ($replacements as $regexp => $replacement) {
            $snake_case = preg_replace($regexp, $replacement, $snake_case);
        }
        return strtolower($snake_case);
    }

    /**
     * Builds a plural form of `$singular` using simple English spelling rules
     *
     * - ends in `s`, `x`, `z`, `sh` or `ch`: append `es`
     * - ends in a `y` that does not follow a vowel: replace the `y` with `ies`
     * - anything else (including vowel + `y`, e.g. `day`): append `s`
     *
     * The suffix is upper-cased when the last character of `$singular` is
     * upper-case, so `BOX` becomes `BOXES`. Irregular plurals are not handled.
     *
     * @param string $singular
     * @return string
     */
    public static function pluralize(string $singular): string
    {
        $length = strlen($singular);
        $last = (string) substr($singular, -1);
        $lowerLast = strtolower($last);
        $lowerTail = strtolower((string) substr($singular, -2));

        $stem = $singular;
        $suffix = 's';

        if (
            in_array($lowerLast, ['s', 'x', 'z'], true) ||
            in_array($lowerTail, ['sh', 'ch'], true)
        ) {
            $suffix = 'es';
        } elseif ($lowerLast === 'y') {
            $preceding = $length > 1 ? $singular[$length - 2] : '';
            /* vowel + y ("day", "key") just takes an "s" */
            if (!self::isVowel($preceding)) {
                $stem = (string) substr($singular, 0, $length - 1);
                $suffix = 'ies';
            }
        }

        /* the suffix follows the case of the word's final character */
        $upperCase = $last !== $lowerLast;
        return $stem . ($upperCase ? strtoupper($suffix) : $suffix);
    }

    /**
     * Is `$char` a single English vowel (`a`, `e`, `i`, `o`, `u`), ignoring case?
     *
     * @param string $char
     * @return bool
     */
    private static function isVowel(string $char): bool
    {
        return strlen($char) === 1 && stripos('aeiou', $char) !== false;
    }
}

battis / data-utilities Goto Github PK

data-utilities's Introduction

DataUtilities

data-utilities's People

Contributors

Stargazers

Watchers

Forkers

data-utilities's Issues

Recommend Projects

Recommend Topics

Recommend Org