From d6118894a0de9623cc5fe7772c0256c87e3453d7 Mon Sep 17 00:00:00 2001 From: igor Date: Tue, 2 Jun 2026 22:24:35 +0200 Subject: [PATCH] added wildcard into --skip switch --- AGENTS.md | 5 ++ README.md | 22 ++++++-- src/SFTPsync.php | 140 +++++++++++++++++++++++++++++++++++++---------- 3 files changed, 132 insertions(+), 35 deletions(-) diff --git a/AGENTS.md b/AGENTS.md index f79b36f..028d5eb 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -18,6 +18,11 @@ - `2` argument/usage error - Missing `--host`, `--user`, `--password` should still support interactive prompt mode. - `--skip` and `--skip-delete` matching semantics should remain stable. + - Rules without wildcard characters (`*`, `?`) use legacy exact matching. + - Exact rules without slash match any path segment; exact rules with slash match a relative subpath. + - Rules containing `*` or `?` are glob patterns matched against normalized relative paths. + - Glob matching should prefer native `fnmatch()` and keep a regex fallback for platforms where it is unavailable. + - Skip patterns should be prepared once, not recompiled for every file. - `--no-print-skip` must suppress only `SKIP` log lines, without changing skip decisions or summary counters. - `--delete-dir` safety guard against dangerous paths (`/`, empty path, dot paths) must remain intact. diff --git a/README.md b/README.md index af168b2..7f17a90 100644 --- a/README.md +++ b/README.md @@ -48,8 +48,8 @@ php src/SFTPsync.php --host --user --password [--port < - `--port `: optional, default `22` - `--print-relative`: show paths relative to action root in logs - `--no-print-skip`: suppress `SKIP` status lines during execution -- `--skip `: repeatable, applied to `--sync` and `--sync-down` -- `--skip-delete `: repeatable, applied to `--delete` and `--delete-dir` +- `--skip `: repeatable, exact names/paths or glob patterns (`*`, `?`), applied to `--sync` and `--sync-down` +- `--skip-delete `: repeatable, exact names/paths or glob patterns (`*`, `?`), applied to `--delete` and `--delete-dir` - `-h`, `--help`: show help ## Examples @@ -69,7 +69,7 @@ php src/SFTPsync.php --host example.com --user u --password p \ # Skip selected entries during sync php src/SFTPsync.php --host example.com --user u --password p \ - --skip .git --skip node_modules --skip cache/tmp \ + --skip .git --skip node_modules --skip "*.log" --skip "cache/*" \ --sync ./app /srv/app # Delete remote directory but keep selected subpaths @@ -90,10 +90,22 @@ After upload/download, mtime is propagated to the target when possible. ## Skip Rule Matching -- Rule without slash (example: `node_modules`) matches any path segment with that name. -- Rule with slash (example: `cache/tmp`) matches that subpath within a relative path. +- Rule without wildcard characters (example: `node_modules`) keeps exact matching. +- Exact rule without slash (example: `node_modules`) matches any path segment with that name. +- Exact rule with slash (example: `cache/tmp`) matches that subpath within a relative path. +- Rule containing `*` or `?` is treated as a glob pattern. `*` matches any characters, and `?` matches one character. +- Glob rule without slash (example: `*.log`) can match file or directory names at any depth. +- Glob rule with slash (example: `src/temp/*.log` or `cache/*`) is matched against relative paths. - Rules are normalized to forward slashes. +Examples: + +- `--skip=*.bat` skips `test.bat` and `tools/deploy.bat`, but not `test.bat.txt`. +- `--skip=*.log` skips `app.log` and `src/temp/test.log`, but not `app.log.1`. +- `--skip=backup-*` skips `backup-2025`, `backup-old`, and `backup-test`. +- `--skip=cache/*` skips content under `cache`. +- `--skip=node_modules` and `--skip=.git` keep the original exact-name behavior. + ## Safety Notes - `--delete-dir` refuses dangerous roots such as empty path, `/`, `.`, `..`, and similar dot paths. diff --git a/src/SFTPsync.php b/src/SFTPsync.php index 2d0653a..2a230c5 100644 --- a/src/SFTPsync.php +++ b/src/SFTPsync.php @@ -150,7 +150,7 @@ final class SftpAdapter } /** - * @return array{help:bool,host:string,user:string,password:string,port:int,print_relative:bool,no_print_skip:bool,skip:list,skip_delete:list,actions:list>} + * @return array{help:bool,host:string,user:string,password:string,port:int,print_relative:bool,no_print_skip:bool,skip:list>,skip_delete:list>,actions:list>} */ function parseArguments(array $argv): array { @@ -210,13 +210,13 @@ function parseArguments(array $argv): array break; case '--skip': - $skipValue = readCliValue($argv, ++$i, '--skip '); - $parsed['skip'][] = normalizeSkipRule($skipValue); + $skipValue = readCliValue($argv, ++$i, '--skip '); + $parsed['skip'][] = prepareSkipRule($skipValue); break; case '--skip-delete': - $skipDeleteValue = readCliValue($argv, ++$i, '--skip-delete '); - $parsed['skip_delete'][] = normalizeSkipRule($skipDeleteValue); + $skipDeleteValue = readCliValue($argv, ++$i, '--skip-delete '); + $parsed['skip_delete'][] = prepareSkipRule($skipDeleteValue); break; case '--sync': @@ -312,8 +312,8 @@ Options: --port Optional, default 22 --print-relative Show logged paths relative to action local/remote roots --no-print-skip Do not print SKIP logs during synchronization - --skip Repeatable, skip matching names/paths in --sync and --sync-down - --skip-delete Repeatable, skip matching names/paths in --delete and --delete-dir + --skip Repeatable, skip exact names/paths or glob patterns (*, ?) in --sync and --sync-down + --skip-delete Repeatable, skip exact names/paths or glob patterns (*, ?) in --delete and --delete-dir -h, --help Show this help Examples: @@ -321,7 +321,7 @@ Examples: php SFTPsync.php --host example.com --user u --password p --sync-down /var/backups ./backups php SFTPsync.php --host example.com --user u --password p --sync ./a /remote/a --delete /remote/a/old.zip --sync-down /remote/logs ./logs php SFTPsync.php --host example.com --user u --password p --delete-dir /tmp/testdir - php SFTPsync.php --host example.com --user u --password p --skip .git --skip node_modules --sync ./app /srv/app --skip-delete .well-known --delete-dir /srv/app + php SFTPsync.php --host example.com --user u --password p --skip .git --skip node_modules --skip "*.log" --sync ./app /srv/app --skip-delete .well-known --delete-dir /srv/app php SFTPsync.php --host example.com --user u --password p --print-relative --sync ./local /var/www Exit codes: @@ -813,33 +813,35 @@ function normalizeSkipRule(string $rule): string return $rule; } +/** + * Skip rules without wildcard characters keep the legacy exact matching: + * a plain name matches any path segment, and a path matches that relative subpath. + * Rules containing "*" or "?" are glob patterns matched against normalized relative paths. + * + * @return array{pattern:string,is_glob:bool,has_slash:bool,regex:?string} + */ +function prepareSkipRule(string $rule): array +{ + $pattern = normalizeSkipRule($rule); + $isGlob = str_contains($pattern, '*') || str_contains($pattern, '?'); + + return [ + 'pattern' => $pattern, + 'is_glob' => $isGlob, + 'has_slash' => str_contains($pattern, '/'), + 'regex' => $isGlob && !function_exists('fnmatch') ? globPatternToRegex($pattern) : null, + ]; +} + function isPathSkipped(string $path, array $skipRules): bool { if ($skipRules === []) { return false; } - $normalizedPath = trim(str_replace('\\', '/', $path), '/'); - if ($normalizedPath === '') { - return false; - } - - $segments = explode('/', $normalizedPath); - $pathWithGuards = '/' . $normalizedPath . '/'; foreach ($skipRules as $rule) { - $normalizedRule = trim(str_replace('\\', '/', (string)$rule), '/'); - if ($normalizedRule === '') { - continue; - } - if (!str_contains($normalizedRule, '/')) { - if (in_array($normalizedRule, $segments, true)) { - return true; - } - continue; - } - - $ruleWithGuards = '/' . trim($normalizedRule, '/') . '/'; - if (str_contains($pathWithGuards, $ruleWithGuards)) { + $preparedRule = is_array($rule) ? $rule : prepareSkipRule((string)$rule); + if (matchesSkipPattern($path, $preparedRule)) { return true; } } @@ -847,6 +849,84 @@ function isPathSkipped(string $path, array $skipRules): bool return false; } +/** + * @param array{pattern:string,is_glob:bool,has_slash:bool,regex:?string} $rule + */ +function matchesSkipPattern(string $path, array $rule): bool +{ + $normalizedPath = trim(str_replace('\\', '/', $path), '/'); + if ($normalizedPath === '') { + return false; + } + + $pattern = $rule['pattern']; + $segments = explode('/', $normalizedPath); + + if (!$rule['is_glob']) { + if (!$rule['has_slash']) { + return in_array($pattern, $segments, true); + } + + return str_contains('/' . $normalizedPath . '/', '/' . $pattern . '/'); + } + + if (!$rule['has_slash']) { + foreach ($segments as $segment) { + if (globMatches($pattern, $segment, $rule)) { + return true; + } + } + + return false; + } + + foreach ($segments as $index => $_segment) { + $relativeSuffix = implode('/', array_slice($segments, $index)); + if (globMatches($pattern, $relativeSuffix, $rule)) { + return true; + } + } + + return false; +} + +/** + * Supports shell-style "*" (any characters) and "?" (one character). + * + * @param array{pattern:string,is_glob:bool,has_slash:bool,regex:?string} $rule + */ +function globMatches(string $pattern, string $candidate, array $rule): bool +{ + if (function_exists('fnmatch')) { + return fnmatch($pattern, $candidate); + } + + $regex = $rule['regex'] ?? globPatternToRegex($pattern); + + return preg_match($regex, $candidate) === 1; +} + +function globPatternToRegex(string $pattern): string +{ + $regex = ''; + $length = strlen($pattern); + + for ($i = 0; $i < $length; $i++) { + $char = $pattern[$i]; + if ($char === '*') { + $regex .= '.*'; + continue; + } + if ($char === '?') { + $regex .= '.'; + continue; + } + $regex .= preg_quote($char, '#'); + } + + return '#^' . $regex . '$#'; +} + function formatLocalLogPath(string $path, string $basePath, bool $printRelative): string { $normalizedPath = normalizeLocalPath($path); @@ -1121,8 +1201,8 @@ function main(array $argv): int } /** - * @param array{help:bool,host:string,user:string,password:string,port:int,print_relative:bool,no_print_skip:bool,skip:list,skip_delete:list,actions:list>} $config - * @return array{help:bool,host:string,user:string,password:string,port:int,print_relative:bool,no_print_skip:bool,skip:list,skip_delete:list,actions:list>} + * @param array{help:bool,host:string,user:string,password:string,port:int,print_relative:bool,no_print_skip:bool,skip:list>,skip_delete:list>,actions:list>} $config + * @return array{help:bool,host:string,user:string,password:string,port:int,print_relative:bool,no_print_skip:bool,skip:list>,skip_delete:list>,actions:list>} */ function askForMissingRequiredOptions(array $config): array {