1 #!/usr/bin/perl 2 # author: seth 3 # e-mail: for e-mail-address see http://www.wg-karlsruhe.de/seth/email_address.php 4 # description: renames files or directories using regexps 5 # thx to m. hoermann for utf8 support and some ideas of additional features 6 # 7 # tab-size: 2 8 9 use Cwd; 10 use strict; 11 use warnings; 12 13 sub syntaxCheck{ 14 my @params = @_; 15 my @path_splitted = split(/[\/\\]/, reverse($0)); 16 my $prg_name = reverse($path_splitted[0]); 17 my $version = '1.23.20100130'; 18 my $syntax = 'syntax: '.$prg_name.' findRE replaceRE [options]'; 19 my $usage = 'renames (recursively) files using regexps 20 21 '.$syntax.' 22 23 findRE files to rename given by a regexp 24 replaceRE how to rename, i.e., s/findRE/replaceRE/ in perl syntax 25 -c, --capitalize Capitalize Every Word 26 -d, --directories rename files and directories (default: files only) 27 -D, --Directories rename directories only (default: files only) 28 -m, --mtime use $y, $mon, $d in replaceRE to insert modification 29 time 30 -p, --path while searching use full paths of files in findRE 31 (default: base filenames only) 32 for the sake of intuition this options automatically 33 sets .*? as prefix and (?=[^\/]*\z) as suffix of 34 findRE 35 --disable-auto disables automatisms, i.e., 36 1. automatically generated prefixes and suffixes 37 in findRE, when using --path, and 38 2. escaping of unescaped slashes in findRE, filesRE, 39 and replaceRE by backslashes 40 -r, --recursive for searching subdirectories recursively 41 -t, --test don\'t change anything, just print possible changes to 42 screen 43 -u, --utf8 enable unicode support for input, output, and renaming 44 -y --tr use tr/findRE/replaceRE/ instead of s/findRE/replaceRE/ 45 -F --filesRE=s restrict replacements to files given by regexp string s 46 (default s=".", i.e., all files) 47 -V, --version display version and exit. 48 --examples show some examples of using this tool 49 50 force overwriting: 51 -p, --predictive=x first look for possible overwriting or not... 52 x=0: rename, but skip files, which would be 53 overwritten, but shouldn\'t (depends on --force). 54 x=1: don\'t even start renaming, if files would be 55 overwritten, that shouldn\'t (depends on --force). 56 (default) 57 -f, --force same as --force=1 58 -f, --force=x grade of forcing renaming 59 x=0: don\'t overwrite any files (default) 60 x=1: overwrite existing (but not already renamed) 61 files 62 x=2: overwrite even renamed files 63 regexp-modifiers: 64 -e, --emodifier set e-modifier in RE, i.e., s/findRE/replaceRE/e 65 (maybe you should consider using --disable-auto if you 66 use slashes in replaceRE) 67 (don\'t combine this with parameter --tr) 68 -g, --global set g-modifier, i.e., rename as many times as possible, 69 i.e., s/findRE/replaceRE/g 70 (don\'t combine this with parameter --tr) 71 -i, --ignorecase set i-modifier, i.e., ignore case in findRE, i.e., 72 s/findRE/replaceRE/i 73 (don\'t combine this with parameter --tr) 74 --tr_c use c-modifier (only if --tr is used), i.e., 75 complement findRE. 76 --tr_d use d-modifier (only if --tr is used), i.e., 77 delete found but unreplaced characters. 78 --tr_s use s-modifier (only if --tr is used), i.e., 79 squash duplicate replaced characters. 80 verbosity: 81 -q, --silent same as --verbose=0 82 -v, --verbose same as --verbose=1 (default) 83 -vv,--very-verbose same as --verbose=2 84 -v, --verbose=x grade of verbosity 85 x=0: no output 86 x=1: default output 87 x=2: much output 88 89 examples: '.$prg_name.' "ASD" "asd" 90 replaces _first_ occurrence of "ASD" by "asd" in all files, e.g. 91 fooASDASD.txt -> fooasdASD (use -g for replacing all occurrences)'."\n".' 92 '.$prg_name.' --examples 93 shows more examples.'."\n"; 94 my $examples =' 95 '.$syntax.' 96 97 examples: '.$prg_name.' 98 displays help'."\n".' 99 '.$prg_name.' "ASD" "asd" 100 replaces _first_ occurrence of "ASD" by "asd" in all files, e.g. 101 fooASDASD.txt -> fooasdASD'."\n".' 102 '.$prg_name.' "ASD" "asd" -g 103 replaces all occurrence of "ASD" by "asd" in all files, e.g. 104 fooASDASD.txt -> fooasdasd'."\n".' 105 '.$prg_name.' "(.)" "\u$1" 106 sets _first_ character to upper case (use -g for all)'."\n".' 107 '.$prg_name.' ".*cd(\d+)/([^/]+)$" "$1$2" --path -r 108 uses full path, i.e., renames ...cd01/title.ogg to 109 ...cd01/01title.ogg and so on (no files will be moved to another 110 directory)'."\n".' 111 '.$prg_name.' "(/d)(/d)" "$2$1" -gr 112 exchanges digits of numbers in all filenames recursively'."\n".' 113 '.$prg_name.' "(error_log\.)\d+(\.gz)" "$1$y-$mon-$d$2" --mtime 114 e.g. errog_log.10.gz -> error_log.2007-07-07.gz'."\n".' 115 '.$prg_name.' -rt _dir -dir 116 be careful and respect the parameter order. 117 this will replace "-rt" by "_dir" using parameters d, i, and r'."\n".' 118 '.$prg_name.' "foo(\d\d)" "\'bar\'.($1+42)" -ert 119 prints the result of replacing e.g. "foo10" by "bar52" recursively, 120 but doesn\'t really change filenames.'."\n".' 121 '.$prg_name.' "(\p{Cyrillic}+)" "\U$1" -drug 122 uppercase all cyrillic letters of files and directories in this 123 directory and all subdirectories.'."\n".' 124 '.$prg_name.' "xy" "yx" --tr -F="foo_x=\d+_y=\d+" 125 use tr/xy/yx/ on all files that match /foo_x=\d+_y=\d+/, i.e., 126 switch the letters "x" and "y" in all of those files.'."\n".' 127 note that in linux you have to use single quotes instead of double quotes. 128 alternatively you can mask the dollar-signs etc. '."\n"; 129 my $version_info = 'ren_ext.pl '.$version."\n".' 130 this program is distributed in the hope that it will be useful, 131 but without any warranty; without even the implied warranty of 132 merchantability or fitness for a particular purpose.'."\n".' 133 originally written by seth (for e-mail-address see 134 http://www.wg-karlsruhe.de/seth/email_address.php ).'."\n"; 135 my $syntax_correct = 0; 136 my %param_hash; 137 # default values 138 $param_hash{'capitalize'} = 0; # capitalize names 139 $param_hash{'directories'} = 0; # rename dirs 140 $param_hash{'emodifier'} = 0; # s///e 141 $param_hash{'cmodifier'} = 0; # y///c 142 $param_hash{'dmodifier'} = 0; # y///d 143 $param_hash{'smodifier'} = 0; # y///s 144 $param_hash{'auto'} = 1; # automatisms 145 $param_hash{'examples'} = 0; # diplay examples and exit 146 $param_hash{'files'} = 1; # rename files 147 $param_hash{'filesRE'} = '.'; # RE for files to apply on 148 $param_hash{'force'} = 0; # force; grade of forcing renaming 149 $param_hash{'fullpath'} = 0; # use full path instead of filenames only 150 $param_hash{'global'} = 0; # s///g 151 $param_hash{'ignorecase'} = 0; # s///i 152 $param_hash{'mtime'} = 0; # use modification time in replaceRE 153 $param_hash{'predictive'} = 1; # predictive; look-ahead, and break before start, if necessary 154 $param_hash{'recursively'} = 0; # search subdirs 155 $param_hash{'test'} = 0; # show result only (without renaming) 156 $param_hash{'tr'} = 0; # tr/// instead of s/// 157 $param_hash{'utf8'} = 0; # interpret regexps and file names as unicode 158 $param_hash{'verbose'} = 1; # trace; grade of verbosity 159 $param_hash{'version'} = 0; # diplay version and exit 160 if(defined($params[1])){ 161 $param_hash{'findRE'} = shift(@params); 162 $param_hash{'replaceRE'} = shift(@params); 163 $syntax_correct = 1; 164 $syntax_correct = (@params==0)? 1 : preparse_options(\@params) if $syntax_correct; 165 my $v_counter = 0; # counter for -vv = -v -v = --very-verbose 166 for(@params){ 167 if($_ eq '-c' || $_ eq '--capitalize'){ 168 $param_hash{'capitalize'} = 1; 169 next; 170 } 171 if($_ eq '-d' || $_ eq '--directory'){ 172 $param_hash{'directories'} = 1; 173 next; 174 } 175 if($_ eq '--disable-auto'){ 176 $param_hash{'auto'} = 0; 177 next; 178 } 179 if($_ eq '--examples'){ 180 $param_hash{'examples'} = 1; 181 next; 182 } 183 if($_ eq '-D' || $_ eq '--Directory'){ 184 $param_hash{'directories'} = 1; 185 $param_hash{'files'} = 0; 186 next; 187 } 188 if($_=~/^-(?:F|-filesRE)=(.*)$/){ 189 $param_hash{'filesRE'} = $1; 190 next; 191 } 192 if($_ eq '-m' || $_ eq '--mtime'){ 193 $param_hash{'mtime'} = 1; 194 next; 195 } 196 if($_ eq '-p' || $_ eq '--path'){ 197 $param_hash{'fullpath'} = 1; 198 next; 199 } 200 if($_ eq '-r' || $_=~/--recursive(?:ly)?/){ 201 $param_hash{'recursively'} = 1; 202 next; 203 } 204 if($_ eq '-t' || $_ eq '--test'){ 205 $param_hash{'test'} = 1; 206 next; 207 } 208 if($_ eq '-y' || $_ eq '--tr'){ 209 $param_hash{'tr'} = 1; 210 next; 211 } 212 if($_ eq '-u' || $_ eq '--utf8'){ 213 $param_hash{'utf8'} = 1; 214 next; 215 } 216 if($_ eq '-V' || $_ eq '--version'){ 217 $param_hash{'version'} = 1; 218 next; 219 } 220 # force overwriting 221 if($_=~/^-(?:p[= ]?|-predictive=)([01])$/){ 222 $param_hash{'predictive'} = $1; 223 next; 224 } 225 if($_ eq '-f' || $_ eq '--force'){ 226 $param_hash{'force'} = 1; 227 next; 228 } 229 if($_=~/^-(?:f[= ]?|-force=)([012])$/){ 230 $param_hash{'force'} = $1; 231 next; 232 } 233 # modifiers 234 if($_ eq '-e' || $_ eq '--emodifier'){ 235 $param_hash{'emodifier'} = 1; 236 next; 237 } 238 if($_ eq '-g' || $_ eq '--global'){ 239 $param_hash{'global'} = 1; 240 next; 241 } 242 if($_ eq '-i' || $_ eq '--ignorecase'){ 243 $param_hash{'ignorecase'} = 1; 244 next; 245 } 246 if($_ eq '--tr_c'){ 247 $param_hash{'cmodifier'} = 1; 248 next; 249 } 250 if($_ eq '--tr_d'){ 251 $param_hash{'dmodifier'} = 1; 252 next; 253 } 254 if($_ eq '--tr_s'){ 255 $param_hash{'smodifier'} = 1; 256 next; 257 } 258 # verbosity 259 if($_ eq '-q' || $_ eq '--silent'){ 260 $param_hash{'verbose'} = 0; 261 next; 262 } 263 if($_=~/^-(?:v|-verbose)$/){ 264 $param_hash{'verbose'} = ++$v_counter; 265 next; 266 } 267 if($_=~/^--very-verbose$/){ 268 $param_hash{'verbose'} = 2; 269 next; 270 } 271 if($_=~/^-(?:v[= ]?|-verbose=)([012])$/){ 272 $param_hash{'verbose'} = $1; 273 next; 274 } # else 275 $syntax_correct = 0; 276 last; 277 } 278 # check for unvalid combinations 279 if( $param_hash{'tr'} 280 && ($param_hash{'emodifier'} || $param_hash{'global'} || $param_hash{'ignorecase'}) 281 || !$param_hash{'tr'} 282 && ($param_hash{'cmodifier'} || $param_hash{'dmodifier'} || $param_hash{'smodifier'}) 283 ){ 284 die " error: don\'t mix up the modifiers. use s///eig or tr///cds. see help: $prg_name --help\n"; 285 } 286 } 287 if( $param_hash{'version'} 288 || (defined($params[0]) && !defined($params[1]) && $params[0] =~ '^(-V|--version)$')){ 289 die $version_info; 290 }elsif($param_hash{'examples'} 291 || (defined($params[0]) && !defined($params[1]) && $params[0] eq '--examples')){ 292 die $examples; 293 }else{ 294 $syntax_correct || die $usage; 295 } 296 return %param_hash; 297 } 298 299 sub preparse_options{ 300 # yeah, i know that at CPAN there already exist many get-opt-modules. 301 my $unparsed_params = shift; 302 my @params = (); 303 my $l = '[a-zA-Z]'; # leading char of long param (--Xooo) 304 my $n = '[a-zA-Z_-]'; # non-leading char of long param (--oXXX) 305 my $p = '.'; # param of param (--oooo=X or -o=X) 306 my $s = '[a-zA-Z]'; # short params (-X) 307 my $waiting_for_param_param = 0; 308 my $syntax_check = 1; 309 my $param_param; 310 for my $param (@$unparsed_params){ 311 if($param =~ /^--$l$n+(?:=$p+)?\z/){ # long param 312 push @params, $param; 313 $waiting_for_param_param = 0; 314 }elsif($param =~ /-($s*)($s=$p+)\z/){ # short param with param 315 $param_param = $2; 316 push @params, grep s/^/-/, split /(?=$s)/, $1; 317 push @params, '-'.$param_param; 318 $waiting_for_param_param = 0; 319 }elsif($param =~ /-$s+\z/){ # short param w/o param 320 push @params, grep s/^/-/, split /(?=$s)/, substr($param, 1); 321 $waiting_for_param_param = 1; 322 }elsif($waiting_for_param_param==1){ # separated param of param 323 $params[$#params].='='.$param; 324 $waiting_for_param_param = 0; 325 }else{ 326 $syntax_check = 0; 327 last; 328 } 329 } 330 @{$unparsed_params} = @params; 331 return ($syntax_check, @params); 332 } 333 334 sub capitalize{ 335 my $s = shift; 336 $s = lc($s); 337 $s=~s/\b(\w)/\u$1/g; # capitalize all words 338 $s=~s/\.(\w+)\z/.\l$1/g; # decapitalize file-extension 339 return $s; 340 } 341 342 sub get_time_of_entry{ 343 my $file = shift; 344 my $file_stat = (stat($file))[9]; 345 my @filetime = localtime($file_stat); 346 my $year = sprintf "%04u", $filetime[5]+1900; 347 my $month = sprintf "%02u", $filetime[4]+1; 348 my $day = sprintf "%02u", $filetime[3]; 349 return ($year, $month, $day); 350 } 351 352 sub decode_utf8{ 353 my $bytes = shift; 354 utf8::decode($$bytes) 355 or die 'invalid utf-8 encountered (try omitting option -u or --utf8 respectively)'; 356 } 357 358 sub generate_searchreplace_string{ 359 my $params = shift; 360 return ''.($$params{'tr'} ? 'tr' : 's').'/'.$$params{'findRE'}.'/'.$$params{'replaceRE'}.'/'.$$params{'emodifier'}.$$params{'ignorecase'}.$$params{'global'}.$$params{'cmodifier'}.$$params{'dmodifier'}.$$params{'smodifier'}; 361 } 362 363 sub is_win{ 364 return $^O=~/win/i; 365 } 366 367 sub get_fullpath{ 368 my $filename = shift; 369 my $fullpath = Cwd::abs_path($filename); 370 $fullpath=~s/\//\\/g if(-d $filename && is_win()); 371 return $fullpath; 372 } 373 374 sub check_target_filename{ 375 my $filename = shift; 376 if($filename=~/\\/ && is_win() || $filename=~/\//){ 377 my $output = shift; 378 my $params = shift; 379 print "error: operation not allowed!\n"; 380 print ' '.$output."\"\n"; 381 print ' slashes "/" (and in windows backslashes "\", too) are not allowed in target filenames'."\n"; 382 print ' files can be renamed only, they can\'t be moved to other directories with this tool.'."\n"; 383 print " findRE or replaceRE seem to be unvalid!.\n"; 384 if($$params{'fullpath'} && $$params{'findRE'}!~/\^?\.\*/){ 385 print ' maybe you want to try'."\n"; 386 print ' findRE = .*'.$$params{'findRE'}."\n"; 387 } 388 die 'program aborted.'; 389 } 390 } 391 392 sub rename_files_addon{ 393 my $working_dir = shift; 394 my $overwrite = shift; 395 my $params = shift; 396 my $findRE = $$params{'findRE'}; 397 my $filesRE = $$params{'filesRE'}; 398 my $replaceRE = $$params{'replaceRE'}; 399 my $ren_dirs = $$params{'directories'}; 400 my $ren_files = $$params{'files'}; 401 my $capitalize = $$params{'capitalize'}; 402 my $mtime = $$params{'mtime'}; 403 my $recursively = $$params{'recursively'}; 404 my $test = $$params{'test'}; 405 my $tr = $$params{'tr'}; 406 my $force = $$params{'force'}; 407 my $fullpath = $$params{'fullpath'}; 408 my $ignorecase = $$params{'ignorecase'}; 409 my $prediction = $$params{'prediction'}; 410 my $utf8 = $$params{'utf8'}; 411 my $verbose = $$params{'verbose'}; 412 my $output; 413 my $entry; 414 my $entry_filename; 415 my $exists; 416 my @entries_to_rename; 417 my $need_force; 418 my @renamed_entries; 419 my @dirs; 420 my $searchreplace_string = generate_searchreplace_string($params); 421 print "\n".' '.$working_dir.'/'."\n" if $verbose>1; 422 opendir(DIR, ".") || die $working_dir.": $!"; # read_dir and generate renaming_array 423 while($entry_filename=readdir(DIR)){ # loop over directory entries 424 next if($entry_filename eq '.' || $entry_filename eq '..'); 425 decode_utf8(\$entry_filename) if $utf8; 426 push(@dirs, $entry_filename) if -d $entry_filename; 427 if($fullpath){ 428 $entry = get_fullpath($entry_filename); 429 decode_utf8(\$entry) if $utf8; 430 }else{ 431 $entry = $entry_filename; 432 } 433 if((($ren_dirs && -d $entry_filename) || ($ren_files && not -d $entry_filename)) 434 && $entry=~/$filesRE/ 435 &&( $tr 436 || !$tr 437 &&( ($ignorecase eq '' && $entry=~/$findRE/) 438 ||($ignorecase eq 'i' && $entry=~/$findRE/i) 439 ) 440 ) 441 ){ # if entry should be renamed 442 my ($y, $mon, $d) = get_time_of_entry($entry_filename) if $mtime==1; 443 print "\n".' '.$working_dir.'/'."\n" if $verbose==1 && @entries_to_rename==0; 444 $output = 'rename: "'.$entry_filename; 445 print "'$entry'=~$searchreplace_string\n" if $verbose>2; 446 push(@entries_to_rename, $entry_filename); 447 eval('$entry=~'.$searchreplace_string); # generate new file name 448 #$entry = substr $entry, length($`) if($fullpath && (!is_win() && -1<index($`, '/') || is_win() && -1<index($`, '\\'))); 449 $entry = capitalize($entry) if $capitalize; 450 $output .= '" ==> "'.$entry; 451 check_target_filename($entry, $output, $params); 452 $need_force = 0; 453 # check for collisions 454 if((!is_win() && grep $_ eq $entry, @renamed_entries) 455 || (is_win() && grep lc($_) eq lc($entry), @renamed_entries) 456 ){ # if another file was already renamed to the same name 457 ++$$overwrite{'newfiles'}; 458 $output .= ' (another file '.(($force>1)? 'will' : 'would').' be renamed to this name already!)'; 459 $need_force = 2; 460 } 461 if(-e $entry){ # target file exists already 462 if($entry eq $entries_to_rename[-1]){ # old_name == new_name 463 $output .= ' (name unchanged)'; 464 }elsif(is_win() && lc($entry) eq lc($entries_to_rename[-1])){ 465 $output .= ' (changing case only)'; # case-change (win-only) 466 }else{ 467 ++$$overwrite{'existingfiles'}; # overwrite existing file? 468 $output .= ' (existing already! '.(($force>0)? 'will' : 'won\'t').' be overwritten.)'; 469 $need_force = 1 if $need_force<1; 470 } 471 } 472 unless(-e $entry && $entry eq $entries_to_rename[-1] && $tr){ 473 print $output."\"\n" if $verbose>0 && ($prediction==0 || $need_force>0) || $verbose>1; 474 } 475 if($force>=$need_force){ 476 push(@renamed_entries, $entry); 477 }else{ 478 pop(@entries_to_rename); 479 } 480 }else{ 481 print 'skip: '.$entry."\n" if $verbose>1; 482 } 483 } 484 closedir(DIR); 485 @dirs = sort(@dirs); 486 if($recursively==1){ # first search subdirectories 487 for(@dirs){ 488 chdir($_); 489 rename_files_addon($working_dir.'/'.$_, $overwrite, $params); 490 chdir('..'); 491 } 492 } 493 if(!$test && !$prediction){ # after searching subdirectories begin renaming 494 for(my $i=0; $i<@entries_to_rename; ++$i){ 495 rename($entries_to_rename[$i],$renamed_entries[$i]); 496 } 497 } 498 } 499 500 sub rename_files_using_regexps{ 501 my %params = syntaxCheck(@_); 502 # postprocess params 503 if($params{'auto'}){ 504 # escape unescaped slashes 505 $params{$_}=~s/(?<!\\)((?:\\\\)*)\//$1\\\//g for('findRE', 'replaceRE', 'filesRE'); 506 # add prefix and suffix to findRE for intuitive fullpath searching 507 $params{findRE} = '.*?'.$params{findRE}.'(?=[^\/]*\z)' if $params{'fullpath'}; 508 } 509 if($params{'utf8'}){ 510 # enable unicode for both regular expressions ... 511 decode_utf8(\$params{$_}) for('findRE', 'replaceRE', 'filesRE'); 512 # ... and for the screen output 513 binmode(STDOUT, ':utf8'); 514 binmode(STDERR, ':utf8'); 515 } 516 # bool2modifier 517 $params{'cmodifier'} = ($params{'cmodifier'}) ? 'c' : ''; 518 $params{'dmodifier'} = ($params{'dmodifier'}) ? 'd' : ''; 519 $params{'emodifier'} = ($params{'emodifier'}) ? 'e' : ''; 520 $params{'smodifier'} = ($params{'smodifier'}) ? 's' : ''; 521 $params{'global'} = ($params{'global'}) ? 'g' : ''; 522 $params{'ignorecase'} = ($params{'ignorecase'}) ? 'i' : ''; 523 524 my $working_dir = cwd; 525 decode_utf8(\$working_dir) if $params{'utf8'}; 526 my %overwrite; # counting of overwritten files 527 $overwrite{'existingfiles'} = 0; 528 $overwrite{'newfiles'} = 0; 529 print ' using '.generate_searchreplace_string(\%params)."\n" if $params{'verbose'}>0; 530 if($params{'test'}==0 && $params{'predictive'}==1){ # if not just testing, predict collisions 531 print 'predicting changes/collisions...'."\n" if $params{'verbose'}>0; 532 $params{'prediction'} = 1; 533 rename_files_addon($working_dir, \%overwrite, \%params); 534 print 'number of collisions: '.($overwrite{'existingfiles'}+$overwrite{'newfiles'})."\n" if $params{'verbose'}>1; 535 } 536 if($overwrite{'existingfiles'}+$overwrite{'newfiles'}==0){ # call renaming-routine 537 print $params{'test'} ? "\ntesting...\n": "\nrenaming...\n" if $params{'verbose'}>0; 538 $params{'prediction'} = 0; 539 rename_files_addon($working_dir, \%overwrite, \%params); 540 }elsif($params{'verbose'}>0){ 541 print $overwrite{'existingfiles'}.' existing and '.$overwrite{'newfiles'}.' new files would be overwritten.'."\n"; 542 } 543 chdir($working_dir); 544 } 545 546 rename_files_using_regexps(@ARGV);