#!/usr/bin/perl # author: seth # e-mail: for e-mail-address see http://www.wg-karlsruhe.de/seth/email_address.php # description: replaces strings in a text-file or a directory using regexps # textre means TEXT-REplacing (REcursively) by Regular Expressions # # tab-size: 2 use strict; use warnings; use Cwd; sub syntaxCheck{ my @params = @_; my @path_splitted = split(/[\/\\]/, reverse($0)); my $prg_name = reverse($path_splitted[0]); my $version = '1.01.20110226'; my %param_hash; # default values $param_hash{'filesRE'} = '(\\.bas|\\.bat|\\.c|\\.cc|\\.cgi|\\.cpp|\\.css|\\.csv|\\.f|\\.h|\\.hpp|\\.html?|\\.js|\\.pas|\\.php\\d?|\\.pl|\\.tex|\\.txt|\\.vbs)$'; $param_hash{'ignorecase'} = 0; # s///i (findRE) $param_hash{'emodifier'} = 0; # s///e (findRE) $param_hash{'icf'} = 0; # s///i (filesRE) $param_hash{'lowercase'} = 0; # tr/[A-Z]/[a-z]/ and some umlauts too $param_hash{'uppercase'} = 0; # tr/[a-z]/[A-Z]/ and some umlauts too $param_hash{'charwise'} = 0; # read charwise (not linewise) $param_hash{'linesRE'} = '.'; # lines to work at $param_hash{'recursively'} = 0; # search subdirs $param_hash{'germanshit'} = 0; # replace äÄöÖüÜß $param_hash{'test'} = 0; # show result only (without renaming) $param_hash{'verbose'} = 1; # trace; grade of verbosity $param_hash{'version'} = 0; # diplay version and exit my $usage = 'replaces strings in a text-file or a directory using regexps syntax: '.$prg_name.' findRE replaceRE [options] findRE text to be replaced replaceRE replacement -f, --filesRE=s files to search (default="'.$param_hash{'filesRE'}.'") -c, --charwise don\'t read files linewise (default), but charwise -e, --emodifier use e-modifier in findRE, i.e., s///e -g, --germanshit _after_ replacing, convert äÄöÖüÜß to ae, ..., ss -i, --ignorecase-find ignore case in findRE -I, --ignorecase-files ignore case in filesRE -l, --lower-case _after_ replacing, convert _all_ to lower case -L, --lines=s replace only in lines s, s is interpreted as a regexp, default = all lines -u, --upper-case _after_ replacing, convert _all_ to upper case -r, --recursively for search subdirectories recursively -t, --test don\'t change anything, just print possible changes -V, --version display version and exit. -q, --silent same as --verbose=0 -v, --verbose same as --verbose=1 (default) -vv,--very-verbose same as --verbose=2 -v, --verbose=x grade of verbosity x=0: no output x=1: default output x=2: much output examples: '.$prg_name.' "bratwurst" "gruenkohl" '.$prg_name.' "Blutwurst" "salat" -l '.$prg_name.' "blutwurst" "salat" -i -l (not the same as above) '.$prg_name.' "blutwurst" "salat" -L="^123$" '.$prg_name.' "(/d)(/d)" "$2$1" -f="(\\.htm|\\.txt)$" -r'."\n".' note that in linux you have to use single quotes instead of double quotes. alternatively you can mask the dollar-signs etc. '."\n"; my $syntax_correct = 0; if(defined($params[1])){ $param_hash{'findRE'} = shift(@params); $param_hash{'replaceRE'} = shift(@params); $syntax_correct = (@params==0)? 1 : preparse_options(\@params); for(@params){ if($_ eq '-c' || $_ eq '--charwise'){ $param_hash{'charwise'} = 1; next; } if($_=~/^-(?:f|-filesRE)=(.*)$/){ $param_hash{'filesRE'} = $1; next; } if($_ eq '-e' || $_ eq '--emodifier'){ $param_hash{'emodifier'} = 1; next; } if($_ eq '-g' || $_ eq '--germanshit'){ $param_hash{'germanshit'} = 1; next; } if($_ eq '-i' || $_ eq '--ignorecase-find'){ $param_hash{'ignorecase'} = 1; next; } if($_ eq '-I' || $_ eq '--ignorecase-files'){ $param_hash{'icf'} = 1; next; } if($_ eq '-l' || $_ eq '--lower-case'){ $param_hash{'lowercase'} = 1; die 'error: conversion to lowercase _and_ uppercase not possible!'."\n" if($param_hash{'uppercase'}); next; } if($_=~/^-(?:L|-lines)=(.*)$/){ $param_hash{'linesRE'} = $1; next; } if($_ eq '-r' || $_=~/--recursive(?:ly)?/){ $param_hash{'recursively'} = 1; next; } if($_ eq '-t' || $_ eq '--test'){ $param_hash{'test'} = 1; next; } if($_ eq '-u' || $_ eq '--upper-case'){ $param_hash{'uppercase'} = 1; die 'error: conversion to lowercase _and_ uppercase not possible!'."\n" if($param_hash{'lowercase'}); next; } if($_ eq '-V' || $_ eq '--version'){ $param_hash{'version'} = 1; next; } # verbosity if($_ eq '-q' || $_ eq '--silent'){ $param_hash{'verbose'} = 0; next; } if($_=~/^-(?:v|-verbose)$/){ $param_hash{'verbose'} = 1; next; } if($_=~/^-(?:vv|-very-verbose)$/){ $param_hash{'verbose'} = 2; next; } if($_=~/^-(?:v|-verbose)=([0123])$/){ $param_hash{'verbose'} = $1; next; } # else $syntax_correct = 0; last; } } if($param_hash{'version'} || (defined($params[0]) && !defined($params[1]) && $params[0] =~ '^(-V|--version)$')){ my $version_info = 'textre.pl '.$version."\n".' this program is distributed in the hope that it will be useful, but without any warranty; without even the implied warranty of merchantability or fitness for a particular purpose. originally written by (and suggestions to) seth (for e-mail-address see http://www.wg-karlsruhe.de/seth/email_address.php).'."\n"; die $version_info; }else{ $syntax_correct || die $usage; } return %param_hash; } sub preparse_options{ # yeah, i know that at CPAN there already exist many get-opt-modules. my $unparsed_params = shift; my @params = (); my $l = '[a-zA-Z]'; # leading char of long param (--Xooo) my $n = '[a-zA-Z-]'; # non-leading char of long param (--oXXX) my $p = '.'; # param of param (--oooo=X or -o=X) my $s = '[a-zA-Z]'; # short params (-X) my $waiting_for_param_param = 0; my $syntax_check = 1; my $param_param; for my $param (@$unparsed_params){ if($param =~ /^--$l$n+(?:=$p+)?\z/){ # long param push @params, $param; $waiting_for_param_param = 0; }elsif($param =~ /-($s*)($s=$p+)\z/){ # short param with param $param_param = $2; push @params, grep s/^/-/, split /(?=$s)/, $1; push @params, '-'.$param_param; $waiting_for_param_param = 0; }elsif($param =~ /-$s+\z/){ # short param w/o param push @params, grep s/^/-/, split /(?=$s)/, substr($param, 1); $waiting_for_param_param = 1; }elsif($waiting_for_param_param==1){ # separated param of param $params[$#params].='='.$param; $waiting_for_param_param = 0; }else{ $syntax_check = 0; last; } } @{$unparsed_params} = @params; return $syntax_check; } sub log10{ my $n = shift; return ($n<=0)?0:log($n)/log(10); } sub max{ my $a = shift; my $b = shift; return ($a>$b)?$a:$b; } sub vernichte_bloede_deutsche_umlaute_und_sz{ my $str = shift; $str=~s/ä/ae/g; $str=~s/Ä/Ae/g; $str=~s/ö/oe/g; $str=~s/Ö/Oe/g; $str=~s/ü/ue/g; $str=~s/Ü/Ue/g; $str=~s/ß/ss/g; return $str; } sub loadFile{ my $infile = shift; open(INFILE, "<".$infile) || die "error: $!\n"; my @lines = ; close(INFILE); return @lines; } sub loadFile_charwise{ my $infile = shift; my $content = ''; open(INFILE, "<".$infile) || die "error: $!\n"; while(!eof(INFILE)){ $content.=getc(INFILE); } close(INFILE); return $content; } sub saveFile{ my $outfile = shift; print "write file ".$outfile."\n"; open(OUTFILE, ">".$outfile) || die " could not write file. $!\n"; print OUTFILE @_; close(OUTFILE); return 1; } # core sub search_file{ my $flag_dir_printed = shift; my $work_dir = shift; my $file = shift; my $params = shift; my $findRE = $$params{'findRE'}; my $linesRE = $$params{'linesRE'}; my $replaceRE = $$params{'replaceRE'}; my $ignorecase = ($$params{'ignorecase'})?'i':''; my $emodifier = ($$params{'emodifier'})?'e':''; my $verbose = $$params{'verbose'}; my $charwise = $$params{'charwise'}; # not used yet my %counter = ('lines'=>0, 'changed_lines'=>0, 'changes'=>0); if($charwise==1){ my $file_content = loadFile_charwise($file); my $file_new_content = ''; my $found_str; my $len; my $old_pos = 0; my $after_matched; while($ignorecase eq 'i' && $file_content=~/$findRE/ig || $ignorecase ne 'i' && $file_content=~/$findRE/g){ print $work_dir.$file."\n" if($counter{'changes'}==0 && $verbose>0); $$flag_dir_printed = 1; ++$counter{'changes'}; $found_str = $&; $after_matched = $'; $len = length($found_str); $file_new_content.=substr($file_content, $old_pos, pos($file_content)-$len-$old_pos); $old_pos = pos($file_content); print ' orig: '.$found_str."\n" if($verbose>0); eval('$found_str=~s/'.$findRE.'/'.$replaceRE.'/'.$ignorecase.$emodifier); print ' new: '.$found_str."\n" if($verbose>0); $file_new_content.=$found_str; } if($counter{'changes'}>0){ $file_content = $file_new_content.$after_matched; $file_content = uc($file_content) if($$params{'uppercase'}==1); $file_content = lc($file_content) if($$params{'lowercase'}==1); $file_content = vernichte_bloede_deutsche_umlaute_und_sz($file_content) if($$params{'germanshit'}==1); print ' '.$counter{'changes'}.' changes'."\n" if($verbose>0); saveFile($file, $file_content) if($$params{'test'}==0); } }else{ # linewise my @lines = loadFile($file); my $loglines = int(log10($#lines+1)+1); for(@lines){ ++$counter{'lines'}; if($counter{'lines'} =~ /$linesRE/ && ( ($ignorecase eq 'i' && $_=~/$findRE/i) || ($ignorecase eq '' && $_=~/$findRE/ ) ) ){ print $work_dir.$file."\n" if($counter{'changed_lines'}==0 && $verbose>0); $$flag_dir_printed = 1; printf('%0'.max($loglines, 2).'d: ', $counter{'lines'}) if($verbose>0); print $_ if($verbose>0); ++$counter{'changed_lines'}; eval('$_ =~ s/'.$findRE.'/'.$replaceRE.'/'.$ignorecase.'g'.$emodifier); print ' ' x (max(0,$loglines-2)).'->: '.$_ if($verbose>0); } $_ = uc($_) if($$params{'uppercase'}==1); $_ = lc($_) if($$params{'lowercase'}==1); $_ = vernichte_bloede_deutsche_umlaute_und_sz($_) if($$params{'germanshit'}==1); } if($counter{'changed_lines'}>0){ print ' '.$counter{'changed_lines'}.' lines replaced'."\n" if($verbose>0); saveFile($file, @lines) if($$params{'test'}==0); } } return %counter; } sub text_replacer{ my $counter = shift; my $working_dir = shift; my $params = shift; my $filesRE = $$params{'filesRE'}; my $icf = ($$params{'icf'})?'i':''; my $recursively = $$params{'recursively'}; my $verbose = $$params{'verbose'}; my $entry; my @dirs; print "\n\n".' '.$working_dir.'/'."\n" if $verbose>1; opendir(DIR, ".") || die $working_dir.": $!"; # read_dir and generate renaming_array my $flag_first_search = 1; my $flag_first_skip = 1; my $flag_dir_printed = 0; my %counter_present; my $work_dir; my @entries = sort(readdir(DIR)); # cosmetics closedir(DIR); for $entry (@entries){ if(-d $entry){ push(@dirs, $entry); }else{ if( (($icf eq '' && $entry=~/$filesRE/) || ($icf eq 'i' && $entry=~/$filesRE/i)) ){ $work_dir = ($verbose==1 && $flag_dir_printed==0)?"\n".' '.$working_dir.'/'."\n":"\n"; print 'search:' if $verbose>1 && $flag_first_search++; print ' "'.$entry.'"' if $verbose>1; ++$$counter{'files'}; %counter_present = &search_file(\$flag_dir_printed, $work_dir, $entry, $params); $$counter{'changed_files'}+=($counter_present{'changed_lines'}+$counter_present{'changes'}>0); $$counter{'lines'}+=$counter_present{'lines'}; $$counter{'changed_lines'}+=$counter_present{'changed_lines'}; $$counter{'changes'}+=$counter_present{'changes'}; }else{ print 'skip:' if $verbose>2 && $flag_first_skip++; print ' "'.$entry.'"' if $verbose>1; } } } # @dirs = sort(@dirs); # not necessary if($recursively==1){ # search subdirectories for(@dirs){ if($_ ne '.' && $_ ne '..'){ chdir($_); ++$$counter{'dir'}; text_replacer($counter, $working_dir.'/'.$_, $params); chdir('..'); } } } } sub stats{ my %counter = %{shift()}; my $params = shift; print "\n".'stats:'."\n"; print ' searched: '.$counter{'dir'}.' dirs, '.$counter{'files'}.' files'; print ', '.$counter{'lines'}.' lines' if($$params{'charwise'}==0); print "\n"; print ' changed: '.$counter{'changed_files'}.' files, '; print (($$params{'charwise'}==0)? $counter{'changed_lines'}.' lines' : $counter{'changes'}.' places'); print "\n"; print '(case-changing not included)'."\n" if($$params{'lowercase'}|$$params{'uppercase'}); } sub text_replacing_using_regexps{ my %params = syntaxCheck(@_); # command line parameters my $working_dir = cwd; my %counter; $counter{'dir'} = 1; # number of searched directories $counter{'files'} = 0; # number of searched files $counter{'lines'} = 0; # number of searched lines $counter{'changed_files'} = 0; # number of changed files $counter{'changed_lines'} = 0; # number of changed lines $counter{'changes'} = 0; # number of changes text_replacer(\%counter, $working_dir, \%params); chdir($working_dir); stats(\%counter, \%params) if $params{'verbose'}>0; } text_replacing_using_regexps(@ARGV);