#!/usr/local/bin/perl -w
# author: seth
# email: email_software@wg-karlsruhe.de
# description: renames file using html-file xor id3-tags
# todo-list:
# - support of noalbum, (--noalbum = do not search for a whole album)

use MP3::Tag;
use Cwd;
use strict;
use warnings;
use File::Copy;

# Default location of the html music list; can be overridden with --htmlfile=x.
my $html_file_path='C:/Dokumente und Einstellungen/Administrator/Eigene Dateien/musik-db/music_list.htm';

# Parses the command line.
#   @_      : the raw command line arguments; the first one is the file regex.
#   returns : hash of settings (filesRA, recursively, verbose, test, sortToDir,
#             noalbum, notrackno, useid3, useid3v1, useid3v2, version, TPE1,
#             TALB and any other --Txxx=s frame given).
# Dies with the usage text on a syntax error and with the version text when
# the version was requested. --htmlfile=x changes the file-level
# $html_file_path as a side effect.
sub syntaxCheck {
    my @params = @_;

    # program name = last path component of $0 (either kind of slash)
    my @path_splitted = split(/[\/\\]/, scalar reverse($0));
    my $prg_name      = scalar reverse($path_splitted[0]);
    my $version       = '0.95.20070324';
    my $usage         = join("\n",
        'renames file using html-file xor id3-tags',
        'usage:',
        '  '.$prg_name.' filesRE [options]',
        '    filesRE              files to rename (use regular expressions!)',
        '    -r, --recursively    for searching subdirectories recursively',
        '    --v[erbose]=x        verbose (x=0: no output, x=1: default output, x=2: much output)',
        '    -t, --test           don\'t change anything, just print possible changes',
        '    -s, --sortToDir      makes (and moves files to) directory artist/album/*',
        '    -V, --version        display version and exit.',
        '    --htmlfile=x         use other path than default (x="path to html_music_list")',
        '    --notrackno          do not use trackno in filename',
        '    -u --useid3          use id3-tags (don\'t search html-file, but id3v1 and id3v2)',
        '    --useid3v1           use id3-tags (don\'t search html-file, but id3v1 only)',
        '    --useid3v2           use id3-tags (don\'t search html-file, but id3v2 only)',
        '  forced info: may be (multiple) chosen from the following case-sensitive list',
        '    --TPE1=s             artist (default s=..)',
        '    --TALB=s             album (default s=.)',
        '    s is a string',
        '      s=.  means "use current dir name"',
        '      s=.. means "use parent dir name" (there are no bands called . or ..)',
        'examples:',
        '  .../artist/album/'.$prg_name.' "^\\d\\d\\.mp3$"',
        '    renames all files "xx.mp3" to "xx artist - track.mp3"',
        '  .../artist/'.$prg_name.' "\\.mp3$" --TPE1=. --TALB="dick und doof"',
        '    renames all files "*.mp3" to "xx artist - track.mp3"',
    )."\n";

    my $syntax_correct = 0;
    my %param_hash = (
        'noalbum'     => 0,
        'notrackno'   => 0,
        'recursively' => 0,
        'test'        => 0,
        'sortToDir'   => 0,
        'useid3'      => 0,
        'useid3v1'    => 0,
        'useid3v2'    => 0,
        'verbose'     => 1,
        'version'     => 0,
        'TPE1'        => '..',
        'TALB'        => '.',
    );

    # plain on/off switches and the setting each of them enables
    my %flag_for = (
        '--noalbum'     => 'noalbum',
        '--notrackno'   => 'notrackno',
        '-r'            => 'recursively',
        '--recursively' => 'recursively',
        '-t'            => 'test',
        '--test'        => 'test',
        '-s'            => 'sortToDir',
        '--sortToDir'   => 'sortToDir',
        '-u'            => 'useid3',
        '--useid3'      => 'useid3',
        '--useid3v1'    => 'useid3v1',
        '--useid3v2'    => 'useid3v2',
        '-V'            => 'version',
        '--version'     => 'version',
    );

    if (defined($params[0])) {
        $param_hash{'filesRA'} = shift(@params);
        $syntax_correct = 1;
        # if somebody tries -h or --h or /? for help
        $syntax_correct = 0 if $param_hash{'filesRA'} =~ /^(-?-h|\/\?)$/;

        # Expand bundled short options ("-rt" => "-r", "-t") up front instead
        # of growing the list while it is being iterated.
        my @options;
        for my $param (@params) {
            if ($param =~ /^-[^-]./) {
                push(@options, map { '-'.$_ } split(//, substr($param, 1)));
            }
            else {
                push(@options, $param);
            }
        }

        for my $option (@options) {
            if (exists $flag_for{$option}) {
                $param_hash{ $flag_for{$option} } = 1;
                next;
            }
            if ($option =~ /^--v(?:erbose)?=([0123])$/) {
                $param_hash{'verbose'} = $1;
                next;
            }
            if ($option =~ /^--htmlfile=(.+)$/) {
                $html_file_path = $1;
                next;
            }
            if ($option =~ /^--(T[A-Z0-9]{3})=(.*)$/) {
                $param_hash{$1} = $2;
                next;
            }
            # anything else is unknown
            $syntax_correct = 0;
            last;
        }
    }

    # the three id3 modes exclude each other
    $syntax_correct = 0
        if 1 < $param_hash{'useid3'} + $param_hash{'useid3v1'} + $param_hash{'useid3v2'};

    if ($param_hash{'version'}
        || ($param_hash{'filesRA'}
            && ($param_hash{'filesRA'} eq '-V' || $param_hash{'filesRA'} eq '--version')))
    {
        my $version_info = 'ren_mp3.pl '.$version."\n"
            .' this program is distributed in the hope that it will be useful,'."\n"
            .' but without any warranty; without even the implied warranty of'."\n"
            .' merchantability or fitness for a particular purpose.'."\n"
            .' originally written by seth.'."\n";
        die $version_info;
    }
    else {
        $syntax_correct || die $usage;
    }
    return %param_hash;
}

# Builds the new base name (without extension) of one file from its id3 tags.
#   $file      : path of the mp3 file
#   $verbose   : verbosity level; details are printed when > 1
#   $notrackno : 1 = leave the track number out of the name
#   @_ (rest)  : tag sources handed to MP3::Tag's 'autoinfo' configuration
#   returns    : "NN artist - title" (or "artist - title"), disallowed
#                characters replaced by '_'; '' when no usable tags were found.
sub get_info_from_id3 {
    my $file        = shift;
    my $verbose     = shift;
    my $notrackno   = shift;
    my @tag_sources = @_;

    my $mp3 = MP3::Tag->new($file);
    if (!defined $mp3) {
        # NOTE(review): presumably new() yields undef for unreadable files -
        # confirm against the installed MP3::Tag version.
        warn 'cannot read id3-tags of "'.$file."\"\n";
        return '';
    }
    $mp3->config('autoinfo', @tag_sources);

    # Used below as: [0] title, [1] track number, [2] artist, [3] printed in
    # parentheses (presumably the album).
    my @info = $mp3->autoinfo();
    for my $idx (0 .. 3) {
        $info[$idx] = '' if !defined $info[$idx];
    }

    if ($info[0] eq '' && $info[2] eq '') {
        # Without artist and title every such file would get the same name.
        warn 'no usable id3-tags in "'.$file."\"\n";
        $mp3->close;
        return '';
    }

    $info[1] =~ s~/.*~~g;                                  # "3/12" -> "3"
    $info[1] = '0'.$info[1] if (length($info[1]) == 1);    # "3"    -> "03"

    my $result_filename = $info[1].' '.$info[2].' - '.$info[0];
    print ' ('.$info[3].') '.$result_filename if $verbose > 1;
    $result_filename = $info[2].' - '.$info[0] if $notrackno == 1;

    # the year is only shown, it is not part of the file name
    if (exists $mp3->{ID3v2} && defined $mp3->{ID3v2}) {
        my ($year) = $mp3->{ID3v2}->get_frame("TYER");
        print ' ('.$year.')' if $year && $verbose > 1;
    }

    $result_filename = subst_disallowed_chars($result_filename, '_');
    print "\n" if $verbose > 1;
    $mp3->close;
    return $result_filename;
}

# Reads a whole text file.
#   $infile  : path of the file
#   $verbose : verbosity level; the file name is announced when > 0
#   returns  : list of all lines (line ends kept); dies if the file cannot be
#              opened.
sub loadFile {
    my $infile  = shift;
    my $verbose = shift;

    print 'read file "'.$infile."\"\n" if $verbose > 0;
    open(my $in_fh, '<', $infile)
        or die ' datei "'.$infile.'" nicht gefunden'."\n";
    my @lines = <$in_fh>;
    close($in_fh);
    return @lines;
}

# Decodes the html entities used in the music list and replaces / ? * ! by '_'.
# NOTE(review): the umlaut literals are matched in whatever encoding this
# source file is stored in - confirm it agrees with the html file.
sub dehtml {
    my $s = shift;

    $s =~ s/&auml;/ä/g;
    $s =~ s/&ouml;/ö/g;
    $s =~ s/&uuml;/ü/g;
    $s =~ s/&Auml;/Ä/g;
    $s =~ s/&Ouml;/Ö/g;
    $s =~ s/&Uuml;/Ü/g;
    $s =~ s/&szlig;/ß/g;
    $s =~ s/&AElig;/Æ/g;
    $s =~ s/&amp;/&/g;        # last, so "&amp;auml;" is not decoded twice
    $s =~ s/[\/\?*!]/_/g;     # that's not all yet!
    return $s;
}

# Turns an (already meta-quoted) name into a more tolerant regex source:
# '_' and '!' match up to two special characters, '&' also matches and/und,
# umlauts also match their two-letter transcription and vice versa.
sub fuzzy_string {
    my $s = shift;

    $s =~ s/[_!]/[_\\\/\\?\\\\*!]{0,2}/g;
    $s =~ s/&/(?:&| ?and ?| ?und ?)/g;
    $s =~ s/(ä|ae|Æ)/(?:[äÆ]|ae)/g;
    $s =~ s/(ö|oe)/(?:ö|oe)/g;
    $s =~ s/(ü|ue)/(?:ü|ue)/g;
    $s =~ s/(Ä|Ae)/(?:Ä|ae)/g;
    $s =~ s/(Ö|Oe)/(?:Ö|oe)/g;
    $s =~ s/(Ü|Ue)/(?:Ü|ue)/g;
    $s =~ s/(ß|ss)/(?:ß|ss)/g;    # that's not all yet!
    return $s;
}

# Quotes the regex meta characters {}[]()^$.|*+?\ with a backslash.
sub quote_meta_chars {
    my $s = shift;

    $s =~ s/([\{\}\[\]\(\)\^\$\.\|\*\+\?\\])/\\$1/g;
    return $s;
}

# Substitutes each of \/:*?"<>| in $str with $char.
sub subst_disallowed_chars {
    my $str  = shift;
    my $char = shift;

    $str =~ s/[\\\/:*\?"<>\|]/$char/g;
    return $str;
}

# Deletes a trailing file extension, e.g. '.mp3'. Only extensions of at most
# five characters after the last dot are cut.
sub cut_file_extension {
    my $s = shift;

    my $pt_pos = index(scalar reverse($s), '.');    # distance of last dot from the end
    if ($pt_pos > -1 && $pt_pos < 6) {
        $s = substr($s, 0, length($s) - $pt_pos - 1);
    }
    return $s;
}

# Removes the first occurrence of $needle from $haystack (plain text, no regex).
sub subtract_str {
    my $haystack = shift;
    my $needle   = shift;

    my $pos = index($haystack, $needle);
    $haystack = substr($haystack, 0, $pos).substr($haystack, $pos + length($needle))
        if ($pos > -1);
    return $haystack;
}

# Searches the html music list for the album and links files with track names.
#   $verbose   : verbosity level
#   $noalbum   : 1 = do not search for a whole album (not implemented, dies)
#   $notrackno : 1 = new names do not contain the track number
#   $TPE1      : artist
#   $TALB      : album
#   @_ (rest)  : file names found in the directory
#   returns    : hash old file name => new base name (without extension); a
#                value may be '' when an association was withdrawn.
sub get_info_from_html_file {
    my $verbose   = shift;
    my $noalbum   = shift;
    my $notrackno = shift;
    my $TPE1      = shift;
    my $TALB      = shift;
    my @files     = @_;

    # every non-alphanumeric character of artist/album may be any such character
    my $TPE1_fuzzy = $TPE1;
    my $TALB_fuzzy = $TALB;
    $TPE1_fuzzy =~ s/[^a-zA-Z0-9]/[^a-zA-Z0-9]/g;
    $TALB_fuzzy =~ s/[^a-zA-Z0-9]/[^a-zA-Z0-9]/g;

    my %info;
    my @html_lines = loadFile($html_file_path, $verbose);

    die 'not implemented yet'."\n" if $noalbum == 1;

    # extract album from html-file: drop everything up to and including the
    # line holding the album anchor
    print 'a name=([^>]+>){1,2}'.$TPE1_fuzzy.' - '.$TALB_fuzzy."\n" if $verbose > 2;
    my $album_found = 0;
    while (@html_lines) {
        my $line = dehtml(shift(@html_lines));
        if ($line =~ /a name=([^>]+>){1,2}${TPE1_fuzzy} - ${TALB_fuzzy}/i) {
            $album_found = 1;
            last;
        }
    }
    print 'not found in html-file'."\n" if !$album_found && $verbose > 0;

    # Collect the track lines following the anchor, reformatted to
    # "NN artist - title". CD header lines are skipped, any other line ends
    # the album.
    my @html_file_tracknames;
    for my $line (@html_lines) {
        if ($line =~ /^\t\d\d\s-\s/) {
            my $entry = subtract_str($line, ' (ianacd)');
            # cuts the leading tab and the last 5 characters of the line
            # (presumably "<br>" plus the newline - TODO confirm)
            push(@html_file_tracknames,
                dehtml(substr($entry, 1, 2)." $TPE1".substr($entry, 3, length($entry) - 8)));
        }
        elsif ($line !~ /^\tCD\d(?:<br>|\s*$)/) {
            # NOTE(review): header format assumed to be "<tab>CDn", optionally
            # followed by <br> - verify against the html file.
            last;
        }
    }

    if ($verbose > 1) {
        print 'found: '."\n ".@html_file_tracknames.' tracks in htmlfile'."\n";
        print ' '.@files.' files in dir'."\n";
    }

    # $track_no_bucket[n] holds the index of the file linked with track n,
    # or -1 while the track is still available. Index 0 is never a track.
    my @track_no_bucket = (1);    # any value but -1
    for my $i (0 .. $#html_file_tracknames) {
        $html_file_tracknames[$i] = subst_disallowed_chars($html_file_tracknames[$i], '_');
        push(@track_no_bucket, -1);    # init bucket
    }

    # loop over all tracknames (found in html_file)
    for my $i (0 .. $#html_file_tracknames) {
        # cut /\d\d $artist - /
        my $html_file_trackname = substr($html_file_tracknames[$i], 6 + length($TPE1));
        my $html_file_trackname_quoted
            = fuzzy_string(quote_meta_chars($html_file_trackname));

        # loop over all filenames
        for (my $j = 0; $j < @files; ++$j) {
            my $temp_no_track;
            my $occupant;

            # identification using 2-digit number in filename
            if ($files[$j] =~ /^(?:\Q$TALB\E|\Q$TPE1\E)?[-_ ]{0,3}(\d\d)\D/) {
                my $digits = $1;
                # extract this tracknumber
                $temp_no_track = (substr($digits, 0, 1) eq '0') ? substr($digits, 1, 1) : $digits;
                $occupant = $track_no_bucket[$temp_no_track];
                if (defined $occupant && $occupant == -1) {    # tracknumber still available
                    $track_no_bucket[$temp_no_track] = $j;     # occupy tracknumber
                    print 'tracing: '.$digits.' ; '.$temp_no_track.' ; ' if $verbose > 2;
                    # link file with track_name (from html_file)
                    $info{$files[$j]} = $html_file_tracknames[$temp_no_track - 1];
                    print $files[$j].' ; '.$info{$files[$j]}."\n" if $verbose > 2;
                    last;
                }
                else {    # tracknumber occupied already
                    print 'maybe there will occur an error. mixed up filenames or something...'."\n"
                        if $verbose > 0;
                }
            }

            # now identification using parts of name
            my $temp_file_trackname
                = fuzzy_string(quote_meta_chars(cut_file_extension($files[$j])));

            # (filename similar to trackname)?
            next unless $files[$j] =~ /$html_file_trackname_quoted/i
                || $html_file_trackname =~ /$temp_file_trackname/i;

            $temp_no_track = (substr($html_file_tracknames[$i], 0, 1) eq '0')
                ? substr($html_file_tracknames[$i], 1, 1)
                : substr($html_file_tracknames[$i], 0, 2);
            $occupant = $track_no_bucket[$temp_no_track];
            if (defined $occupant && $occupant == -1) {    # tracknumber still available
                $track_no_bucket[$temp_no_track] = $j;     # occupy tracknumber
                $info{$files[$j]} = $html_file_tracknames[$i];
                last;
            }

            # tracknumber occupied already
            print 'maybe there will occur an error. mixed up filenames or something...'."\n"
                if $verbose > 0;
            if ($verbose > 1) {
                print ' track_no ='.substr($html_file_tracknames[$i], 0, 2)."\n";
                print ' html_trackname ='.$html_file_trackname."\n";
                print ' file_trackname ='.$files[$j]."\n";
                print ' in conflict with '.$files[$track_no_bucket[$temp_no_track]]." (hitherto existing)\n";
            }
            next unless $files[$j] =~ /$html_file_trackname_quoted/i;

            $temp_file_trackname = subtract_str(cut_file_extension($files[$j]), $TPE1);
            $temp_file_trackname = substr($temp_file_trackname, 3)
                if (substr($temp_file_trackname, 0, 3) eq ' - ');
            if ($temp_file_trackname eq $html_file_trackname) {
                # exact name match: hand the track over to this file and
                # withdraw it from the file that held it so far
                my $holder = $track_no_bucket[$temp_no_track];
                $info{$files[$j]}      = $info{$files[$holder]};
                $info{$files[$holder]} = '';
                # swapping the two file names keeps the bucket entry valid
                @files[$holder, $j] = @files[$j, $holder];
                print ' ...tried to correct that' if $verbose > 0;
                print ' by changing association to '.$files[$holder] if $verbose > 1;
                print ".\n" if $verbose > 0;
            }
            else {
                print ' nothing changed'."\n" if $verbose > 1;
            }
        }
    }

    if ($verbose > 0) {
        my $track_no = 0;
        for my $bucket (@track_no_bucket) {
            print 'trackname of track #'.$track_no.' not found!'."\n" if $bucket == -1;
            ++$track_no;
        }
    }

    # The matching above relies on the leading "NN " of every track name, so
    # the number is removed only now.
    if ($notrackno == 1) {
        for my $file (keys %info) {
            next if !defined $info{$file} || $info{$file} eq '';
            $info{$file} = substr($info{$file}, 3);
        }
    }

    return %info;
}

# Renames the matching files of the current directory (and of its
# subdirectories when requested).
#   $working_dir : path of the directory; the process must already have
#                  changed into it
#   @_ (rest)    : the settings hash as returned by syntaxCheck
sub search_dir {
    my $working_dir = shift;
    my %params      = @_;

    my $filesRE     = $params{'filesRA'};
    my $recursively = $params{'recursively'};
    my $verbose     = $params{'verbose'};
    my $test        = $params{'test'};
    my $sortToDir   = $params{'sortToDir'};
    my $noalbum     = $params{'noalbum'};
    my $notrackno   = $params{'notrackno'};
    my $useid3      = $params{'useid3'};
    my $useid3v1    = $params{'useid3v1'};
    my $useid3v2    = $params{'useid3v2'};
    my $TPE1        = $params{'TPE1'};
    my $TALB        = $params{'TALB'};

    # '.' and '..' stand for the names of the current and the parent directory
    my @reversed_parts   = split(/[\/\\]/, scalar reverse($working_dir));
    my $current_dir_name = defined $reversed_parts[0] ? scalar reverse($reversed_parts[0]) : '';
    my $parent_dir_name  = defined $reversed_parts[1] ? scalar reverse($reversed_parts[1]) : '';
    $TPE1 = $parent_dir_name  if ($params{'TPE1'} eq '..');
    $TPE1 = $current_dir_name if ($params{'TPE1'} eq '.');
    $TALB = $parent_dir_name  if ($params{'TALB'} eq '..');
    $TALB = $current_dir_name if ($params{'TALB'} eq '.');

    my @files;
    my @dirs;
    print "\n".' '.$working_dir.'/'."\n" if $verbose > 1;

    # collect relevant files
    opendir(my $dir_handle, ".") || die $working_dir.": $!";
    while (defined(my $entry = readdir($dir_handle))) {
        if (-d $entry) {
            push(@dirs, $entry);
        }
        elsif ($entry =~ /$filesRE/) {
            push(@files, $entry);
        }
        else {
            print 'skip: '.$entry."\n" if $verbose > 1;
        }
    }
    closedir($dir_handle);
    @dirs = sort(@dirs);    # optional

    # match old filenames with new ones
    my %info;
    if ($useid3 == 1 || $useid3v1 == 1 || $useid3v2 == 1) {
        my @id3_options;
        @id3_options = ('ID3v2', 'ID3v1') if $useid3 == 1;
        @id3_options = ('ID3v1')          if $useid3v1 == 1;
        @id3_options = ('ID3v2')          if $useid3v2 == 1;
        for my $file (@files) {
            # read from (intern) mp3_file_id3_tags
            $info{$file} = get_info_from_id3($working_dir.'/'.$file, $verbose, $notrackno, @id3_options);
        }
    }
    else {
        # read from (extern) html_music_list
        %info = get_info_from_html_file($verbose, $noalbum, $notrackno, $TPE1, $TALB, @files);
    }

    if ($sortToDir == 1 && keys(%info) > 0) {
        my $album_dir = $TPE1.'/'.$TALB;
        if ($test == 0) {
            mkdir($TPE1, 0777) if not -e $TPE1;
            die 'directory exists already!'."\n" if -e $album_dir;
            mkdir($album_dir, 0777);
        }
        elsif (-e $TPE1) {
            print 'directory of album already exists!'."\n" if -e $album_dir;
        }
    }

    # renaming
    while (my ($old, $new) = each %info) {
        if (!defined $new || $new eq '') {
            # no (or a withdrawn) association: never rename a file to ".mp3"
            print 'skip: '.$old."\n" if $verbose > 1;
            next;
        }
        print 'old: '.$old."\n".' -> '.$new.'.mp3'."\n" if $verbose > 0;
        next if $test != 0;

        if (!rename($old, $new.'.mp3')) {
            warn 'could not rename "'.$old.'": '.$!."\n";
            next;
        }
        if ($sortToDir == 1) {
            move($new.'.mp3', $TPE1.'/'.$TALB.'/'.$new.'.mp3')
                or warn 'could not move "'.$new.'.mp3": '.$!."\n";
        }
    }

    if ($recursively == 1) {
        for my $dir (@dirs) {
            next if $dir eq '.' || $dir eq '..';
            if (!chdir($dir)) {
                # staying here would process this directory a second time
                warn 'could not enter "'.$working_dir.'/'.$dir.'": '.$!."\n";
                next;
            }
            search_dir($working_dir.'/'.$dir, %params);
            chdir($working_dir) or die $working_dir.": $!";
        }
    }
}

# Entry point: parses the arguments, processes the current directory and
# changes back into it afterwards.
sub rename_music_files {
    my %params = syntaxCheck(@_);

    my $working_dir = cwd;
    search_dir($working_dir, %params);
    chdir($working_dir);
}

rename_music_files(@ARGV);