#!/usr/bin/perl
# Last change: PFB 6 May 2001 11:48 pm
# srv12.cgi
############################################################################
# POD Documentation

=head1 PROGRAM NAME AND AUTHOR

SR - Version 1.2
Build Date: May 5, 2001
srv12.cgi
by Peter F. Brown
peterbrown@worldcommunity.com

=head1 WHAT IT IS

sr (search and replace)
(ok, it's not an original name...)

It's quite fast. It processed 13,402,165 replaces, (13.4 million)
(a 67 meg text file with 163,441 lines) in 1 minute, 10 seconds
on a Pentium 166 with 64 megs of RAM. (This was the test in v1.0)

On one client's system it processed 13,029 files, with 7,487 replaces,
in 11 seconds.

As an alternate method, I recommend using VEDIT, the fastest huge file
text editor in the world, at 'www.vedit.com'.

HELP: type "sr -h" for help

Edit the s/r values below if you're not using command line parameters.

=head1 COPYRIGHT

Copyright 2001 Peter F. Brown

SR complies with the GNU GENERAL PUBLIC LICENSE
and is released as "Open Source Software".
NO WARRANTY IS OFFERED FOR THE USE OF THIS SOFTWARE !

Just remember. Back up your file first! (when you mess with huge data
sets, please do save yourself grief and back the file up :-)

=head1 BUG REPORTS AND SUPPORT

Send bug reports to peterbrown@worldcommunity.com.

Visit the author's web site at 'worldcommunity.com' to view information
about support, customer quotes, a resume link, and fees for custom
Perl/MySQL programming.

=head1 OBTAINING THE LATEST VERSION

==> Get the most recent version of this program at:
http://worldcommunity.com

=head1 REQUIREMENTS

Perl 5

=cut

use strict;
use warnings;
use File::Find;

##############################################################################
# setup area

# these are the approved file extensions that the
# program will look for.
my @file_extensions = qw[htm html shtml txt cgi pl js];

# I recommend using an extension for the log file below that
# is NOT included in the array above.
my $log_file = './sr.log';

# end of setup area
##############################################################################

# Clear the screen where a `clear` command exists; stay quiet where it doesn't.
my $clear = `clear`;
print $clear if defined $clear;

my $sr_header = qq~sr v1.2 - replaces characters in a text file.
by Peter F. Brown; peterbrown\@worldcommunity.com
Copyright 2001 Peter F. Brown. All Rights Reserved Worldwide.
Open Source Software. [http://worldcommunity.com]~;

# Run parameters. Filled in from the command line, or from the
# hand-edited values in the "-u" branch below.
my ($input_file, $search_string, $replace_string);
my ($prompt_replace, $save_backups, $outfile, $double_check, $case_sensitive);

if (@ARGV == 1 and $ARGV[0] eq '-u') {
    print "\nUsing values in text file.\n";

    # EDIT VALUES HERE IF YOU'RE NOT USING COMMAND LINE PARAMETERS
    ###################################################################
    # you can use regular expressions here, if you're brave.
    # NOTE: this version doesn't support $1 parenthesizing
    # (perhaps in the next version)
    # note for DOS users: be careful of the 8.3 and \ conventions
    # when you name your files. Otherwise, sr should work under DOS.
    # NOTE: Using single quotes may have a different effect.
    $input_file     = "";
    $search_string  = "";
    $replace_string = "";
    $prompt_replace = "yes";
    $save_backups   = "yes";
    $outfile        = "outfile.sr";
    $double_check   = "no";
    $case_sensitive = "no";
    ################ end of s/r editing ###############################
}
elsif (@ARGV == 1 and $ARGV[0] eq '-h') {
    help_header();
}
elsif (@ARGV == 3) {
    ($input_file, $search_string, $replace_string) = @ARGV;
    $prompt_replace = 'yes';
    $save_backups   = 'yes';
    $outfile        = 'outfile.sr';
    $double_check   = 'no';
    $case_sensitive = 'no';
}
elsif (@ARGV == 8) {
    ($input_file, $search_string, $replace_string, $prompt_replace,
        $save_backups, $outfile, $double_check, $case_sensitive) = @ARGV;
}
else {
    help_header();
}

# convert input vars
# A blank value falls back to the default, as the error messages promise.
$prompt_replace = yes_no_option('Prompt Replace', $prompt_replace, 'yes');
$save_backups   = yes_no_option('Save Backups',   $save_backups,   'yes');
$double_check   = yes_no_option('Double Check',   $double_check,   'no');
$case_sensitive = yes_no_option('Case Sensitive', $case_sensitive, 'no');

if (!defined $input_file or $input_file eq '') {
    print "\nInput File must not be blank.\n";
    print "Exiting ... \n\n";
    exit;
}

# An empty pattern is special in Perl (it re-uses the last successful
# match), so an empty search string must be rejected outright.
if (!defined $search_string or $search_string eq '') {
    print "\nSearch String must not be blank.\n";
    print "Exiting ... \n\n";
    exit;
}
$replace_string = '' unless defined $replace_string;

# Compile the search pattern once; this also reports a malformed regular
# expression before any file has been touched.
my $search_re = eval {
    $case_sensitive eq 'yes' ? qr/$search_string/ : qr/$search_string/i;
};
unless (defined $search_re) {
    print "\nSearch String is not a valid regular expression:\n$@\n";
    print "Exiting ... \n\n";
    exit;
}

# check user input
#.............................
$| = 1;

# check for Unix or DOS, for console input
my $console = -e '/dev/tty' ? '/dev/tty' : 'con';

my $confirm_fh;
unless (open($confirm_fh, '<', $console)) {
    print "Can't open console: $!\n";
    exit;
}
#..............................

while (1) {
    print qq~
$sr_header

You have specified the following:

Input File:      $input_file
Search String:   $search_string
Replace String:  $replace_string
Prompt Replace:  $prompt_replace (prompts at each replace)
Save Backups:    $save_backups
Temp File:       $outfile
Double Check:    $double_check (double checks each replace)
Case Sensitive:  $case_sensitive

NOTE: 'case_sensitive' only applies to searching.
The replace value will use the case of the 'replace_string'.

NOTE: If Input File equals 'CURDIR', then all the TEXT files
in the current directory and all of its subdirectories will be processed.

NOTE: If Save Backups is set to 'yes', then the input file
will be copied to ${input_file}.BAK

In either case, the input file ($input_file) will be overwritten
with the temp file, for 'in place' editing.

Do you wish to continue (enter only "y" or "n")?
~;

    my $answer = read_answer($confirm_fh);
    $answer = 'n' unless defined $answer;    # EOF on the console: do not proceed

    last if $answer eq 'y';
    if ($answer eq 'n') {
        close($confirm_fh);
        print "\n";
        exit;
    }
    # anything else: ask again
}

close($confirm_fh);

#........................................................................
#........................................................................
#... process ...

my $files_processed      = 0;
my $grand_total_replaces = 0;
my $start                = time;

# open log file; use append mode
my $log_fh;
unless (open($log_fh, '>>', $log_file)) {
    print "Error opening log file ($log_file): $!\n";
    exit;
}

print {$log_fh} "\nSR LOG File:\n\n";
print {$log_fh} qq~
SEARCHING USING THE FOLLOWING PARAMETERS:

Input File:      $input_file
Search String:   $search_string
Replace String:  $replace_string
Prompt Replace:  $prompt_replace (prompts at each replace)
Save Backups:    $save_backups
Temp File:       $outfile
Double Check:    $double_check (double checks each replace)
Case Sensitive:  $case_sensitive

===========================================================================
~;

if ($input_file eq 'CURDIR') {
    # we go into dir mode
    find(\&process_file, '.');
}
else {
    process_file('single_file');
}

my $seconds = time - $start;
my $minutes = $seconds / 60;

report("Processed $files_processed Files and $grand_total_replaces Replaces in $minutes minutes ($seconds seconds.)\n");

close($log_fh);

exit;

###########################################################################
# yes_no_option
#
# syntax: $value = yes_no_option($label, $value, $default);
#
# Lower-cases a yes/no setting and returns it. A blank (or undefined)
# value yields $default. Any other value prints an explanation that
# names $label and exits the program.

sub yes_no_option {
    my ($label, $value, $default) = @_;

    $value = '' unless defined $value;
    $value = lc $value;

    return $default if $value =~ /\A\s*\z/;
    return $value   if $value eq 'yes' or $value eq 'no';

    print "\n$label must equal either 'yes' or 'no' or BLANK.\n";
    print "If you leave it blank, it will default to '" . uc($default) . "'.\n";
    print "Exiting ... \n\n";
    exit;
}

###########################################################################
# report
#
# syntax: report($text, ...);
#
# Writes the same text to the log file and to standard output.

sub report {
    my (@text) = @_;

    print {$log_fh} @text;
    print @text;
    return;
}

###########################################################################
# read_answer
#
# syntax: $answer = read_answer($filehandle);
#
# Reads one line from the handle and returns it lower-cased without its
# line ending. Returns undef at end of input so callers can stop asking.

sub read_answer {
    my ($fh) = @_;

    my $answer = <$fh>;
    return unless defined $answer;

    # chomp, not chop: a final line without a newline keeps its last character
    chomp $answer;
    return lc $answer;
}

###########################################################################
# process_file
#
# syntax: process_file('single_file');     # process $input_file
#         find(\&process_file, @dirs);     # File::Find callback
#
# Runs the search and replace over one file, writing the result to the
# temp file and then renaming it over the input. As a File::Find callback
# it receives no arguments and takes the file name from $_ and the path
# used in messages from $File::Find::name.
#
# Files are skipped (with a message) when the extension is not approved,
# when the name is a directory, or when the file fails Perl's -T text test.

sub process_file {
    my ($file_mode) = @_;

    my ($file, $file_long);
    if (defined $file_mode and $file_mode eq 'single_file') {
        $file      = $input_file;
        $file_long = $input_file;
    }
    else {
        $file      = $_;
        $file_long = $File::Find::name;
    }

    $files_processed++;

    # quotemeta (\Q..\E) keeps an extension from being read as regex syntax
    my $good_ext = grep { $file =~ /\.\Q$_\E\z/ } @file_extensions;

    unless ($good_ext) {
        report("File $file_long does not end in an approved extension. Skipping.\n");
        return;
    }

    if (-d $file) {
        report("Skipping $file_long (directory.)\n");
        return;
    }

    unless (-T $file) {
        report("Skipping $file_long (NOT a TEXT File.)\n");
        return;
    }

    my $backup_file = $file . '.BAK';

    # get file info, so ownership and permissions can be restored afterwards
    my @info = stat($file);
    unless (@info) {
        report("Error with input file ($file_long): $!\n");
        return;
    }
    my $permissions = $info[2] & 07777;
    my $file_uid    = $info[4];
    my $file_gid    = $info[5];

    # open input file
    my $in_fh;
    unless (open($in_fh, '<', $file)) {
        report("Error with input file ($file_long): $!\n");
        return;
    }

    # open temp output file
    my $out_fh;
    unless (open($out_fh, '>', $outfile)) {
        report("Error with output file ($outfile): $!\n");
        close($in_fh);
        return;
    }

    my $lines    = 0;
    my $replaces = 0;

    report("\nProcessing $file_long\n");

    # file looping here
    #............................
    # A named loop variable leaves $_ alone, which File::Find relies on.
    while (my $line = <$in_fh>) {

        if ($line =~ $search_re) {

            if ($prompt_replace eq 'yes') {
                my $line_check = $line;

                report("\nPRIOR TO REPLACE:\n... [${line_check}]\n\n");

                if (user_prompt('Do you wish to replace these occurrences?') eq 'y') {

                    $line_check =~ s/$search_re/$replace_string/g;

                    report("\nAFTER REPLACE:\n... [${line_check}]\n\n");

                    if ($double_check ne 'yes'
                        or user_prompt('Was the replace done correctly?') eq 'y')
                    {
                        $line = $line_check;
                        $replaces++;
                    }
                    else {
                        report("\nError in Replace. Line Replace Not Saved. Skipping.\n");
                    }
                }
            }
            else {
                # prompt replace equals no, so just go ahead and replace it
                $line =~ s/$search_re/$replace_string/g;
                $replaces++;
            }

        }    # line contains search string

        print {$out_fh} $line;
        $lines++;
    }
    # end of file looping
    #............................

    close($in_fh);

    # Buffered write errors (e.g. a full disk) surface at close; never
    # replace the input with a temp file that was not written completely.
    unless (close($out_fh)) {
        report("ALERT! can't finish writing $outfile: $!\n");
        report("$file_long was left unchanged.\n");
        return;
    }

    # rename files
    # restore permissions and ownership
    if ($save_backups eq 'yes') {
        # Without a backup the original must not be overwritten.
        unless (rename($file, $backup_file)) {
            report("ALERT! can't rename $file_long to $backup_file: $!\n");
            report("$file_long was left unchanged; the result is in $outfile.\n");
            return;
        }

        chown($file_uid, $file_gid, $backup_file) == 1
            or report("ALERT! can't chown $backup_file: $!\n");
        chmod($permissions, $backup_file) == 1
            or report("ALERT! can't chmod $backup_file: $!\n");

        report("Wrote backup to: $backup_file.\n");
    }

    unless (rename($outfile, $file)) {
        report("ALERT! can't rename $outfile to $file_long: $!\n");
        return;
    }

    chown($file_uid, $file_gid, $file) == 1
        or report("ALERT! can't chown $file_long: $!\n");
    chmod($permissions, $file) == 1
        or report("ALERT! can't chmod $file_long: $!\n");

    # finish
    if ($replaces > 0) {
        report("Processed $replaces replaces of $search_string with $replace_string in $file_long.\n");
    }
    else {
        report("No replaces done.\n");
    }

    $grand_total_replaces += $replaces;

    report("Lines processed: $lines\n\n");
    return;
}

###########################################################################
# help_header
#
# Prints the usage text and exits the program.

sub help_header {

    print qq~
$sr_header

SR is quite fast. It processed 13,402,165 replaces, (13.4 million)
(a 67 meg text file with 163,441 lines) in 1 minute, 10 seconds
on a Pentium 166 with 64 megs of RAM.

On one client's system it processed 13,029 files, with 7,487 replaces,
in 11 seconds.

Syntax: sr <input_file> <search_string> <replace_string>
    OPTIONAL: <prompt_replace: yes or no>
    OPTIONAL: <save_backups: yes or no>
    OPTIONAL: <temp_file>
    OPTIONAL: <double_check: yes or no>
    OPTIONAL: <case_sensitive: yes or no>

(Give either none or all five of the optional values.)
"sr -u" uses the values edited into the program; "sr -h" shows this help.

NOTE: 'case_sensitive' only applies to searching.
The replace value will use the case of the 'replace_string'.

NOTE: If Input File equals 'CURDIR', then all the TEXT files
in the current directory and all of its subdirectories will be processed.

NOTE: If Save Backups is set to 'yes', then the input file
will be copied to <input_file>.BAK

In either case, the input file will be overwritten
with the temp file, for 'in place' editing.

Note: You can use regular expressions in your s/r values.
Using single or double quotes may change your s/r values.

REMEMBER: BACK UP YOUR FILE FIRST!!!

~;

    exit;
}

###########################################################################
# user_prompt
#
# syntax: $response = user_prompt($message);
#
# Asks a yes/no question on the console and repeats it until the answer
# is "y" or "n" (case-insensitive). Returns 'y' or 'n'. End of input on
# the console counts as 'n'. Exits the program if the console cannot be
# opened.

sub user_prompt {
    my ($message) = @_;

    $message = $message . '("(y) yes", "(n) no")?' . "\n";

    my $tty;
    unless (open($tty, '<', $console)) {
        report("Can't open console: $!\n");
        exit;
    }
    #..............................

    my $answer;
    while (1) {
        print $message;

        $answer = read_answer($tty);
        $answer = 'n' unless defined $answer;    # EOF: take the safe answer

        last if $answer eq 'y' or $answer eq 'n';
    }

    close($tty);

    return $answer;
}

###########################################################################