#!/usr/bin/perl
# sr.cgi
############################################################################
# POD Documentation

=head1 PROGRAM NAME AND AUTHOR

    SR - Version 1.0
    Build Date: May 20, 1998

    sr.cgi
    by Peter F. Brown
    peterbrown@futurerealm.com

=head1 WHAT IT IS

sr (search and replace) (ok, it's not an original name...)
replaces characters in a file in chunks of bytes.

It's quite fast. It processed 13,402,165 replaces, (13.4 million)
(a 67 meg text file with 163,441 lines) in 1 minute, 10 seconds
on a Pentium 166 with 64 megs of RAM.

As an alternate method, I recommend using VEDIT, the fastest
huge file text editor in the world, at 'www.vedit.com'.

HELP: type "sr -h" for help.

Edit the s/r values below if you're not using command line parameters.

The number of replaces is counted and reported at the end of the run.
The count is taken from the return value of the substitution itself,
so it does not require a second pass over the data.

=head1 GOTCHAS IN NEED OF IMPROVEMENT

The file is processed in chunks. Each chunk is cut at the last newline
it contains, and the unfinished line is carried over into the next chunk,
so a phrase that sits on a single line (for example 'my phrase') is never
split between two chunks.

Two cases can still be missed:

A pattern that is meant to match across a line break may have the line
break fall exactly on a chunk boundary.

If a chunk contains no newline at all (binary data, or one enormous
line), it is processed as-is and a phrase may straddle the boundary.

This needs improvement...

=head1 COPYRIGHT

Copyright 1998 Peter F. Brown (FutureRealm Productions)

FutureSQL complies with the GNU GENERAL PUBLIC LICENSE
and is released as "Open Source Software".
NO WARRANTY IS OFFERED FOR THE USE OF THIS SOFTWARE !

Just remember. Back up your file first!
(when you mess with huge data sets, please do save yourself
grief and back the file up :-)

=head1 BUG REPORTS AND SUPPORT

Send bug reports to peterbrown@futurerealm.com.
Visit the author's web site at 'worldcommunity.com' to
view information about support, customer quotes, a resume link,
and fees for custom Perl/MySQL programming.

=head1 OBTAINING THE LATEST VERSION

==> Get the most recent version of this program at:
http://futurerealm.com/opensource

=head1 REQUIREMENTS

Perl 5 (5.6 or later, for three-argument open and lexical filehandles)

=cut

##############################################################################

use strict;
use warnings;

# Output file used when none is given on the command line.
my $DEFAULT_OUTFILE = "outfile.sr";

# reading in 1 meg chunks seems to be an optimized amount for speed
my $CHUNKSIZE = 1000000;

my ($input_file, $outfile, $search_string, $replace_string);

if (@ARGV == 1 and $ARGV[0] eq "-u") {

    print "\nUsing values in text file.\n";

    # EDIT VALUES HERE IF YOU'RE NOT USING COMMAND LINE PARAMETERS
    ###################################################################
    # you can use regular expressions here, if you're brave.
    # NOTE: this version doesn't support $1 parenthesizing
    # (perhaps in the next version)

    $input_file = "";
    $outfile    = "outfile.sr";

    # note for DOS users: be careful of the 8.3 and \ conventions
    # when you name your files. Otherwise, sr should work under DOS.

    # NOTE: Using single quotes may have a different effect.
    $search_string  = "";
    $replace_string = "";

    ################ end of s/r editing ###############################
}
elsif (@ARGV == 1 and $ARGV[0] eq "-h") {
    help_header();
}
elsif (@ARGV == 3 or @ARGV == 4) {
    # sr <input_file> <search_string> <replace_string> [<outfile>]
    ($input_file, $search_string, $replace_string, $outfile) = @ARGV;
    $outfile = $DEFAULT_OUTFILE unless defined $outfile;
}
else {
    help_header();
}

# check user input
#.............................

# Opening the output file truncates it, so writing onto the input file
# would destroy the data before it is read. This is a plain string
# comparison: it does not detect two different paths to the same file.
if ($input_file eq $outfile) {
    die "Input file and output file are both '$input_file'; "
      . "refusing to overwrite the input. sr -h for help.\n";
}

# Compile the search expression once, before any file is touched, so a
# malformed pattern is reported without truncating the output file.
my $search_re = eval { qr/$search_string/ };
unless (defined $search_re) {
    die "Invalid search expression '$search_string': $@";
}

$| = 1;    # unbuffered STDOUT so the prompt and progress lines show at once

# check for Unix or DOS, for console input
my $console = (-e "/dev/tty") ? "/dev/tty" : "con";

open(my $user_prompt, '<', $console)
    or die "Can't open $console: $!\n";

#..............................

# Keep asking until the user answers "y" (go on) or "n" (quit).
while (1) {

    print "\n", banner_text(), <<"END_PROMPT";

You have specified the following:

Input File:     $input_file
Output File:    $outfile
Search String:  $search_string
Replace String: $replace_string

Do you wish to continue (enter only "y" or "n")?
END_PROMPT

    my $continue = <$user_prompt>;

    # End-of-file on the console can never turn into a "y"; treat it as "n"
    # instead of prompting forever.
    unless (defined $continue) {
        close($user_prompt);
        print "\n";
        exit;
    }

    $continue =~ s/\s+\z//;    # drop the line ending (LF or CR LF)
    $continue = lc($continue);

    last if $continue eq "y";

    if ($continue eq "n") {
        close($user_prompt);
        print "\n";
        exit;
    }
}

close($user_prompt);

#.....................................
# process...

my $start = time;

# open input file
open(my $in_file, '<', $input_file)
    or die "Error with input file '$input_file': $!. sr -h for help.\n";

# open output file
open(my $out_file, '>', $outfile)
    or die "Error with output file '$outfile': $!. sr -h for help.\n";

# -s yields an empty string for a zero-length file; normalise it to 0.
my $filesize  = (-s $input_file) || 0;
my $chunk     = 0;
my $processed = 0;
my $repnum    = 0;

# Unfinished last line of the previous chunk, prepended to the next one.
my $carry = '';

print "\nProcessing $input_file\n\n";

# file looping here
#............................
while (1) {

    my $got = read($in_file, my $line, $CHUNKSIZE);
    die "Error reading '$input_file': $!\n" unless defined $got;
    last if $got == 0;

    $line  = $carry . $line;
    $carry = '';

    # Unless this was the final chunk, hold back everything after the last
    # newline so that no line is cut in two at the chunk boundary. When the
    # buffer holds no newline at all it is processed whole, which keeps the
    # carried text smaller than one chunk.
    unless (eof($in_file)) {
        my $last_nl  = rindex($line, "\n");
        my $tail_len = length($line) - $last_nl - 1;
        if ($last_nl >= 0 and $tail_len > 0) {
            $carry = substr($line, $last_nl + 1, $tail_len, '');
        }
    }

    # s///g returns the number of substitutions made (false when none),
    # so the replace count needs no separate matching pass.
    $repnum += ($line =~ s/$search_re/$replace_string/g) || 0;

    print {$out_file} $line
        or die "Error writing to '$outfile': $!\n";

    $chunk++;
    $processed = $chunk * $CHUNKSIZE;
    if ($processed > $filesize) { $processed = $filesize; }
    print "Processed $processed of $filesize\n";
}
# end of file looping
#............................

close($in_file);

# Buffered write errors (e.g. a full disk) only surface at close time.
close($out_file)
    or die "Error closing output file '$outfile': $!\n";

my $seconds = time - $start;
my $minutes = sprintf("%.2f", $seconds / 60);

print "\nReplaced $search_string with $replace_string in FILE: $input_file.\n";
print "Wrote output to NEWFILE: $outfile.\n\n";

if ($repnum > 0) {
    print "Processed $repnum replaces in $minutes minutes ($seconds seconds.)\n";
}
else {
    print "Processed in $minutes minutes ($seconds seconds.)\n";
}

exit;

# end

######################
# banner_text: returns the program name / author / copyright lines that
# head both the confirmation prompt and the help message.
sub banner_text {
    return <<'END_BANNER';
sr v1.0 - replaces characters in a text file.
by Peter F. Brown; peterbrown@futurerealm.com
Copyright 1998 Peter F. Brown. All Rights Reserved Worldwide.
Open Source Software. [http://futurerealm.com/opensource]
END_BANNER
}

######################
# help_header: prints the usage message to STDOUT and exits the program.
# Takes no arguments and never returns to the caller.
sub help_header {

    print "\n", banner_text(), <<'END_HELP';

It's quite fast. It processed 13,402,165 replaces, (13.4 million)
(a 67 meg text file with 163,441 lines) in 1 minute, 10 seconds
on a Pentium 166 with 64 megs of RAM.

Syntax: "sr <input_file> <search_string> <replace_string> <outfile>"

<outfile> equals "outfile.sr" if not specified.

Note: You can use regular expressions in your s/r values.
Using single or double quotes may change your s/r values.
You can type the s/r values into the "sr" program file
if you wish to use complicated regular expressions.
(Although sr doesn't support $1 parenthesizing - yet.)

"sr -u" to use values in file.
"sr -h" for this help message.

REMEMBER: BACK UP YOUR FILE FIRST!!!

END_HELP

    exit;
}

###########################################################################