#!/usr/bin/perl -w
#===============================================================================
#
#         FILE: processData.pl
#
#        USAGE: perl processData.pl datafile
#
#  DESCRIPTION: for Lucy Li.
#               Reads a comma-separated data file and pivots it into CSV on
#               STDOUT: one column per pattern, named "col1(col2)", and one
#               output row per repetition of the pattern set.
#
#      OPTIONS: ---
#  ASSUMPTIONS: 1. valid data are those rows with at least 5 columns whose
#                  first column is exactly 4 digits; all other rows are ignored
#               2. all valid data patterns repeat exact the same times
#               3. column 5 is the data to be extracted
#               4. column 1 and 2 are pattern names
#         BUGS: ---
#        NOTES: Double quotes are simply stripped before splitting on commas,
#               so quoted fields that themselves contain commas are not
#               supported.
#       AUTHOR: Lucy Li
#      COMPANY:
#      VERSION: 1.0
#      CREATED: 02/03/14 15:36:11 PST
#     REVISION: ---
#===============================================================================

use strict;
use warnings;

# Print one CSV record built from the current pattern values.
#   $value_of    - hash ref mapping "col1(col2)" keys to their latest value
#   $with_header - true to print the sorted key names as a header line first
# Columns are always emitted in sorted key order so rows line up with the header.
sub print_record {
    my ($value_of, $with_header) = @_;

    my @columns = sort keys %{$value_of};
    print join(',', @columns), "\n" if $with_header;
    print join(',', @{$value_of}{@columns}), "\n";
    return;
}

my $datafile = shift @ARGV;
die "Usage: perl $0 datafile\n"
    unless defined $datafile && length $datafile;

my $first_key;              # key of the first valid row; seeing it again
                            # marks the start of the next repetition
my %value_of;               # latest column-5 value for each pattern key
my $header_printed = 0;     # header goes out once, before the first record

open my $fh, '<', $datafile
    or die "Cannot open '$datafile': $!\n";

LINE:
while (my $line = <$fh>) {
    $line =~ s/\r?\n\z//;   # drop the line ending (LF or CRLF)
    $line =~ s/"//g;        # naive unquoting, see NOTES above

    # Limit -1 keeps trailing empty fields, so an empty column 5 still counts.
    my @data = split /,/, $line, -1;

    next LINE if @data < 5;                     # assumption 1: >= 5 columns
    next LINE if $data[0] !~ /\A\d{4}\z/;       # first column must be 4 digits

    my $key = "$data[0]($data[1])";

    if (!defined $first_key) {
        $first_key = $key;
    }
    elsif ($key eq $first_key) {
        # The first pattern came round again: the previous repetition is
        # complete, so flush it before overwriting its values.
        print_record(\%value_of, !$header_printed);
        $header_printed = 1;
    }

    $value_of{$key} = $data[4];
}

close $fh
    or warn "Error closing '$datafile': $!\n";

if (%value_of) {
    # Flush the final repetition. This is also the only record (and the only
    # chance to print the header) when the patterns occur just once.
    print_record(\%value_of, !$header_printed);
}
else {
    warn "No valid data rows found in '$datafile'\n";
}
Thursday, February 27, 2014
A Data Processing Script for Lucy Li
Subscribe to:
Post Comments (Atom)
No comments:
Post a Comment