Thursday, February 27, 2014

A Data Processing Script for Lucy Li

~/doc/Lucy/processData.pl.html
 1 #!/usr/bin/perl -w
 2 #===============================================================================
 3 #
 4 #         FILE:  processData.pl
 5 #
 6 #        USAGE:  perl processData.pl datafile
 7 #
 8 #  DESCRIPTION:  for Lucy Li. 
 9 #                valid data are those lines with 9 columns
10 #
11 #      OPTIONS:  ---
12 #  ASSUMPTIONS:  1. valid data are those rows with at least 5 columns
13 #                2. all valid data patterns repeat exact the same times
14 #                3. column 5 is the data to be extracted
15 #                4. column 1 and 2 are pattern names
16 #         BUGS:  ---
17 #        NOTES:  ---
18 #       AUTHOR:   Lucy Li
19 #      COMPANY:  
20 #      VERSION:  1.0
21 #      CREATED:  02/03/14 15:36:11 PST
22 #     REVISION:  ---
23 #===============================================================================
24 
25 use strict;
26 use warnings;
27 
28 $, = ",";  #create csv file
29 my $datafile = shift || die "$!\n";
30 my $firstData = 0;   #hold first data pattern
31 my $firstDataFound = 0;  #used for header print
32 my %testData = ();   #hole test data
33 my $headerPrinted = 0;
34 open(DF,$datafile) || die "$!\n";
35  while(){
36   s/"//g;
37   my @data = split(/,/,$_);
38   #ignore those rows first column is not 4 digits
39   if($data[0] =~ m/^\d\d\d\d$/){
40    my $header = $data[0] . "(" . $data[1] . ")";
41    if($firstDataFound == 0 ){
42       $firstData = $data[0];
43       $firstDataFound = 1;
44    }else {
45       if($data[0] eq $firstData){
46           #print header
47        if($headerPrinted ==0){
48         print sort keys(%testData);
49         print "\n";
50         $headerPrinted = 1;
51       }
52        foreach my $value (sort keys(%testData)){
53         print "$testData{$value},";
54        }
55        print "\n";
56       }
57   }
58     $testData{$header} = $data[4];
59   }
60  }
61  foreach my $value (sort keys(%testData)){
62   print "$testData{$value},";
63  }
64  #print values(%testData);
65  print "\n";
66 close(DF);

No comments:

Post a Comment