1 | #!/usr/bin/perl |
---|
2 | # |
---|
3 | # Notes on usage: |
---|
4 | # This is fairly rough and ready. I lost the will to refine it any further |
---|
5 | # since it's essentially a one-shot affair. |
---|
6 | # You need to run it in two passes: |
---|
7 | # - import-postuk < postuk.csv |
---|
8 | # - import-postuk multi < postuk.csv |
---|
9 | # This is because many of the dupe districts are in the lines where one |
---|
10 | # town is listed for many districts and we prefer not to use them |
---|
11 | # If we add them last they'll be caught by the uniqueness requirement |
---|
12 | # of the DB |
---|
13 | # |
---|
14 | # There are a few other cases in the data I'm using where there are dupes |
---|
15 | # We don't really need to care that much about them. |
---|
16 | |
---|
17 | use strict; |
---|
18 | use warnings; |
---|
19 | |
---|
20 | use FindBin; |
---|
21 | use Text::CSV; |
---|
22 | |
---|
23 | # Find our private perl libraries |
---|
24 | use lib "$FindBin::Bin/../perllib"; |
---|
25 | use NPEMap::Postcodes; |
---|
26 | use NPEMap; |
---|
27 | use Data::Dumper; |
---|
28 | |
---|
29 | my $multi; |
---|
30 | |
---|
31 | if ($ARGV[0] && ($ARGV[0] eq 'multi')) { |
---|
32 | $multi = 1; |
---|
33 | } |
---|
34 | |
---|
35 | my $dbh = setup_dbh() or die $!; |
---|
36 | my $csv = Text::CSV->new; |
---|
37 | |
---|
38 | my $sth = $dbh->prepare('INSERT INTO districts (outward, district,county) VALUES (?, ?, ?)'); |
---|
39 | |
---|
40 | #"Aberdeen","Aberdeenshire","AB1, AB2","Aberdeen","Scotland","" |
---|
41 | #"Blackburn","Lancashire","BB1, BB2 ... BB6","Blackburn","England","" |
---|
42 | |
---|
43 | while (<STDIN>) { |
---|
44 | my $rawline = $_; |
---|
45 | # God this data is shoddy |
---|
46 | s// /; |
---|
47 | $csv->parse($_); |
---|
48 | my @fields = $csv->fields; |
---|
49 | if ($fields[0] and $fields[2]) { |
---|
50 | $fields[2] =~ s/(\w) (\w)/$1, $2/; |
---|
51 | $fields[2] =~ s/(\w),(\w)/$1, $2/; |
---|
52 | my @codes = split /, /, $fields[2]; |
---|
53 | if ((scalar(@codes) > 1) or $fields[2] =~ /\.\.\./) { |
---|
54 | next unless $multi; |
---|
55 | } else { |
---|
56 | next if $multi; |
---|
57 | } |
---|
58 | foreach my $code (@codes) { |
---|
59 | if ($code =~ /(\S+) \.\.\. (\S+)/) { |
---|
60 | my $first = $1; |
---|
61 | my $second = $2; |
---|
62 | $first =~ /(\w+?)(\d+)/; |
---|
63 | my $first_alpha = $1; |
---|
64 | my $first_num = $2; |
---|
65 | $second =~ /(\w+?)(\d+)/; |
---|
66 | my $second_alpha = $1; |
---|
67 | my $second_num = $2; |
---|
68 | $first_alpha =~ s/\W+//; |
---|
69 | $second_alpha =~ s/\W+//; |
---|
70 | die "bad loop" if ($first_alpha ne $second_alpha); |
---|
71 | for ($first_num .. $second_num) { |
---|
72 | $_ =~ s/\W+//; |
---|
73 | my $munged_code = $first_alpha . $_; |
---|
74 | if (!$sth->execute($munged_code, $fields[0], $fields[1])) { |
---|
75 | print STDERR $munged_code . "\n"; |
---|
76 | } |
---|
77 | |
---|
78 | } |
---|
79 | } else { |
---|
80 | $code =~ s/\W+//; |
---|
81 | if (!$sth->execute($code, $fields[0], $fields[1])) { |
---|
82 | print STDERR $code . "\n"; |
---|
83 | } |
---|
84 | } |
---|
85 | } |
---|
86 | } |
---|
87 | } |
---|
88 | |
---|
89 | $dbh->disconnect; |
---|