source: trunk/npemap.org.uk/scripts/postuk/import-postuk @ 332

Last change on this file since 332 was 332, checked in by Dominic Hargreaves, 13 years ago

meh, I moved the script

  • Property svn:executable set to *
File size: 2.6 KB
Line 
1#!/usr/bin/perl
2#
3# Notes on usage:
4# This is fairly rough and ready. I lost the will to refine it any further
5# since it's essentially a one-shot affair.
6# You need to run it in two passes:
7# - import-postuk < postuk.csv
8# - import-postuk multi < postuk.csv
9# This is because many of the dupe districts are in the lines where one
10# town is listed for many districts and we prefer not to use them
11# If we add them last they'll be caught by the uniqueness requirement
12# of the DB
13#
14# There are a few other cases in the data I'm using where there are dupes
15# We don't really need to care that much about them.
16
17use strict;
18use warnings;
19
20use FindBin;
21use Text::CSV;
22
23# Find our private perl libraries
24use lib "$FindBin::Bin/../../perllib";
25use NPEMap::Postcodes;
26use NPEMap;
27use Data::Dumper;
28
29my $multi;
30
31if ($ARGV[0] && ($ARGV[0] eq 'multi')) {
32    $multi = 1;
33}
34
35my $dbh = setup_dbh() or die $!;
36my $csv = Text::CSV->new;
37
38my $sth = $dbh->prepare('INSERT INTO districts (outward, district,county) VALUES (?, ?, ?)');
39
40#"Aberdeen","Aberdeenshire","AB1, AB2","Aberdeen","Scotland",""
41#"Blackburn","Lancashire","BB1, BB2 ... BB6","Blackburn","England",""
42
43while (<STDIN>) {
44    my $rawline = $_;
45    # God this data is shoddy
46    s// /;
47    $csv->parse($_);
48    my @fields = $csv->fields;
49    if ($fields[0] and $fields[2]) {
50        $fields[2] =~ s/(\w) (\w)/$1, $2/;
51        $fields[2] =~ s/(\w),(\w)/$1, $2/;
52        my @codes = split /, /, $fields[2];
53        if ((scalar(@codes) > 1) or $fields[2] =~ /\.\.\./) {
54            next unless $multi;
55        } else {
56            next if $multi;
57        }
58        foreach my $code (@codes) {
59            if ($code =~ /(\S+) \.\.\. (\S+)/) {
60                my $first = $1;
61                my $second = $2;
62                $first =~ /(\w+?)(\d+)/;
63                my $first_alpha = $1;
64                my $first_num = $2;
65                $second =~ /(\w+?)(\d+)/;
66                my $second_alpha = $1;
67                my $second_num = $2;
68                $first_alpha =~ s/\W+//;
69                $second_alpha =~ s/\W+//;
70                die "bad loop" if ($first_alpha ne $second_alpha);
71                for ($first_num .. $second_num) {
72                    $_ =~ s/\W+//;
73                    my $munged_code = $first_alpha . $_;
74                    if (!$sth->execute($munged_code, $fields[0], $fields[1])) {
75                        print STDERR $munged_code . "\n";
76                    }
77
78                }
79            } else {
80                $code =~ s/\W+//;
81                if (!$sth->execute($code, $fields[0], $fields[1])) {
82                    print STDERR $code . "\n";
83                }
84            }
85        }
86    }       
87}
88
89$dbh->disconnect;
Note: See TracBrowser for help on using the repository browser.