source: trunk/npemap.org.uk/scripts/freethepostcode.org-importer/importer.py @ 127

Last change on this file since 127 was 127, checked in by Nick Burch, 15 years ago

Update for new schema. Don't require a prompt about the change if it's a sensible, small one

  • Property svn:executable set to *
File size: 3.9 KB
Line 
1#!/usr/bin/python
2#               Importer from FreeThePostcode.org
3#               ---------------------------------
4#
5# Imports FreeThePostcode data into the current schema, removing any
6#  existing FreeThePostcode data.
7#
8# You will need to tweak this script with your database settings for it
9#  to work.
10
11from pyPgSQL import PgSQL
12from geo_helper import turn_wgs84_into_osgb36, turn_osgb36_into_eastingnorthing
13import os
14
15# Database settings
16dbtype = "postgres"
17dbname = "npemaps"
18dbhost = "localhost"
19dbuser = "npemaps"
20dbpass = "npemaps"
21
22
23# Connect to the database
24dbh = None
25if dbtype == "pgsql" or dbtype == "postgres" or dbtype == "postgresql":
26        if len(dbhost):
27                dbh = PgSQL.connect(database=dbname, host=dbhost, user=dbuser, password=dbpass)
28        else:
29                dbh = PgSQL.connect(database=dbname, user=dbuser, password=dbpass)
30else:
31        raise Exception("Unknown dbtype %s" % dbtype)
32
33
34# Check what source value FreeThePostcode data will have
35ftp_source_name = "FreeThePostcode.org Importer"
36ftp_source = None
37
38sql = "SELECT id FROM sources WHERE name = %s"
39sth = dbh.cursor()
40sth.execute(sql, ftp_source_name)
41ids = sth.fetchall()
42sth.close()
43if len(ids) == 1:
44        for id in (ids):
45                ftp_source = id.id
46else:
47        print "Unable to find ID for source '%s' - error code %d" % (ftp_source_name, len(ids))
48
49
50# Download the latest list of postcodes if needed
51download = False
52current_file = None
53if os.path.isfile("currentlist"):
54        current_file = "currentlist"
55if os.path.isfile("/tmp/currentlist"):
56        current_file = "/tmp/currentlist"
57
58if not current_file == None:
59        print "Data found, do you wish to re-download?"
60        redownload = raw_input("")
61        if redownload == "y" or redownload == "yes":
62                download = True
63else:
64        download = True
65
66if download:
67        url = "http://www.freethepostcode.org/currentlist"
68        print "Downloading from %s" % url
69        os.system("wget -O currentlist '%s'" % url)
70        current_file = "currentlist"
71print ""
72
73
74# Read in the new list
75postcodes = []
76ftpc = open(current_file, 'r')
77for line in ftpc:
78        line = line[0:-1]
79        if line[0:1] == "#":
80                continue
81
82        parts = line.split(" ")
83        if not len(parts) == 4:
84                print "Invalid line '%s'" % line
85
86        # Turn lat+long into easting+northing
87        osll = turn_wgs84_into_osgb36(parts[0], parts[1], 0)
88        en = turn_osgb36_into_eastingnorthing(osll[0], osll[1])
89
90        pc = {}
91        pc["easting"] = en[0]
92        pc["northing"] = en[1]
93        pc["outer"] = parts[2]
94        pc["inner"] = parts[3]
95        pc["raw"] = "%s %s" % (parts[2],parts[3])
96        pc["raw_outer"] = parts[2]
97        pc["raw_inner"] = parts[3]
98
99        postcodes.append(pc)
100
101
102# Prompt before removing all the current ones
103sql = "SELECT COUNT(*) AS count FROM postcodes WHERE source = %s"
104sth = dbh.cursor()
105sth.execute(sql, ftp_source)
106counts = sth.fetchall()
107count = counts[0][0]
108sth.close()
109
110print "There are currently %d entries in the database from freethepostcode.org" % count
111print "The new import contains %d entries" % len(postcodes)
112
113# Only prompt if it's a big difference
114if count == 0 or len(postcodes) == 0 or len(postcodes) < count or (len(postcodes)-count) > 50:
115        print "Are you sure you wish to remove the old entries, to add new ones?"
116        confirm = raw_input("")
117        print ""
118
119        if confirm == "y" or confirm == "yes":
120                # Good, go ahead
121                pass
122        else:
123                print ""
124                raise Exception("Aborting import")
125
126
127# Delete the old entries
128sql = "DELETE FROM postcodes WHERE source = %s"
129sth = dbh.cursor()
130sth.execute(sql, ftp_source)
131print "Deleted %d entries" % sth.rowcount
132sth.close()
133dbh.commit()
134
135
136# Add the latest list to the database
137sql = "INSERT INTO postcodes (outward, inward, raw_postcode_outward, raw_postcode_inward, easting, northing, source) VALUES (%s, %s, %s, %s, %s, %s, %s)"
138sth = dbh.cursor()
139worked = 0
140for postcode in postcodes:
141        sth.execute(sql, (postcode["outer"], postcode["inner"], postcode["raw_outer"], postcode["raw_inner"], postcode["easting"], postcode["northing"], ftp_source))
142        worked = worked + sth.rowcount
143print "Added %d entries" % worked
144
145
146# All done
147sth.close()
148dbh.commit()
149dbh.close()
Note: See TracBrowser for help on using the repository browser.