source: trunk/npemap.org.uk/scripts/freethepostcode.org-importer/importer.py @ 644

Last change on this file since 644 was 644, checked in by sheldon, 11 years ago

With --no-confirm, don't delete more than 100 postcodes, same as we don't add more than 100

  • Property svn:executable set to *
File size: 7.6 KB
Line 
1#!/usr/bin/python
2#
3# Copyright (c) 2006-2007 Nick Burch and Dominic Hargreveaves
4# Permission is hereby granted, free of charge, to any person obtaining a
5# copy of this software and associated documentation files (the "Software"),
6# to deal in the Software without restriction, including without limitation
7# the rights to use, copy, modify, merge, publish, distribute, sublicense,
8# and/or sell copies of the Software, and to permit persons to whom the
9# Software is furnished to do so, subject to the following conditions:
10#
11# The above copyright notice and this permission notice shall be included in
12# all copies or substantial portions of the Software.
13#
14# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
19# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
20# IN THE SOFTWARE.
21#
22#               Importer from FreeThePostcode.org
23#               ---------------------------------
24#
25# Imports FreeThePostcode data into the current schema, removing any
26#  existing FreeThePostcode data.
27#
28# You will need to tweak this script with your database settings for it
29#  to work.
30
31from pyPgSQL import PgSQL
32from geo_helper import turn_wgs84_into_osgb36, turn_osgb36_into_eastingnorthing, \
33                                                turn_wgs84_into_osie36, turn_osie36_into_eastingnorthing
34import os
35import sys
36
37# Database settings
38dbtype = "postgres"
39dbname = "npemap"
40dbhost = ""
41dbuser = "npemap"
42dbpass = ""
43
44
45# What arguments did they pass in?
46verbose = False
47download = False
48confirm_update = True
49for arg in sys.argv[1:]: 
50        if arg == "--verbose":
51                verbose = True
52        if arg == "--download":
53                download = True
54        if arg == "--no-confirm":
55                confirm_update = False
56
57
58# Connect to the database
59dbh = None
60if dbtype == "pgsql" or dbtype == "postgres" or dbtype == "postgresql":
61        if len(dbhost):
62                dbh = PgSQL.connect(database=dbname, host=dbhost, user=dbuser, password=dbpass)
63        else:
64                dbh = PgSQL.connect(database=dbname, user=dbuser, password=dbpass)
65else:
66        raise Exception("Unknown dbtype %s" % dbtype)
67
68
69# Check what source value FreeThePostcode data will have
70ftp_source_name = "FreeThePostcode.org Importer"
71ftp_source = None
72
73sql = "SELECT id FROM sources WHERE name = %s"
74sth = dbh.cursor()
75sth.execute(sql, ftp_source_name)
76ids = sth.fetchall()
77sth.close()
78if len(ids) == 1:
79        for id in (ids):
80                ftp_source = id.id
81else:
82        print "Unable to find ID for source '%s' - error code %d" % (ftp_source_name, len(ids))
83
84
85# See if our delete reason exists, and if no, add it
86delete_reason = None
87reason_text = "Gone from FTP"
88while delete_reason == None:
89        sql = "SELECT id FROM delete_reasons WHERE reason = %s"
90        sth = dbh.cursor()
91        sth.execute(sql, reason_text)
92        ids = sth.fetchall()
93        sth.close()
94        if len(ids) == 1:
95                for id in (ids):
96                        delete_reason = id.id
97        else:
98                sth = dbh.cursor()
99                sth.execute("INSERT INTO delete_reasons (reason) VALUES (%s)", reason_text)
100                sth.close()
101
102
103# Download the latest list of postcodes if needed
104current_file = None
105if os.path.isfile("currentlist"):
106        current_file = "currentlist"
107if os.path.isfile("/tmp/currentlist"):
108        current_file = "/tmp/currentlist"
109
110if not download and not current_file == None:
111        print "Data found, do you wish to re-download?"
112        redownload = raw_input("")
113        if redownload == "y" or redownload == "yes":
114                download = True
115else:
116        download = True
117
118if download:
119        url = "http://www.freethepostcode.org/currentlist"
120        if verbose:
121                print "Downloading from %s" % url
122        os.system("wget --quiet -O currentlist '%s'" % url)
123        current_file = "currentlist"
124        if verbose:
125                print ""
126
127
128# Read in the new list
129postcodes = []
130ftpc = open(current_file, 'r')
131for line in ftpc:
132        line = line[0:-1]
133        if line[0:1] == "#":
134                continue
135
136        # Replace double spaces
137        line = line.replace("  "," ")
138
139        parts = line.split(" ")
140        if not len(parts) == 4:
141                print "Invalid line '%s'" % line
142
143        pc = {}
144        pc["outer"] = parts[2]
145        pc["inner"] = parts[3]
146        pc["raw"] = "%s %s" % (parts[2],parts[3])
147        pc["raw_outer"] = parts[2]
148        pc["raw_inner"] = parts[3]
149
150        pc["easting"] = None
151        pc["northing"] = None
152        pc["ie_easting"] = None
153        pc["ie_northing"] = None
154
155        # Turn lat+long into easting+northing
156        # All NI postcodes are BT
157        if pc["outer"][0:2] == 'BT':
158                osll = turn_wgs84_into_osie36(parts[0], parts[1], 0)
159                en = turn_osie36_into_eastingnorthing(osll[0], osll[1])
160                pc["ie_easting"] = en[0]
161                pc["ie_northing"] = en[1]
162        else:
163                osll = turn_wgs84_into_osgb36(parts[0], parts[1], 0)
164                en = turn_osgb36_into_eastingnorthing(osll[0], osll[1])
165                pc["easting"] = en[0]
166                pc["northing"] = en[1]
167
168        postcodes.append(pc)
169
170
171# Grab all of the current ones
172sql = "SELECT id, outward, inward, deleted FROM postcodes WHERE source = %s AND NOT deleted"
173sth = dbh.cursor()
174sth.execute(sql, ftp_source)
175
176ftps = {}
177while 1:
178        row = sth.fetchone()
179        if row == None:
180                break
181        id, outward, inward, deleted = row
182        ftps["%s %s" % (outward, inward)] = \
183                                { 'outward':outward, 'inward':inward, 'id':id, 'deleted':deleted, 'done':False }
184sth.close()
185count = len(ftps.keys())
186
187print "There are currently %d entries in the database from freethepostcode.org" % count
188print "The new import contains %d entries" % len(postcodes)
189
190# Only prompt if it's a big difference
191if confirm_update:
192        if (count == 0 or len(postcodes) == 0 or 
193                        len(postcodes) < count or (len(postcodes)-count) > 50):
194                print "Are you sure you wish to run an import?"
195                confirm = raw_input("")
196                print ""
197
198                if confirm == "y" or confirm == "yes":
199                        # Good, go ahead
200                        pass
201                else:
202                        print ""
203                        raise Exception("Aborting import")
204else:
205        # Don't trash everything even with --no-confirm
206        if len(postcodes) == 0 or abs(len(postcodes)-count) > 100:
207                raise Exception("Postcode count too different, not running (re-run without --no-confirm to allow")
208
209
210# Add the latest list to the database, tweaking if already there
211add_sql = "INSERT INTO postcodes (outward, inward, raw_postcode_outward, raw_postcode_inward, easting, northing, ie_easting, ie_northing, source) VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s)"
212upd_sql = "UPDATE postcodes SET outward=%s, inward=%s, raw_postcode_outward=%s, raw_postcode_inward=%s, easting=%s, northing=%s, ie_easting=%s, ie_northing=%s WHERE id=%s"
213del_sql = "UPDATE postcodes SET deleted='t', delete_reason=%s WHERE id=%s"
214sth = dbh.cursor()
215worked = 0
216
217# Add the postcodes
218for postcode in postcodes:
219        pc = "%s %s" % (postcode["outer"], postcode["inner"])
220        if verbose:
221                print "Processing %s" % pc
222
223        if ftps.has_key(pc):
224                # Update existing one
225                ftps[pc]['done'] = True
226                sth.execute(upd_sql, (postcode["outer"], postcode["inner"], postcode["raw_outer"], postcode["raw_inner"], postcode["easting"], postcode["northing"], postcode["ie_easting"], postcode["ie_northing"], ftps[pc]['id']))
227                if verbose:
228                        print "\tupdated record at %d" % ftps[pc]['id']
229        else:
230                # New record, add
231                sth.execute(add_sql, (postcode["outer"], postcode["inner"], postcode["raw_outer"], postcode["raw_inner"], postcode["easting"], postcode["northing"], postcode["ie_easting"], postcode["ie_northing"], ftp_source))
232                if verbose:
233                        print "\tadded new postcode"
234                else:
235                        print "Added postcode %s" % pc
236        worked = worked + 1
237
238print "Processed %d entries" % worked
239
240
241# Find ones that have gone
242if verbose:
243        print "\nDeleting any postcodes no longer in FTP:"
244for gone_pc in [ pc for pc in ftps.keys() if not ftps[pc]['done'] ]:
245        print "\tflagging as deleted old FTP postcode %s" % gone_pc
246        sth.execute(del_sql, (delete_reason, ftps[gone_pc]['id']))
247
248# All done
249if verbose:
250        print "\nAll Done"
251sth.close()
252dbh.commit()
253dbh.close()
Note: See TracBrowser for help on using the repository browser.