source: trunk/npemap.org.uk/scripts/freethepostcode.org-importer/importer.py @ 622

Last change on this file since 622 was 622, checked in by Dominic Hargreaves, 12 years ago

make some more messages verbose-only

  • Property svn:executable set to *
File size: 6.5 KB
Line 
1#!/usr/bin/python
2#               Importer from FreeThePostcode.org
3#               ---------------------------------
4#
5# Imports FreeThePostcode data into the current schema, removing any
6#  existing FreeThePostcode data.
7#
8# You will need to tweak this script with your database settings for it
9#  to work.
10
11from pyPgSQL import PgSQL
12from geo_helper import turn_wgs84_into_osgb36, turn_osgb36_into_eastingnorthing, \
13                                                turn_wgs84_into_osie36, turn_osie36_into_eastingnorthing
14import os
15import sys
16
17# Database settings
18dbtype = "postgres"
19dbname = "npemap"
20dbhost = ""
21dbuser = "npemap"
22dbpass = ""
23
24
25# What arguments did they pass in?
26verbose = False
27download = False
28confirm_update = True
29for arg in sys.argv[1:]: 
30        if arg == "--verbose":
31                verbose = True
32        if arg == "--download":
33                download = True
34        if arg == "--no-confirm":
35                confirm_update = False
36
37
38# Connect to the database
39dbh = None
40if dbtype == "pgsql" or dbtype == "postgres" or dbtype == "postgresql":
41        if len(dbhost):
42                dbh = PgSQL.connect(database=dbname, host=dbhost, user=dbuser, password=dbpass)
43        else:
44                dbh = PgSQL.connect(database=dbname, user=dbuser, password=dbpass)
45else:
46        raise Exception("Unknown dbtype %s" % dbtype)
47
48
49# Check what source value FreeThePostcode data will have
50ftp_source_name = "FreeThePostcode.org Importer"
51ftp_source = None
52
53sql = "SELECT id FROM sources WHERE name = %s"
54sth = dbh.cursor()
55sth.execute(sql, ftp_source_name)
56ids = sth.fetchall()
57sth.close()
58if len(ids) == 1:
59        for id in (ids):
60                ftp_source = id.id
61else:
62        print "Unable to find ID for source '%s' - error code %d" % (ftp_source_name, len(ids))
63
64
65# See if our delete reason exists, and if no, add it
66delete_reason = None
67reason_text = "Gone from FTP"
68while delete_reason == None:
69        sql = "SELECT id FROM delete_reasons WHERE reason = %s"
70        sth = dbh.cursor()
71        sth.execute(sql, reason_text)
72        ids = sth.fetchall()
73        sth.close()
74        if len(ids) == 1:
75                for id in (ids):
76                        delete_reason = id.id
77        else:
78                sth = dbh.cursor()
79                sth.execute("INSERT INTO delete_reasons (reason) VALUES (%s)", reason_text)
80                sth.close()
81
82
83# Download the latest list of postcodes if needed
84current_file = None
85if os.path.isfile("currentlist"):
86        current_file = "currentlist"
87if os.path.isfile("/tmp/currentlist"):
88        current_file = "/tmp/currentlist"
89
90if not download and not current_file == None:
91        print "Data found, do you wish to re-download?"
92        redownload = raw_input("")
93        if redownload == "y" or redownload == "yes":
94                download = True
95else:
96        download = True
97
98if download:
99        url = "http://www.freethepostcode.org/currentlist"
100        if verbose:
101                print "Downloading from %s" % url
102        os.system("wget --quiet -O currentlist '%s'" % url)
103        current_file = "currentlist"
104print ""
105
106
107# Read in the new list
108postcodes = []
109ftpc = open(current_file, 'r')
110for line in ftpc:
111        line = line[0:-1]
112        if line[0:1] == "#":
113                continue
114
115        # Replace double spaces
116        line = line.replace("  "," ")
117
118        parts = line.split(" ")
119        if not len(parts) == 4:
120                print "Invalid line '%s'" % line
121
122        pc = {}
123        pc["outer"] = parts[2]
124        pc["inner"] = parts[3]
125        pc["raw"] = "%s %s" % (parts[2],parts[3])
126        pc["raw_outer"] = parts[2]
127        pc["raw_inner"] = parts[3]
128
129        pc["easting"] = None
130        pc["northing"] = None
131        pc["ie_easting"] = None
132        pc["ie_northing"] = None
133
134        # Turn lat+long into easting+northing
135        # All NI postcodes are BT
136        if pc["outer"][0:2] == 'BT':
137                osll = turn_wgs84_into_osie36(parts[0], parts[1], 0)
138                en = turn_osie36_into_eastingnorthing(osll[0], osll[1])
139                pc["ie_easting"] = en[0]
140                pc["ie_northing"] = en[1]
141        else:
142                osll = turn_wgs84_into_osgb36(parts[0], parts[1], 0)
143                en = turn_osgb36_into_eastingnorthing(osll[0], osll[1])
144                pc["easting"] = en[0]
145                pc["northing"] = en[1]
146
147        postcodes.append(pc)
148
149
150# Grab all of the current ones
151sql = "SELECT id, outward, inward, deleted FROM postcodes WHERE source = %s AND NOT deleted"
152sth = dbh.cursor()
153sth.execute(sql, ftp_source)
154
155ftps = {}
156while 1:
157        row = sth.fetchone()
158        if row == None:
159                break
160        id, outward, inward, deleted = row
161        ftps["%s %s" % (outward, inward)] = \
162                                { 'outward':outward, 'inward':inward, 'id':id, 'deleted':deleted, 'done':False }
163sth.close()
164count = len(ftps.keys())
165
166print "There are currently %d entries in the database from freethepostcode.org" % count
167print "The new import contains %d entries" % len(postcodes)
168
169# Only prompt if it's a big difference
170if confirm_update:
171        if (count == 0 or len(postcodes) == 0 or 
172                        len(postcodes) < count or (len(postcodes)-count) > 50):
173                print "Are you sure you wish to run an import?"
174                confirm = raw_input("")
175                print ""
176
177                if confirm == "y" or confirm == "yes":
178                        # Good, go ahead
179                        pass
180                else:
181                        print ""
182                        raise Exception("Aborting import")
183else:
184        # Don't trash everything even with --no-confirm
185        if len(postcodes) == 0 or (len(postcodes)-count) > 100:
186                raise Exception("Postcode count too different, not running (re-run without --no-confirm to allow")
187
188
189# Add the latest list to the database, tweaking if already there
190add_sql = "INSERT INTO postcodes (outward, inward, raw_postcode_outward, raw_postcode_inward, easting, northing, ie_easting, ie_northing, source) VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s)"
191upd_sql = "UPDATE postcodes SET outward=%s, inward=%s, raw_postcode_outward=%s, raw_postcode_inward=%s, easting=%s, northing=%s, ie_easting=%s, ie_northing=%s WHERE id=%s"
192del_sql = "UPDATE postcodes SET deleted='t', delete_reason=%s WHERE id=%s"
193sth = dbh.cursor()
194worked = 0
195
196# Add the postcodes
197for postcode in postcodes:
198        pc = "%s %s" % (postcode["outer"], postcode["inner"])
199        if verbose:
200                print "Processing %s" % pc
201
202        if ftps.has_key(pc):
203                # Update existing one
204                ftps[pc]['done'] = True
205                sth.execute(upd_sql, (postcode["outer"], postcode["inner"], postcode["raw_outer"], postcode["raw_inner"], postcode["easting"], postcode["northing"], postcode["ie_easting"], postcode["ie_northing"], ftps[pc]['id']))
206                if verbose:
207                        print "\tupdated record at %d" % ftps[pc]['id']
208        else:
209                # New record, add
210                sth.execute(add_sql, (postcode["outer"], postcode["inner"], postcode["raw_outer"], postcode["raw_inner"], postcode["easting"], postcode["northing"], postcode["ie_easting"], postcode["ie_northing"], ftp_source))
211                if verbose:
212                        print "\tadded new postcode"
213                else:
214                        print "Added postcode %s" % pc
215        worked = worked + 1
216
217print "Processed %d entries" % worked
218
219
220# Find ones that have gone
221if verbose:
222        print "\nDeleting any postcodes no longer in FTP:"
223for gone_pc in [ pc for pc in ftps.keys() if not ftps[pc]['done'] ]:
224        print "\tflagging as deleted old FTP postcode %s" % gone_pc
225        sth.execute(del_sql, (delete_reason, ftps[gone_pc]['id']))
226
227# All done
228if verbose:
229        print "\nAll Done"
230sth.close()
231dbh.commit()
232dbh.close()
Note: See TracBrowser for help on using the repository browser.