source: trunk/npemap.org.uk/scripts/freethepostcode.org-importer/importer.py @ 577

Last change on this file since 577 was 577, checked in by Nick Burch, 12 years ago

Add --verbose flag, and don't report changes to postcodes unless it's set

  • Property svn:executable set to *
File size: 6.2 KB
Line 
1#!/usr/bin/python
2#               Importer from FreeThePostcode.org
3#               ---------------------------------
4#
5# Imports FreeThePostcode data into the current schema, removing any
6#  existing FreeThePostcode data.
7#
8# You will need to tweak this script with your database settings for it
9#  to work.
10
11from pyPgSQL import PgSQL
12from geo_helper import turn_wgs84_into_osgb36, turn_osgb36_into_eastingnorthing, \
13                                                turn_wgs84_into_osie36, turn_osie36_into_eastingnorthing
14import os
15import sys
16
17# Database settings
18dbtype = "postgres"
19dbname = "npemap"
20dbhost = ""
21dbuser = "npemap"
22dbpass = ""
23
24
25# What arguments did they pass in?
26verbose = False
27download = False
28confirm_update = True
29for arg in sys.argv[1:]: 
30        if arg == "--verbose":
31                verbose = True
32        if arg == "--download":
33                download = True
34        if arg == "--no-confirm":
35                confirm_update = False
36
37
38# Connect to the database
39dbh = None
40if dbtype == "pgsql" or dbtype == "postgres" or dbtype == "postgresql":
41        if len(dbhost):
42                dbh = PgSQL.connect(database=dbname, host=dbhost, user=dbuser, password=dbpass)
43        else:
44                dbh = PgSQL.connect(database=dbname, user=dbuser, password=dbpass)
45else:
46        raise Exception("Unknown dbtype %s" % dbtype)
47
48
49# Check what source value FreeThePostcode data will have
50ftp_source_name = "FreeThePostcode.org Importer"
51ftp_source = None
52
53sql = "SELECT id FROM sources WHERE name = %s"
54sth = dbh.cursor()
55sth.execute(sql, ftp_source_name)
56ids = sth.fetchall()
57sth.close()
58if len(ids) == 1:
59        for id in (ids):
60                ftp_source = id.id
61else:
62        print "Unable to find ID for source '%s' - error code %d" % (ftp_source_name, len(ids))
63
64
65# See if our delete reason exists, and if no, add it
66delete_reason = None
67reason_text = "Gone from FTP"
68while delete_reason == None:
69        sql = "SELECT id FROM delete_reasons WHERE reason = %s"
70        sth = dbh.cursor()
71        sth.execute(sql, reason_text)
72        ids = sth.fetchall()
73        sth.close()
74        if len(ids) == 1:
75                for id in (ids):
76                        delete_reason = id.id
77        else:
78                sth = dbh.cursor()
79                sth.execute("INSERT INTO delete_reasons (reason) VALUES (%s)", reason_text)
80                sth.close()
81
82
83# Download the latest list of postcodes if needed
84current_file = None
85if os.path.isfile("currentlist"):
86        current_file = "currentlist"
87if os.path.isfile("/tmp/currentlist"):
88        current_file = "/tmp/currentlist"
89
90if not download and not current_file == None:
91        print "Data found, do you wish to re-download?"
92        redownload = raw_input("")
93        if redownload == "y" or redownload == "yes":
94                download = True
95else:
96        download = True
97
98if download:
99        url = "http://www.freethepostcode.org/currentlist"
100        print "Downloading from %s" % url
101        os.system("wget -O currentlist '%s'" % url)
102        current_file = "currentlist"
103print ""
104
105
106# Read in the new list
107postcodes = []
108ftpc = open(current_file, 'r')
109for line in ftpc:
110        line = line[0:-1]
111        if line[0:1] == "#":
112                continue
113
114        # Replace double spaces
115        line = line.replace("  "," ")
116
117        parts = line.split(" ")
118        if not len(parts) == 4:
119                print "Invalid line '%s'" % line
120
121        pc = {}
122        pc["outer"] = parts[2]
123        pc["inner"] = parts[3]
124        pc["raw"] = "%s %s" % (parts[2],parts[3])
125        pc["raw_outer"] = parts[2]
126        pc["raw_inner"] = parts[3]
127
128        pc["easting"] = None
129        pc["northing"] = None
130        pc["ie_easting"] = None
131        pc["ie_northing"] = None
132
133        # Turn lat+long into easting+northing
134        # All NI postcodes are BT
135        if pc["outer"][0:2] == 'BT':
136                osll = turn_wgs84_into_osie36(parts[0], parts[1], 0)
137                en = turn_osie36_into_eastingnorthing(osll[0], osll[1])
138                pc["ie_easting"] = en[0]
139                pc["ie_northing"] = en[1]
140        else:
141                osll = turn_wgs84_into_osgb36(parts[0], parts[1], 0)
142                en = turn_osgb36_into_eastingnorthing(osll[0], osll[1])
143                pc["easting"] = en[0]
144                pc["northing"] = en[1]
145
146        postcodes.append(pc)
147
148
149# Grab all of the current ones
150sql = "SELECT id, outward, inward, deleted FROM postcodes WHERE source = %s AND NOT deleted"
151sth = dbh.cursor()
152sth.execute(sql, ftp_source)
153
154ftps = {}
155while 1:
156        row = sth.fetchone()
157        if row == None:
158                break
159        id, outward, inward, deleted = row
160        ftps["%s %s" % (outward, inward)] = \
161                                { 'outward':outward, 'inward':inward, 'id':id, 'deleted':deleted, 'done':False }
162sth.close()
163count = len(ftps.keys())
164
165print "There are currently %d entries in the database from freethepostcode.org" % count
166print "The new import contains %d entries" % len(postcodes)
167
168# Only prompt if it's a big difference
169if confirm_update and \
170        (count == 0 or len(postcodes) == 0 or 
171        len(postcodes) < count or (len(postcodes)-count) > 50):
172        print "Are you sure you wish to run an import?"
173        confirm = raw_input("")
174        print ""
175
176        if confirm == "y" or confirm == "yes":
177                # Good, go ahead
178                pass
179        else:
180                print ""
181                raise Exception("Aborting import")
182
183
184# Add the latest list to the database, tweaking if already there
185add_sql = "INSERT INTO postcodes (outward, inward, raw_postcode_outward, raw_postcode_inward, easting, northing, ie_easting, ie_northing, source) VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s)"
186upd_sql = "UPDATE postcodes SET outward=%s, inward=%s, raw_postcode_outward=%s, raw_postcode_inward=%s, easting=%s, northing=%s, ie_easting=%s, ie_northing=%s WHERE id=%s"
187del_sql = "UPDATE postcodes SET deleted='t', delete_reason=%s WHERE id=%s"
188sth = dbh.cursor()
189worked = 0
190
191# Add the postcodes
192for postcode in postcodes:
193        pc = "%s %s" % (postcode["outer"], postcode["inner"])
194        if verbose:
195                print "Processing %s" % pc
196
197        if ftps.has_key(pc):
198                # Update existing one
199                ftps[pc]['done'] = True
200                sth.execute(upd_sql, (postcode["outer"], postcode["inner"], postcode["raw_outer"], postcode["raw_inner"], postcode["easting"], postcode["northing"], postcode["ie_easting"], postcode["ie_northing"], ftps[pc]['id']))
201                if verbose:
202                        print "\tupdated record at %d" % ftps[pc]['id']
203        else:
204                # New record, add
205                sth.execute(add_sql, (postcode["outer"], postcode["inner"], postcode["raw_outer"], postcode["raw_inner"], postcode["easting"], postcode["northing"], postcode["ie_easting"], postcode["ie_northing"], ftp_source))
206                if verbose:
207                        print "\tadded new postcode"
208                else:
209                        print "Added postcode %s" % pc
210        worked = worked + 1
211
212print "Processed %d entries" % worked
213
214
215# Find ones that have gone
216print "\nDeleting any postcodes no longer in FTP:"
217for gone_pc in [ pc for pc in ftps.keys() if not ftps[pc]['done'] ]:
218        print "\tflagging as deleted old FTP postcode %s" % gone_pc
219        sth.execute(del_sql, (delete_reason, ftps[gone_pc]['id']))
220
221# All done
222print "\nAll Done"
223sth.close()
224dbh.commit()
225dbh.close()
Note: See TracBrowser for help on using the repository browser.