source: trunk/npemap.org.uk/scripts/freethepostcode.org-importer/importer.py @ 576

Last change on this file since 576 was 576, checked in by Nick Burch, 12 years ago

Add flag checking for automated runs

  • Property svn:executable set to *
File size: 6.1 KB
Line 
1#!/usr/bin/python
2#               Importer from FreeThePostcode.org
3#               ---------------------------------
4#
5# Imports FreeThePostcode data into the current schema, removing any
6#  existing FreeThePostcode data.
7#
8# You will need to tweak this script with your database settings for it
9#  to work.
10
11from pyPgSQL import PgSQL
12from geo_helper import turn_wgs84_into_osgb36, turn_osgb36_into_eastingnorthing, \
13                                                turn_wgs84_into_osie36, turn_osie36_into_eastingnorthing
14import os
15import sys
16
17# Database settings
18dbtype = "postgres"
19dbname = "npemap"
20dbhost = ""
21dbuser = "npemap"
22dbpass = ""
23
24
25# What arguments did they pass in?
26download = False
27confirm_update = True
28for arg in sys.argv[1:]: 
29        if arg == "--download":
30                download = True
31        if arg == "--no-confirm":
32                confirm_update = False
33
34
35# Connect to the database
36dbh = None
37if dbtype == "pgsql" or dbtype == "postgres" or dbtype == "postgresql":
38        if len(dbhost):
39                dbh = PgSQL.connect(database=dbname, host=dbhost, user=dbuser, password=dbpass)
40        else:
41                dbh = PgSQL.connect(database=dbname, user=dbuser, password=dbpass)
42else:
43        raise Exception("Unknown dbtype %s" % dbtype)
44
45
46# Check what source value FreeThePostcode data will have
47ftp_source_name = "FreeThePostcode.org Importer"
48ftp_source = None
49
50sql = "SELECT id FROM sources WHERE name = %s"
51sth = dbh.cursor()
52sth.execute(sql, ftp_source_name)
53ids = sth.fetchall()
54sth.close()
55if len(ids) == 1:
56        for id in (ids):
57                ftp_source = id.id
58else:
59        print "Unable to find ID for source '%s' - error code %d" % (ftp_source_name, len(ids))
60
61
62# See if our delete reason exists, and if no, add it
63delete_reason = None
64reason_text = "Gone from FTP"
65while delete_reason == None:
66        sql = "SELECT id FROM delete_reasons WHERE reason = %s"
67        sth = dbh.cursor()
68        sth.execute(sql, reason_text)
69        ids = sth.fetchall()
70        sth.close()
71        if len(ids) == 1:
72                for id in (ids):
73                        delete_reason = id.id
74        else:
75                sth = dbh.cursor()
76                sth.execute("INSERT INTO delete_reasons (reason) VALUES (%s)", reason_text)
77                sth.close()
78
79
80# Download the latest list of postcodes if needed
81current_file = None
82if os.path.isfile("currentlist"):
83        current_file = "currentlist"
84if os.path.isfile("/tmp/currentlist"):
85        current_file = "/tmp/currentlist"
86
87if not download and not current_file == None:
88        print "Data found, do you wish to re-download?"
89        redownload = raw_input("")
90        if redownload == "y" or redownload == "yes":
91                download = True
92else:
93        download = True
94
95if download:
96        url = "http://www.freethepostcode.org/currentlist"
97        print "Downloading from %s" % url
98        os.system("wget -O currentlist '%s'" % url)
99        current_file = "currentlist"
100print ""
101
102
103# Read in the new list
104postcodes = []
105ftpc = open(current_file, 'r')
106for line in ftpc:
107        line = line[0:-1]
108        if line[0:1] == "#":
109                continue
110
111        # Replace double spaces
112        line = line.replace("  "," ")
113
114        parts = line.split(" ")
115        if not len(parts) == 4:
116                print "Invalid line '%s'" % line
117
118        pc = {}
119        pc["outer"] = parts[2]
120        pc["inner"] = parts[3]
121        pc["raw"] = "%s %s" % (parts[2],parts[3])
122        pc["raw_outer"] = parts[2]
123        pc["raw_inner"] = parts[3]
124
125        pc["easting"] = None
126        pc["northing"] = None
127        pc["ie_easting"] = None
128        pc["ie_northing"] = None
129
130        # Turn lat+long into easting+northing
131        # All NI postcodes are BT
132        if pc["outer"][0:2] == 'BT':
133                osll = turn_wgs84_into_osie36(parts[0], parts[1], 0)
134                en = turn_osie36_into_eastingnorthing(osll[0], osll[1])
135                pc["ie_easting"] = en[0]
136                pc["ie_northing"] = en[1]
137        else:
138                osll = turn_wgs84_into_osgb36(parts[0], parts[1], 0)
139                en = turn_osgb36_into_eastingnorthing(osll[0], osll[1])
140                pc["easting"] = en[0]
141                pc["northing"] = en[1]
142
143        postcodes.append(pc)
144
145
146# Grab all of the current ones
147sql = "SELECT id, outward, inward, deleted FROM postcodes WHERE source = %s AND NOT deleted"
148sth = dbh.cursor()
149sth.execute(sql, ftp_source)
150
151ftps = {}
152while 1:
153        row = sth.fetchone()
154        if row == None:
155                break
156        id, outward, inward, deleted = row
157        ftps["%s %s" % (outward, inward)] = \
158                                { 'outward':outward, 'inward':inward, 'id':id, 'deleted':deleted, 'done':False }
159sth.close()
160count = len(ftps.keys())
161
162print "There are currently %d entries in the database from freethepostcode.org" % count
163print "The new import contains %d entries" % len(postcodes)
164
165# Only prompt if it's a big difference
166if confirm_update and \
167        (count == 0 or len(postcodes) == 0 or 
168        len(postcodes) < count or (len(postcodes)-count) > 50):
169        print "Are you sure you wish to run an import?"
170        confirm = raw_input("")
171        print ""
172
173        if confirm == "y" or confirm == "yes":
174                # Good, go ahead
175                pass
176        else:
177                print ""
178                raise Exception("Aborting import")
179
180
181# Add the latest list to the database, tweaking if already there
182add_sql = "INSERT INTO postcodes (outward, inward, raw_postcode_outward, raw_postcode_inward, easting, northing, ie_easting, ie_northing, source) VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s)"
183upd_sql = "UPDATE postcodes SET outward=%s, inward=%s, raw_postcode_outward=%s, raw_postcode_inward=%s, easting=%s, northing=%s, ie_easting=%s, ie_northing=%s WHERE id=%s"
184del_sql = "UPDATE postcodes SET deleted='t', delete_reason=%s WHERE id=%s"
185sth = dbh.cursor()
186worked = 0
187
188# Add the postcodes
189for postcode in postcodes:
190        pc = "%s %s" % (postcode["outer"], postcode["inner"])
191        print "Processing %s" % pc
192
193        if ftps.has_key(pc):
194                # Update existing one
195                ftps[pc]['done'] = True
196                sth.execute(upd_sql, (postcode["outer"], postcode["inner"], postcode["raw_outer"], postcode["raw_inner"], postcode["easting"], postcode["northing"], postcode["ie_easting"], postcode["ie_northing"], ftps[pc]['id']))
197                print "\tupdated record at %d" % ftps[pc]['id']
198        else:
199                # New record, add
200                sth.execute(add_sql, (postcode["outer"], postcode["inner"], postcode["raw_outer"], postcode["raw_inner"], postcode["easting"], postcode["northing"], postcode["ie_easting"], postcode["ie_northing"], ftp_source))
201                print "\tadded new postcode"
202        worked = worked + 1
203
204print "Processed %d entries" % worked
205
206
207# Find ones that have gone
208print "\nDeleting any postcodes no longer in FTP:"
209for gone_pc in [ pc for pc in ftps.keys() if not ftps[pc]['done'] ]:
210        print "\tflagging as deleted old FTP postcode %s" % gone_pc
211        sth.execute(del_sql, (delete_reason, ftps[gone_pc]['id']))
212
213# All done
214print "\nAll Done"
215sth.close()
216dbh.commit()
217dbh.close()
Note: See TracBrowser for help on using the repository browser.