source: trunk/npemap.org.uk/scripts/freethepostcode.org-importer/importer.py @ 509

Last change on this file since 509 was 509, checked in by Nick Burch, 13 years ago

Proper importing on multiple occassions, as per ticket #39

  • Property svn:executable set to *
File size: 5.8 KB
Line 
1#!/usr/bin/python
2#               Importer from FreeThePostcode.org
3#               ---------------------------------
4#
5# Imports FreeThePostcode data into the current schema, removing any
6#  existing FreeThePostcode data.
7#
8# You will need to tweak this script with your database settings for it
9#  to work.
10
11from pyPgSQL import PgSQL
12from geo_helper import turn_wgs84_into_osgb36, turn_osgb36_into_eastingnorthing, \
13                                                turn_wgs84_into_osie36, turn_osie36_into_eastingnorthing
14import os
15
16# Database settings
17dbtype = "postgres"
18dbname = "npemap"
19dbhost = "localhost"
20dbuser = "npemap"
21dbpass = ""
22
23
24# Connect to the database
25dbh = None
26if dbtype == "pgsql" or dbtype == "postgres" or dbtype == "postgresql":
27        if len(dbhost):
28                dbh = PgSQL.connect(database=dbname, host=dbhost, user=dbuser, password=dbpass)
29        else:
30                dbh = PgSQL.connect(database=dbname, user=dbuser, password=dbpass)
31else:
32        raise Exception("Unknown dbtype %s" % dbtype)
33
34
35# Check what source value FreeThePostcode data will have
36ftp_source_name = "FreeThePostcode.org Importer"
37ftp_source = None
38
39sql = "SELECT id FROM sources WHERE name = %s"
40sth = dbh.cursor()
41sth.execute(sql, ftp_source_name)
42ids = sth.fetchall()
43sth.close()
44if len(ids) == 1:
45        for id in (ids):
46                ftp_source = id.id
47else:
48        print "Unable to find ID for source '%s' - error code %d" % (ftp_source_name, len(ids))
49
50
51# See if our delete reason exists, and if no, add it
52delete_reason = None
53reason_text = "Gone from FTP"
54while delete_reason == None:
55        sql = "SELECT id FROM delete_reasons WHERE reason = %s"
56        sth = dbh.cursor()
57        sth.execute(sql, reason_text)
58        ids = sth.fetchall()
59        sth.close()
60        if len(ids) == 1:
61                for id in (ids):
62                        delete_reason = id.id
63        else:
64                sth = dbh.cursor()
65                sth.execute("INSERT INTO delete_reasons (reason) VALUES (%s)", reason_text)
66                sth.close()
67
68
69# Download the latest list of postcodes if needed
70download = False
71current_file = None
72if os.path.isfile("currentlist"):
73        current_file = "currentlist"
74if os.path.isfile("/tmp/currentlist"):
75        current_file = "/tmp/currentlist"
76
77if not current_file == None:
78        print "Data found, do you wish to re-download?"
79        redownload = raw_input("")
80        if redownload == "y" or redownload == "yes":
81                download = True
82else:
83        download = True
84
85if download:
86        url = "http://www.freethepostcode.org/currentlist"
87        print "Downloading from %s" % url
88        os.system("wget -O currentlist '%s'" % url)
89        current_file = "currentlist"
90print ""
91
92
93# Read in the new list
94postcodes = []
95ftpc = open(current_file, 'r')
96for line in ftpc:
97        line = line[0:-1]
98        if line[0:1] == "#":
99                continue
100
101        parts = line.split(" ")
102        if not len(parts) == 4:
103                print "Invalid line '%s'" % line
104
105        pc = {}
106        pc["outer"] = parts[2]
107        pc["inner"] = parts[3]
108        pc["raw"] = "%s %s" % (parts[2],parts[3])
109        pc["raw_outer"] = parts[2]
110        pc["raw_inner"] = parts[3]
111
112        pc["easting"] = None
113        pc["northing"] = None
114        pc["ie_easting"] = None
115        pc["ie_northing"] = None
116
117        # Turn lat+long into easting+northing
118        # All NI postcodes are BT
119        if pc["outer"][0:2] == 'BT':
120                osll = turn_wgs84_into_osie36(parts[0], parts[1], 0)
121                en = turn_osie36_into_eastingnorthing(osll[0], osll[1])
122                pc["ie_easting"] = en[0]
123                pc["ie_northing"] = en[1]
124        else:
125                osll = turn_wgs84_into_osgb36(parts[0], parts[1], 0)
126                en = turn_osgb36_into_eastingnorthing(osll[0], osll[1])
127                pc["easting"] = en[0]
128                pc["northing"] = en[1]
129
130        postcodes.append(pc)
131
132
133# Grab all of the current ones
134sql = "SELECT id, outward, inward, deleted FROM postcodes WHERE source = %s AND NOT deleted"
135sth = dbh.cursor()
136sth.execute(sql, ftp_source)
137
138ftps = {}
139while 1:
140        row = sth.fetchone()
141        if row == None:
142                break
143        id, outward, inward, deleted = row
144        ftps["%s %s" % (outward, inward)] = \
145                                { 'outward':outward, 'inward':inward, 'id':id, 'deleted':deleted, 'done':False }
146sth.close()
147count = len(ftps.keys())
148
149print "There are currently %d entries in the database from freethepostcode.org" % count
150print "The new import contains %d entries" % len(postcodes)
151
152# Only prompt if it's a big difference
153if count == 0 or len(postcodes) == 0 or len(postcodes) < count or (len(postcodes)-count) > 50:
154        print "Are you sure you wish to run an import?"
155        confirm = raw_input("")
156        print ""
157
158        if confirm == "y" or confirm == "yes":
159                # Good, go ahead
160                pass
161        else:
162                print ""
163                raise Exception("Aborting import")
164
165
166# Add the latest list to the database, tweaking if already there
167add_sql = "INSERT INTO postcodes (outward, inward, raw_postcode_outward, raw_postcode_inward, easting, northing, ie_easting, ie_northing, source) VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s)"
168upd_sql = "UPDATE postcodes SET outward=%s, inward=%s, raw_postcode_outward=%s, raw_postcode_inward=%s, easting=%s, northing=%s, ie_easting=%s, ie_northing=%s WHERE id=%s"
169del_sql = "UPDATE postcodes SET deleted='t', delete_reason=%s WHERE id=%s"
170sth = dbh.cursor()
171worked = 0
172
173# Add the postcodes
174for postcode in postcodes:
175        pc = "%s %s" % (postcode["outer"], postcode["inner"])
176        print "Processing %s" % pc
177
178        if ftps.has_key(pc):
179                # Update existing one
180                ftps[pc]['done'] = True
181                sth.execute(upd_sql, (postcode["outer"], postcode["inner"], postcode["raw_outer"], postcode["raw_inner"], postcode["easting"], postcode["northing"], postcode["ie_easting"], postcode["ie_northing"], ftps[pc]['id']))
182                print "\tupdated record at %d" % ftps[pc]['id']
183        else:
184                # New record, add
185                sth.execute(add_sql, (postcode["outer"], postcode["inner"], postcode["raw_outer"], postcode["raw_inner"], postcode["easting"], postcode["northing"], postcode["ie_easting"], postcode["ie_northing"], ftp_source))
186                print "\tadded new postcode"
187        worked = worked + 1
188
189print "Processed %d entries" % worked
190
191
192# Find ones that have gone
193print "\nDeleting any postcodes no longer in FTP:"
194for gone_pc in [ pc for pc in ftps.keys() if not ftps[pc]['done'] ]:
195        print "\tflagging as deleted old FTP postcode %s" % gone_pc
196        sth.execute(del_sql, (delete_reason, ftps[gone_pc]['id']))
197
198# All done
199print "\nAll Done"
200sth.close()
201dbh.commit()
202dbh.close()
Note: See TracBrowser for help on using the repository browser.