source: trunk/npemap.org.uk/scripts/freethepostcode.org-importer/importer.py @ 575

Last change on this file since 575 was 575, checked in by Nick Burch, 13 years ago

Few little tweaks to the importer

  • Property svn:executable set to *
File size: 5.9 KB
RevLine 
[33]1#!/usr/bin/python
2#               Importer from FreeThePostcode.org
3#               ---------------------------------
4#
5# Imports FreeThePostcode data into the current schema, removing any
6#  existing FreeThePostcode data.
7#
8# You will need to tweak this script with your database settings for it
9#  to work.
10
11from pyPgSQL import PgSQL
[508]12from geo_helper import turn_wgs84_into_osgb36, turn_osgb36_into_eastingnorthing, \
13                                                turn_wgs84_into_osie36, turn_osie36_into_eastingnorthing
[34]14import os
[33]15
16# Database settings
[34]17dbtype = "postgres"
[509]18dbname = "npemap"
[575]19dbhost = ""
[509]20dbuser = "npemap"
[508]21dbpass = ""
[33]22
23
24# Connect to the database
25dbh = None
[34]26if dbtype == "pgsql" or dbtype == "postgres" or dbtype == "postgresql":
27        if len(dbhost):
28                dbh = PgSQL.connect(database=dbname, host=dbhost, user=dbuser, password=dbpass)
29        else:
30                dbh = PgSQL.connect(database=dbname, user=dbuser, password=dbpass)
[33]31else:
[34]32        raise Exception("Unknown dbtype %s" % dbtype)
[33]33
34
35# Check what source value FreeThePostcode data will have
36ftp_source_name = "FreeThePostcode.org Importer"
37ftp_source = None
38
39sql = "SELECT id FROM sources WHERE name = %s"
40sth = dbh.cursor()
41sth.execute(sql, ftp_source_name)
42ids = sth.fetchall()
[35]43sth.close()
[33]44if len(ids) == 1:
45        for id in (ids):
46                ftp_source = id.id
47else:
48        print "Unable to find ID for source '%s' - error code %d" % (ftp_source_name, len(ids))
49
50
[509]51# See if our delete reason exists, and if no, add it
52delete_reason = None
53reason_text = "Gone from FTP"
54while delete_reason == None:
55        sql = "SELECT id FROM delete_reasons WHERE reason = %s"
56        sth = dbh.cursor()
57        sth.execute(sql, reason_text)
58        ids = sth.fetchall()
59        sth.close()
60        if len(ids) == 1:
61                for id in (ids):
62                        delete_reason = id.id
63        else:
64                sth = dbh.cursor()
65                sth.execute("INSERT INTO delete_reasons (reason) VALUES (%s)", reason_text)
66                sth.close()
67
68
[34]69# Download the latest list of postcodes if needed
70download = False
71current_file = None
72if os.path.isfile("currentlist"):
73        current_file = "currentlist"
74if os.path.isfile("/tmp/currentlist"):
75        current_file = "/tmp/currentlist"
[33]76
[34]77if not current_file == None:
78        print "Data found, do you wish to re-download?"
79        redownload = raw_input("")
80        if redownload == "y" or redownload == "yes":
81                download = True
82else:
83        download = True
84
85if download:
86        url = "http://www.freethepostcode.org/currentlist"
87        print "Downloading from %s" % url
88        os.system("wget -O currentlist '%s'" % url)
89        current_file = "currentlist"
90print ""
91
92
93# Read in the new list
94postcodes = []
95ftpc = open(current_file, 'r')
96for line in ftpc:
97        line = line[0:-1]
98        if line[0:1] == "#":
99                continue
100
[575]101        # Replace double spaces
102        line = line.replace("  "," ")
103
[34]104        parts = line.split(" ")
105        if not len(parts) == 4:
106                print "Invalid line '%s'" % line
107
108        pc = {}
109        pc["outer"] = parts[2]
110        pc["inner"] = parts[3]
111        pc["raw"] = "%s %s" % (parts[2],parts[3])
[127]112        pc["raw_outer"] = parts[2]
113        pc["raw_inner"] = parts[3]
[34]114
[508]115        pc["easting"] = None
116        pc["northing"] = None
117        pc["ie_easting"] = None
118        pc["ie_northing"] = None
119
120        # Turn lat+long into easting+northing
121        # All NI postcodes are BT
122        if pc["outer"][0:2] == 'BT':
123                osll = turn_wgs84_into_osie36(parts[0], parts[1], 0)
124                en = turn_osie36_into_eastingnorthing(osll[0], osll[1])
125                pc["ie_easting"] = en[0]
126                pc["ie_northing"] = en[1]
127        else:
128                osll = turn_wgs84_into_osgb36(parts[0], parts[1], 0)
129                en = turn_osgb36_into_eastingnorthing(osll[0], osll[1])
130                pc["easting"] = en[0]
131                pc["northing"] = en[1]
132
[34]133        postcodes.append(pc)
134
135
[509]136# Grab all of the current ones
137sql = "SELECT id, outward, inward, deleted FROM postcodes WHERE source = %s AND NOT deleted"
[34]138sth = dbh.cursor()
139sth.execute(sql, ftp_source)
[509]140
141ftps = {}
142while 1:
143        row = sth.fetchone()
144        if row == None:
145                break
146        id, outward, inward, deleted = row
147        ftps["%s %s" % (outward, inward)] = \
148                                { 'outward':outward, 'inward':inward, 'id':id, 'deleted':deleted, 'done':False }
[35]149sth.close()
[509]150count = len(ftps.keys())
[33]151
[34]152print "There are currently %d entries in the database from freethepostcode.org" % count
153print "The new import contains %d entries" % len(postcodes)
154
[127]155# Only prompt if it's a big difference
156if count == 0 or len(postcodes) == 0 or len(postcodes) < count or (len(postcodes)-count) > 50:
[509]157        print "Are you sure you wish to run an import?"
[127]158        confirm = raw_input("")
[34]159        print ""
160
[127]161        if confirm == "y" or confirm == "yes":
162                # Good, go ahead
163                pass
164        else:
165                print ""
166                raise Exception("Aborting import")
[34]167
[127]168
[509]169# Add the latest list to the database, tweaking if already there
170add_sql = "INSERT INTO postcodes (outward, inward, raw_postcode_outward, raw_postcode_inward, easting, northing, ie_easting, ie_northing, source) VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s)"
171upd_sql = "UPDATE postcodes SET outward=%s, inward=%s, raw_postcode_outward=%s, raw_postcode_inward=%s, easting=%s, northing=%s, ie_easting=%s, ie_northing=%s WHERE id=%s"
172del_sql = "UPDATE postcodes SET deleted='t', delete_reason=%s WHERE id=%s"
[34]173sth = dbh.cursor()
[35]174worked = 0
[508]175
176# Add the postcodes
[35]177for postcode in postcodes:
[509]178        pc = "%s %s" % (postcode["outer"], postcode["inner"])
179        print "Processing %s" % pc
[508]180
[509]181        if ftps.has_key(pc):
182                # Update existing one
183                ftps[pc]['done'] = True
184                sth.execute(upd_sql, (postcode["outer"], postcode["inner"], postcode["raw_outer"], postcode["raw_inner"], postcode["easting"], postcode["northing"], postcode["ie_easting"], postcode["ie_northing"], ftps[pc]['id']))
185                print "\tupdated record at %d" % ftps[pc]['id']
186        else:
187                # New record, add
188                sth.execute(add_sql, (postcode["outer"], postcode["inner"], postcode["raw_outer"], postcode["raw_inner"], postcode["easting"], postcode["northing"], postcode["ie_easting"], postcode["ie_northing"], ftp_source))
189                print "\tadded new postcode"
190        worked = worked + 1
[34]191
[509]192print "Processed %d entries" % worked
[34]193
[509]194
195# Find ones that have gone
196print "\nDeleting any postcodes no longer in FTP:"
197for gone_pc in [ pc for pc in ftps.keys() if not ftps[pc]['done'] ]:
198        print "\tflagging as deleted old FTP postcode %s" % gone_pc
199        sth.execute(del_sql, (delete_reason, ftps[gone_pc]['id']))
200
[35]201# All done
[509]202print "\nAll Done"
[35]203sth.close()
204dbh.commit()
205dbh.close()
Note: See TracBrowser for help on using the repository browser.