source: trunk/npemap.org.uk/scripts/freethepostcode.org-importer/importer.py @ 35

Last change on this file since 35 was 35, checked in by Nick Burch, 15 years ago

Finish the import script

  • Property svn:executable set to *
File size: 3.6 KB
Line 
1#!/usr/bin/python
2#               Importer from FreeThePostcode.org
3#               ---------------------------------
4#
5# Imports FreeThePostcode data into the current schema, removing any
6#  existing FreeThePostcode data.
7#
8# You will need to tweak this script with your database settings for it
9#  to work.
10
11from pyPgSQL import PgSQL
12from geo_helper import turn_wgs84_into_osgb36, turn_osgb36_into_eastingnorthing
13import os
14
15# Database settings
16dbtype = "postgres"
17dbname = "npe_postcodes"
18dbhost = "localhost"
19dbuser = "nick"
20dbpass = ""
21
22
23# Connect to the database
24dbh = None
25if dbtype == "pgsql" or dbtype == "postgres" or dbtype == "postgresql":
26        if len(dbhost):
27                dbh = PgSQL.connect(database=dbname, host=dbhost, user=dbuser, password=dbpass)
28        else:
29                dbh = PgSQL.connect(database=dbname, user=dbuser, password=dbpass)
30else:
31        raise Exception("Unknown dbtype %s" % dbtype)
32
33
34# Check what source value FreeThePostcode data will have
35ftp_source_name = "FreeThePostcode.org Importer"
36ftp_source = None
37
38sql = "SELECT id FROM sources WHERE name = %s"
39sth = dbh.cursor()
40sth.execute(sql, ftp_source_name)
41ids = sth.fetchall()
42sth.close()
43if len(ids) == 1:
44        for id in (ids):
45                ftp_source = id.id
46else:
47        print "Unable to find ID for source '%s' - error code %d" % (ftp_source_name, len(ids))
48
49
50# Download the latest list of postcodes if needed
51download = False
52current_file = None
53if os.path.isfile("currentlist"):
54        current_file = "currentlist"
55if os.path.isfile("/tmp/currentlist"):
56        current_file = "/tmp/currentlist"
57
58if not current_file == None:
59        print "Data found, do you wish to re-download?"
60        redownload = raw_input("")
61        if redownload == "y" or redownload == "yes":
62                download = True
63else:
64        download = True
65
66if download:
67        url = "http://www.freethepostcode.org/currentlist"
68        print "Downloading from %s" % url
69        os.system("wget -O currentlist '%s'" % url)
70        current_file = "currentlist"
71print ""
72
73
74# Read in the new list
75postcodes = []
76ftpc = open(current_file, 'r')
77for line in ftpc:
78        line = line[0:-1]
79        if line[0:1] == "#":
80                continue
81
82        parts = line.split(" ")
83        if not len(parts) == 4:
84                print "Invalid line '%s'" % line
85
86        # Turn lat+long into easting+northing
87        osll = turn_wgs84_into_osgb36(parts[0], parts[1], 0)
88        en = turn_osgb36_into_eastingnorthing(osll[0], osll[1])
89
90        pc = {}
91        pc["easting"] = en[0]
92        pc["northing"] = en[1]
93        pc["outer"] = parts[2]
94        pc["inner"] = parts[3]
95        pc["raw"] = "%s %s" % (parts[2],parts[3])
96
97        postcodes.append(pc)
98
99
100# Prompt before removing all the current ones
101sql = "SELECT COUNT(*) AS count FROM postcodes WHERE source = %s"
102sth = dbh.cursor()
103sth.execute(sql, ftp_source)
104counts = sth.fetchall()
105count = counts[0][0]
106sth.close()
107
108print "There are currently %d entries in the database from freethepostcode.org" % count
109print "The new import contains %d entries" % len(postcodes)
110print "Are you sure you wish to remove the old entries, to add new ones?"
111confirm = raw_input("")
112print ""
113
114if confirm == "y" or confirm == "yes":
115        # Good, go ahead
116        pass
117else:
118        print ""
119        raise Exception("Aborting import")
120
121
122# Delete the old entries
123sql = "DELETE FROM postcodes WHERE source = %s"
124sth = dbh.cursor()
125sth.execute(sql, ftp_source)
126print "Deleted %d entries" % sth.rowcount
127sth.close()
128dbh.commit()
129
130
131# Add the latest list to the database
132sql = "INSERT INTO postcodes (outward, inward, raw_postcode, easting, northing, source) VALUES (%s, %s, %s, %s, %s, %s)"
133sth = dbh.cursor()
134worked = 0
135for postcode in postcodes:
136        sth.execute(sql, (postcode["outer"], postcode["inner"], postcode["raw"], postcode["easting"], postcode["northing"], ftp_source))
137        worked = worked + sth.rowcount
138print "Added %d entries" % worked
139
140
141# All done
142sth.close()
143dbh.commit()
144dbh.close()
Note: See TracBrowser for help on using the repository browser.