source: trunk/npemap.org.uk/scripts/freethepostcode.org-importer/importer.py @ 165

Last change on this file since 165 was 165, checked in by Dominic Hargreaves, 14 years ago

disable (references #39)

  • Property svn:executable set to *
File size: 3.9 KB
Line 
1#!/usr/bin/python
2#               Importer from FreeThePostcode.org
3#               ---------------------------------
4#
5# Imports FreeThePostcode data into the current schema, removing any
6#  existing FreeThePostcode data.
7#
8# You will need to tweak this script with your database settings for it
9#  to work.
10
11from pyPgSQL import PgSQL
12from geo_helper import turn_wgs84_into_osgb36, turn_osgb36_into_eastingnorthing
13import os
14
15# Database settings
16dbtype = "postgres"
17dbname = "npemaps"
18dbhost = "localhost"
19dbuser = "npemaps"
20dbpass = "npemaps"
21
22
23raise Exception("Script needs to be updated as per ticket #39")
24
25
26# Connect to the database
27dbh = None
28if dbtype == "pgsql" or dbtype == "postgres" or dbtype == "postgresql":
29        if len(dbhost):
30                dbh = PgSQL.connect(database=dbname, host=dbhost, user=dbuser, password=dbpass)
31        else:
32                dbh = PgSQL.connect(database=dbname, user=dbuser, password=dbpass)
33else:
34        raise Exception("Unknown dbtype %s" % dbtype)
35
36
37# Check what source value FreeThePostcode data will have
38ftp_source_name = "FreeThePostcode.org Importer"
39ftp_source = None
40
41sql = "SELECT id FROM sources WHERE name = %s"
42sth = dbh.cursor()
43sth.execute(sql, ftp_source_name)
44ids = sth.fetchall()
45sth.close()
46if len(ids) == 1:
47        for id in (ids):
48                ftp_source = id.id
49else:
50        print "Unable to find ID for source '%s' - error code %d" % (ftp_source_name, len(ids))
51
52
53# Download the latest list of postcodes if needed
54download = False
55current_file = None
56if os.path.isfile("currentlist"):
57        current_file = "currentlist"
58if os.path.isfile("/tmp/currentlist"):
59        current_file = "/tmp/currentlist"
60
61if not current_file == None:
62        print "Data found, do you wish to re-download?"
63        redownload = raw_input("")
64        if redownload == "y" or redownload == "yes":
65                download = True
66else:
67        download = True
68
69if download:
70        url = "http://www.freethepostcode.org/currentlist"
71        print "Downloading from %s" % url
72        os.system("wget -O currentlist '%s'" % url)
73        current_file = "currentlist"
74print ""
75
76
77# Read in the new list
78postcodes = []
79ftpc = open(current_file, 'r')
80for line in ftpc:
81        line = line[0:-1]
82        if line[0:1] == "#":
83                continue
84
85        parts = line.split(" ")
86        if not len(parts) == 4:
87                print "Invalid line '%s'" % line
88
89        # Turn lat+long into easting+northing
90        osll = turn_wgs84_into_osgb36(parts[0], parts[1], 0)
91        en = turn_osgb36_into_eastingnorthing(osll[0], osll[1])
92
93        pc = {}
94        pc["easting"] = en[0]
95        pc["northing"] = en[1]
96        pc["outer"] = parts[2]
97        pc["inner"] = parts[3]
98        pc["raw"] = "%s %s" % (parts[2],parts[3])
99        pc["raw_outer"] = parts[2]
100        pc["raw_inner"] = parts[3]
101
102        postcodes.append(pc)
103
104
105# Prompt before removing all the current ones
106sql = "SELECT COUNT(*) AS count FROM postcodes WHERE source = %s"
107sth = dbh.cursor()
108sth.execute(sql, ftp_source)
109counts = sth.fetchall()
110count = counts[0][0]
111sth.close()
112
113print "There are currently %d entries in the database from freethepostcode.org" % count
114print "The new import contains %d entries" % len(postcodes)
115
116# Only prompt if it's a big difference
117if count == 0 or len(postcodes) == 0 or len(postcodes) < count or (len(postcodes)-count) > 50:
118        print "Are you sure you wish to remove the old entries, to add new ones?"
119        confirm = raw_input("")
120        print ""
121
122        if confirm == "y" or confirm == "yes":
123                # Good, go ahead
124                pass
125        else:
126                print ""
127                raise Exception("Aborting import")
128
129
130# Delete the old entries
131sql = "DELETE FROM postcodes WHERE source = %s"
132sth = dbh.cursor()
133sth.execute(sql, ftp_source)
134print "Deleted %d entries" % sth.rowcount
135sth.close()
136dbh.commit()
137
138
139# Add the latest list to the database
140sql = "INSERT INTO postcodes (outward, inward, raw_postcode_outward, raw_postcode_inward, easting, northing, source) VALUES (%s, %s, %s, %s, %s, %s, %s)"
141sth = dbh.cursor()
142worked = 0
143for postcode in postcodes:
144        sth.execute(sql, (postcode["outer"], postcode["inner"], postcode["raw_outer"], postcode["raw_inner"], postcode["easting"], postcode["northing"], ftp_source))
145        worked = worked + sth.rowcount
146print "Added %d entries" % worked
147
148
149# All done
150sth.close()
151dbh.commit()
152dbh.close()
Note: See TracBrowser for help on using the repository browser.