Changeset 648 for trunk


Ignore:
Timestamp:
Sep 15, 2008, 10:20:42 PM (11 years ago)
Author:
Nick Burch
Message:

Refactor the FTP importer

Location:
trunk/npemap.org.uk/scripts
Files:
2 added
1 edited
1 moved

Legend:

Unmodified
Added
Removed
  • trunk/npemap.org.uk/scripts/freethepostcode.org-importer/importer.py

    r644 r648  
    2929#  to work.
    3030
    31 from pyPgSQL import PgSQL
    32 from geo_helper import turn_wgs84_into_osgb36, turn_osgb36_into_eastingnorthing, \
    33                                                 turn_wgs84_into_osie36, turn_osie36_into_eastingnorthing
    3431import os
    3532import sys
    3633
    37 # Database settings
    38 dbtype = "postgres"
    39 dbname = "npemap"
    40 dbhost = ""
    41 dbuser = "npemap"
    42 dbpass = ""
     34# Pull in the path to the main importer
     35sys.path.append(os.path.join(os.path.dirname(__file__), "..", "generic-python-import"))
    4336
    44 
    45 # What arguments did they pass in?
    46 verbose = False
    47 download = False
    48 confirm_update = True
    49 for arg in sys.argv[1:]:
    50         if arg == "--verbose":
    51                 verbose = True
    52         if arg == "--download":
    53                 download = True
    54         if arg == "--no-confirm":
    55                 confirm_update = False
    56 
    57 
    58 # Connect to the database
    59 dbh = None
    60 if dbtype == "pgsql" or dbtype == "postgres" or dbtype == "postgresql":
    61         if len(dbhost):
    62                 dbh = PgSQL.connect(database=dbname, host=dbhost, user=dbuser, password=dbpass)
    63         else:
    64                 dbh = PgSQL.connect(database=dbname, user=dbuser, password=dbpass)
    65 else:
    66         raise Exception("Unknown dbtype %s" % dbtype)
    67 
    68 
    69 # Check what source value FreeThePostcode data will have
    70 ftp_source_name = "FreeThePostcode.org Importer"
    71 ftp_source = None
    72 
    73 sql = "SELECT id FROM sources WHERE name = %s"
    74 sth = dbh.cursor()
    75 sth.execute(sql, ftp_source_name)
    76 ids = sth.fetchall()
    77 sth.close()
    78 if len(ids) == 1:
    79         for id in (ids):
    80                 ftp_source = id.id
    81 else:
    82         print "Unable to find ID for source '%s' - error code %d" % (ftp_source_name, len(ids))
    83 
    84 
    85 # See if our delete reason exists, and if no, add it
    86 delete_reason = None
    87 reason_text = "Gone from FTP"
    88 while delete_reason == None:
    89         sql = "SELECT id FROM delete_reasons WHERE reason = %s"
    90         sth = dbh.cursor()
    91         sth.execute(sql, reason_text)
    92         ids = sth.fetchall()
    93         sth.close()
    94         if len(ids) == 1:
    95                 for id in (ids):
    96                         delete_reason = id.id
    97         else:
    98                 sth = dbh.cursor()
    99                 sth.execute("INSERT INTO delete_reasons (reason) VALUES (%s)", reason_text)
    100                 sth.close()
    101 
    102 
    103 # Download the latest list of postcodes if needed
    104 current_file = None
    105 if os.path.isfile("currentlist"):
    106         current_file = "currentlist"
    107 if os.path.isfile("/tmp/currentlist"):
    108         current_file = "/tmp/currentlist"
    109 
    110 if not download and not current_file == None:
    111         print "Data found, do you wish to re-download?"
    112         redownload = raw_input("")
    113         if redownload == "y" or redownload == "yes":
    114                 download = True
    115 else:
    116         download = True
    117 
    118 if download:
    119         url = "http://www.freethepostcode.org/currentlist"
    120         if verbose:
    121                 print "Downloading from %s" % url
    122         os.system("wget --quiet -O currentlist '%s'" % url)
    123         current_file = "currentlist"
    124         if verbose:
    125                 print ""
    126 
    127 
    128 # Read in the new list
    129 postcodes = []
    130 ftpc = open(current_file, 'r')
    131 for line in ftpc:
    132         line = line[0:-1]
    133         if line[0:1] == "#":
    134                 continue
    135 
    136         # Replace double spaces
    137         line = line.replace("  "," ")
    138 
    139         parts = line.split(" ")
    140         if not len(parts) == 4:
    141                 print "Invalid line '%s'" % line
    142 
    143         pc = {}
    144         pc["outer"] = parts[2]
    145         pc["inner"] = parts[3]
    146         pc["raw"] = "%s %s" % (parts[2],parts[3])
    147         pc["raw_outer"] = parts[2]
    148         pc["raw_inner"] = parts[3]
    149 
    150         pc["easting"] = None
    151         pc["northing"] = None
    152         pc["ie_easting"] = None
    153         pc["ie_northing"] = None
    154 
    155         # Turn lat+long into easting+northing
    156         # All NI postcodes are BT
    157         if pc["outer"][0:2] == 'BT':
    158                 osll = turn_wgs84_into_osie36(parts[0], parts[1], 0)
    159                 en = turn_osie36_into_eastingnorthing(osll[0], osll[1])
    160                 pc["ie_easting"] = en[0]
    161                 pc["ie_northing"] = en[1]
    162         else:
    163                 osll = turn_wgs84_into_osgb36(parts[0], parts[1], 0)
    164                 en = turn_osgb36_into_eastingnorthing(osll[0], osll[1])
    165                 pc["easting"] = en[0]
    166                 pc["northing"] = en[1]
    167 
    168         postcodes.append(pc)
    169 
    170 
    171 # Grab all of the current ones
    172 sql = "SELECT id, outward, inward, deleted FROM postcodes WHERE source = %s AND NOT deleted"
    173 sth = dbh.cursor()
    174 sth.execute(sql, ftp_source)
    175 
    176 ftps = {}
    177 while 1:
    178         row = sth.fetchone()
    179         if row == None:
    180                 break
    181         id, outward, inward, deleted = row
    182         ftps["%s %s" % (outward, inward)] = \
    183                                 { 'outward':outward, 'inward':inward, 'id':id, 'deleted':deleted, 'done':False }
    184 sth.close()
    185 count = len(ftps.keys())
    186 
    187 print "There are currently %d entries in the database from freethepostcode.org" % count
    188 print "The new import contains %d entries" % len(postcodes)
    189 
    190 # Only prompt if it's a big difference
    191 if confirm_update:
    192         if (count == 0 or len(postcodes) == 0 or
    193                         len(postcodes) < count or (len(postcodes)-count) > 50):
    194                 print "Are you sure you wish to run an import?"
    195                 confirm = raw_input("")
    196                 print ""
    197 
    198                 if confirm == "y" or confirm == "yes":
    199                         # Good, go ahead
    200                         pass
    201                 else:
    202                         print ""
    203                         raise Exception("Aborting import")
    204 else:
    205         # Don't trash everything even with --no-confirm
    206         if len(postcodes) == 0 or abs(len(postcodes)-count) > 100:
    207                 raise Exception("Postcode count too different, not running (re-run without --no-confirm to allow")
    208 
    209 
    210 # Add the latest list to the database, tweaking if already there
    211 add_sql = "INSERT INTO postcodes (outward, inward, raw_postcode_outward, raw_postcode_inward, easting, northing, ie_easting, ie_northing, source) VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s)"
    212 upd_sql = "UPDATE postcodes SET outward=%s, inward=%s, raw_postcode_outward=%s, raw_postcode_inward=%s, easting=%s, northing=%s, ie_easting=%s, ie_northing=%s WHERE id=%s"
    213 del_sql = "UPDATE postcodes SET deleted='t', delete_reason=%s WHERE id=%s"
    214 sth = dbh.cursor()
    215 worked = 0
    216 
    217 # Add the postcodes
    218 for postcode in postcodes:
    219         pc = "%s %s" % (postcode["outer"], postcode["inner"])
    220         if verbose:
    221                 print "Processing %s" % pc
    222 
    223         if ftps.has_key(pc):
    224                 # Update existing one
    225                 ftps[pc]['done'] = True
    226                 sth.execute(upd_sql, (postcode["outer"], postcode["inner"], postcode["raw_outer"], postcode["raw_inner"], postcode["easting"], postcode["northing"], postcode["ie_easting"], postcode["ie_northing"], ftps[pc]['id']))
    227                 if verbose:
    228                         print "\tupdated record at %d" % ftps[pc]['id']
    229         else:
    230                 # New record, add
    231                 sth.execute(add_sql, (postcode["outer"], postcode["inner"], postcode["raw_outer"], postcode["raw_inner"], postcode["easting"], postcode["northing"], postcode["ie_easting"], postcode["ie_northing"], ftp_source))
    232                 if verbose:
    233                         print "\tadded new postcode"
    234                 else:
    235                         print "Added postcode %s" % pc
    236         worked = worked + 1
    237 
    238 print "Processed %d entries" % worked
    239 
    240 
    241 # Find ones that have gone
    242 if verbose:
    243         print "\nDeleting any postcodes no longer in FTP:"
    244 for gone_pc in [ pc for pc in ftps.keys() if not ftps[pc]['done'] ]:
    245         print "\tflagging as deleted old FTP postcode %s" % gone_pc
    246         sth.execute(del_sql, (delete_reason, ftps[gone_pc]['id']))
    247 
    248 # All done
    249 if verbose:
    250         print "\nAll Done"
    251 sth.close()
    252 dbh.commit()
    253 dbh.close()
     37from generic_importer import FreeThePostcodeImporter
     38ftp = FreeThePostcodeImporter()
     39ftp.handle_arguments(sys.argv)
     40ftp.process()
Note: See TracChangeset for help on using the changeset viewer.