Changeset 736 for trunk


Ignore:
Timestamp:
Feb 24, 2010, 1:25:27 PM (10 years ago)
Author:
Nick Burch
Message:

Re-jig how we handle deleted entries in the import, so we can handle postcodes that go away and then come back

Location:
trunk/npemap.org.uk/scripts/generic-python-import
Files:
2 edited

Legend:

Unmodified
Added
Removed
  • trunk/npemap.org.uk/scripts/generic-python-import

    • Property svn:ignore
      •  

        old new  
        11*.swp
        22*.pyc
         3.vimrc
  • trunk/npemap.org.uk/scripts/generic-python-import/generic_importer.py

    r735 r736  
    193193
    194194                # Grab all of the current ones, including deleted ones
    195                 sql = "SELECT id, outward, inward, deleted FROM postcodes WHERE source = %s ORDER BY deleted DESC"
     195                # Make sure that un-deleted ones come first, so that if one postcode
     196                #  has deleted and undeleted forms, we prefer the undeleted form
     197                sql = "SELECT id, outward, inward, deleted, delete_reason FROM postcodes WHERE source = %s ORDER BY deleted"
    196198                sth = dbh.cursor()
    197199                sth.execute(sql, source_id)
    198200
    199                 # Loop over all rows. Non-deleted come last, so it doesn't matter
    200                 #  if we override early occurances of a postcode
    201201                spcs = {}
    202202                while 1:
     
    204204                        if row == None:
    205205                                break
    206                         id, outward, inward, deleted = row
    207                         spcs["%s %s" % (outward, inward)] = \
    208                                                 { 'outward':outward, 'inward':inward, 'id':id, 'deleted':deleted, 'done':False }
     206                        id, outward, inward, deleted, delete_reason = row
     207
     208                        postcode = "%s %s" % (outward, inward)
     209                        if not spcs.has_key(postcode):
     210                                spcs[postcode] = {
     211                                        'outward':outward, 'inward':inward, 'id':id, 'done':False,
     212                                        'deleted':deleted, 'delete_reason': delete_reason, }
    209213                sth.close()
    210214                count = len(spcs.keys())
    211 
    212                 # And for interest, the count of deleted ones
    213                 sql = "SELECT COUNT(distinct outward||inward) FROM postcodes WHERE source = %s AND deleted"
     215                deleted_count = len([a for a in spcs.values() if a['deleted']])
     216
     217                # And for interest, grab the raw deleted count
     218                sql = "SELECT COUNT(id) FROM postcodes WHERE source = %s AND deleted"
    214219                sth = dbh.cursor()
    215220                sth.execute(sql, source_id)
    216                 deleted_count = sth.fetchone()[0]
     221                raw_deleted_count = sth.fetchone()[0]
    217222                sth.close()
    218223
    219                 print "There are currently %d entries in the database from the %s." % (count, self.source_name)
     224                print "There are currently %d distinct entries in the database from the %s." % (count, self.source_name)
    220225                print " Of these, %d are live, and %d are deleted" % (count-deleted_count, deleted_count)
     226                print " There are also %d raw deleted entries (may include duplicates)" % (raw_deleted_count)
    221227                print "The new import contains %d entries" % len(postcodes)
    222228
     
    243249                # Add the latest list to the database, tweaking if already there
    244250                add_sql = "INSERT INTO postcodes (outward, inward, raw_postcode_outward, raw_postcode_inward, easting, northing, ie_easting, ie_northing, source) VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s)"
    245                 upd_sql = "UPDATE postcodes SET outward=%s, inward=%s, raw_postcode_outward=%s, raw_postcode_inward=%s, easting=%s, northing=%s, ie_easting=%s, ie_northing=%s WHERE id=%s"
     251                upd_sql = "UPDATE postcodes SET outward=%s, inward=%s, raw_postcode_outward=%s, raw_postcode_inward=%s, easting=%s, northing=%s, ie_easting=%s, ie_northing=%s, deleted='f', delete_reason=NULL WHERE id=%s"
    246252                del_sql = "UPDATE postcodes SET deleted='t', delete_reason=%s WHERE id=%s"
    247253                sth = dbh.cursor()
     
    255261
    256262                        if spcs.has_key(pc):
    257                                 # If it's previously been deleted, just skip it
     263                                # If it's previously been deleted, just skip it, unless
     264                                #  it's one that went and came back
    258265                                if spcs[pc]['deleted']:
    259                                         if self.verbose:
    260                                                 print "\tSkipping as previously deleted (id %d)" % spcs[pc]['id']
    261                                         continue
     266                                        if spcs[pc]['delete_reason'] == reason_id:
     267                                                if self.verbose:
     268                                                        print "\tPreviously missing postcode has returned (id %d)" % spcs[pc]['id']
     269                                        else:
     270                                                if self.verbose:
     271                                                        print "\tSkipping as previously deleted - %s (id %d)" % (spcs[pc]['delete_reason'], spcs[pc]['id'])
     272                                                continue
    262273
    263274                                # Otherwise, update the existing entry
     
    281292                if self.verbose:
    282293                        print "\nDeleting any postcodes no longer in source:"
    283                 for gone_pc in [ pc for pc in spcs.keys() if not spcs[pc]['done'] ]:
     294                gone = [ pc for pc in spcs.keys() if not spcs[pc]['done'] and not spcs[pc]['deleted'] ]
     295                for gone_pc in gone:
    284296                        print "\tflagging as deleted old postcode %s" % gone_pc
    285297                        sth.execute(del_sql, (reason_id, spcs[gone_pc]['id']))
Note: See TracChangeset for help on using the changeset viewer.