# PARAMETERS THAT CHANGE
# NOTE(review): this file's line structure was lost (statements had been
# collapsed onto one comment line); the layout below is reconstructed.

# transmindateflag:
#   if 1 then check whether any trans pipe has an install date
#   that is greater than pipes on either side of it nearby
#   and put into transmission_toonew.csv
#   if 2 then replace by the dates suggested and put suggestions in
#   transmission_toonew.csv
#   if 0 (don't use any more),
#   then don't change trans date due to other transes.
transmindateflag = 2

# transfixedflag:
#   if 1 then don't change the dates on the trans pipes due to dists
#   if 0 then allowed to change based on dists
#   if 2, then allow changes if done by certain (N) pipes but not otherwise
transfixedflag = 2

# if a suggested change is more than gapthresh, then
# it is a likely replacement
gapthresh = 20

# if carriedinstalldate of two paths to pipe p are equal
# but one path has a length that is more than a factor of
# lengthgapallowed different, then choose the shorter length.
lengthgapallowed = 1.05

# if 10, then go down to 10s digit in all processing; if 5 then lower 5
# int(floorto*math.floor(x/floorto))
floorto = 10

import csv

# Parameters to Change Based on Input File
# (i.e. first pipeid, then transdistflag, then x1, ...)
# If order changes, just change these index values
indata = 'pipedatein'  # name of file input
thresh = 0.001  # threshold of differences for two pipes to be connected, probably fixed
# NOTE(review): the file handle opened here is never bound to a name, so it
# cannot be closed explicitly; the reader is consumed elsewhere in the file.
pipein = csv.reader(open(indata, 'rb'), delimiter =',', quotechar='"')
pipeid = 0
transdistflag = 1
x1 = 2
y1 = 3
x2 = 4
y2 = 5
diam = 6
mat = 7
pipelength = 8
# installdate = 7 # for my test data
installdate = 10  # for feb 28, 2015 this is year of installation but would
# work also if we had something like 20150403
estimated = 11  # if 'N' then they believe this date of installation if 'Y' they don't

# This algorithm works in two phases:
# Phase 0: adjust trans pipes based on neighboring transes.
# This might mean declaring some transes to be suspected replicates
# (if younger by more than gapthresh) or just inconsistent with
# neighbors (if younger than both neighbors, for example).
# Depth first search phase: flatten the dates based on floorto and then
# compute best paths. This is somewhat modified by the certainarr.
# This phase works downstream starting at trans pipes.
# Upstream phase: Then come up from downstream pipes to adjust upstream ones.
# Certain pipes are treated first and then other pipes.
# Notes: for depthfirstsearch don't go two ways from any pipe and count it.
# Otherwise suppose A has two neighbors B and C could go from A to B
# and then from B to C.
# Maybe simply change while (j >= 0) & (j not in traversedpipes) to
# while (j not in traversedpipes) in compress function
# If pipe p has date d and the minimum dated path goes through
# pipes having date > d + 20 (called the newbies), then all newbies are
# replacements.
# In that case, I will consider the original dates of the newbies to be 0
# (as if the dates were unknown
# at the beginning).
# 15340 as target has no transmission.
# New scheme doesn't use material at all.
# Instead, we find the earliest carriedinstalldate to each pipe and record
# the transmission pipe that got us there.
# If that pipe is older than anything on its path, having date d, then make sure
# every ancestor pipe (up towards trans) has a date that is d or less.
# That is, make ancestors at least as old.
# This includes the trans.
# A suggested change becomes a possible replacement if the difference
# with the initial DOI is 20 years at least (a parameter gapthresh).
# At the end we are guaranteed a coherent path in which every pipe nearer
# the transmission has an earlier (or no later date) by construction.
# Other things to do with trans: for each trans, if it has more than one
# neighbor, then see whether it is newer than the minimum age neighbor
# for all of its neighbors.
# If so, then flag it as too new and a possible replacement.

# Pipe Algorithm
# Basic rule: for a pipe p to have date of installation d, there must
# be a pipe path from a transmission line to p where every date <= d.
# An edge/segment in a pipe path can go only from diameter x to a diameter
# smaller than or equal to x. Also there cannot be an edge from a distribution
# pipe to a transmission pipe.
# Assume the data is in the form:
# pipeid,
# Transmission or Distribution
# x1
# y1
# x2
# y2
# diam
# date of installation
# So first read and parse this into several arrays.
# Next, sort based on x1, y1, pipeid
# Next, sort based on x2, y2, pipeid
# Now for x1, y1 associated with pipe p find all pipes p'
# (different from p) such that p' has either the same x1, y1 or the same
# x2, y2.
# Ditto for each x2, y2 associated with p.
# Every time you find pipes connected, define an edge from p to p'
# if diam(p) >= diam(p') and
# (either p is transmission or p' is not transmission)
# So now we have associated with each pipe its date of installation.
# Need some rule to decide whether the pipe is a replacement.
# Use depth first search as follows. Start from the pipes along
# the transmission line.
# Assign an initial "path installation date" of 5000 to each pipe
# off the transmission lines.
# Do depth first search starting from the transmission line.
# When we arrive at a pipe p in the depth first search, we look at the path
# installation date the DFS algorithm is carrying along and compare it
# with the path installation date of the pipe as it is.
# If the carried path installation date <= path installation date for p,
# then replace the path installation date for p by the max of the
# carried path installation and the date on p.
# Keep track of how we got to p, i.e. from which route.
# Now look at all pipes whose path installation date > installation date for p.
# Those are bad.
# Now trace backwards on those paths to find out which pipes are witnesses
# of the badness of p (i.e. all pipes on that path whose installation date
# > installation date for p).
# Get a count for each such accusing pipe.
# The ones that accuse the most are the ones we want to focus on.
# When the depth first search arrives at a pipe it updates its path
# installation date then it looks for all neighbors of that pipe and proceeds.
# Ok, now if a pipe is an accusee but not an accuser, then it might be
# incorrectly dated as too early.
# If a pipe is an accuser but not an accusee, then it might be a replacement.
# For example, if I have a sequence 1910 .. 1920 .. 1910, then the 1920 pipe
# will be an accuser but not an accusee.
# Theorem: if we decrease a pure accuser's pipe's P's
# age to the age of the path to that
# accuser, this cannot cause any new accusations.
# Proof: If P has its time decreased to that extent,
# then it will not be accused and can only reduce accusations downstream.
#
# Theorem: If we increase a pure accusee's pipe's P's
# age to the age of that path to P, then this cannot cause any new accusations.
# Proof: P will be fine of course and any descendant of P will have
# the same pathinstalldate.
#
# Note however that if we have pipes in sequence with dates:
# 1910 1925 1910 1920 1910, then the 1925 is a pure accuser, so it will
# be changed to 1910. In the meantime, the 1910s are pure accusees so they
# will be raised to 1925, but the 1920 is both, so it won't change.
# There is an option called the takesuggestions flag that will read
# suggestions.csv and change dates accordingly.
# Preparation for predicting breaks
import sys
import math
import os
import copy
import operator
import doctest
import collections
import datetime
import random
from operator import itemgetter, attrgetter

sys.setrecursionlimit(20000)  # ??? might have to extend for different cities
# alternatively, the depth first search algorithm could work like this:
# set up a workqueue.
# If a pipe causes a change to some carriedinstalldate,
# then add its neighbors to the work queue unless
# they are already there. Continually try to remove a node from
# the work queue and deal with it.

now = datetime.datetime.now()
currentyear = now.year


# APPLICATION-SPECIFIC
# Convert two digit and four digit years
def convertyear(num):
    """Expand a two-digit year to four digits; return other values unchanged.

    Values up to the current two-digit year (currentyear % 100) get 2000
    added; larger values below 100 get 1900 added; values of 100 or more
    are returned as they are.
    """
    if num <= (currentyear % 100):
        return num + 2000
    if num < 100:
        return num + 1900
    return num


# Figure out which direction edges should go between edge i and edge j
# where i and j are simply indexes into our arrays.
# Side effects to myedges1
# ??? Right now we have no edges between transmission pipes.
# ??? Right now we've eliminated the dependency on diameter
def adjustedges(i, j):
    """Record an undirected edge between pipe indexes i and j.

    TRANS-TRANS pairs are appended (in both directions) to
    mytransedges1/mytransedges2; any pair with at least one DIST pipe is
    appended (in both directions) to myedges1/myedges2.
    """
    if (transdistflagarr[i] == 'TRANS') and (transdistflagarr[j] == 'TRANS'):
        mytransedges1.append(i)
        mytransedges1.append(j)
        mytransedges2.append(j)
        mytransedges2.append(i)
    if (transdistflagarr[i] == 'DIST') or (transdistflagarr[j] == 'DIST'):
        # at least one must be distribution
        myedges1.append(i)
        myedges2.append(j)
        myedges1.append(j)
        myedges2.append(i)


# if called for, make the installdate of all neighboring transmission pipes
# myinstalldate if myinstalldate is the least (oldest)
def transdepthfirstsearch(i, myinstalldate):
    """Lower installdatearr of trans pipes reachable from i to myinstalldate.

    Recurses through hashedtrans neighbors; a neighbor is only visited when
    its current install date is strictly greater than myinstalldate, which
    is what terminates the recursion.
    """
    if (i in hashedtrans):
        myneighs = set(hashedtrans[i])
        for j in myneighs:
            if (installdatearr[j] > myinstalldate):
                installdatearr[j] = myinstalldate
                transdepthfirstsearch(j, myinstalldate)


# if trans pipe i has a neighbor that is at least gapthresh older, then change this pipe's installdatefortransmin
# to the greatest of all its neighbors that is at least gapthresh older # Do not propagate # Note that pipe i is a transpipe that is suspected of needing an adjustment def transflaggap(i): if i in hashedtrans: # print "in transflaggap i = ", i, " and trans neighbors of i are ", hashedtrans[i] mytransneighs = set(hashedtrans[i]) if (1 < len(mytransneighs)): olderyears = [] for j in mytransneighs: if ((installdatefortransmin[j] + gapthresh) <= installdatefortransmin[i]): olderyears.append(installdatefortransmin[j]) priorityvote[j]+= 1 if 0 < len(olderyears): transchanged = 1 # surrounded by pipes at least gapthresh older # print "Pipe ", i, " changed from ", installdatefortransmin[i], " to ", max(olderyears) if (certainarr[i] == 0): installdatefortransmin_tmp[i] = max(olderyears) # always goes down if (pipeidarr[i] == '13692') and (1902 == max(olderyears)): for k in mytransneighs: print 'transflaggap: installdatefortransmin[k]:', installdatefortransmin[k], ' pipeidarr[k]:', pipeidarr[k] newneedchecks.append(i) for k in mytransneighs: if (installdatefortransmin[k] > installdatefortransmin_tmp[i]): newneedchecks.append(k) priorityvote[i]+= 1 # if trans pipe i has at least two trans neighbors and some descendant # of those neighbors is older than pipe i, then pipe i has a bad date def transflag(i, myinstalldate): if (i in hashedtrans): myreason = "" if (installdatefortransmin[i] < installdatearr[i]): myreason+= "The year is reduced because difference with near neighbors exceeds "+str(gapthresh) + ". 
" mytransneighs = set(hashedtrans[i]) if (1 < len(mytransneighs)): allminyears = [] for j in mytransneighs: # allminyears.append(findminyear([i], j, installdatearr[i])) allminyears.append(installdatefortransmin[j]) if (installdatefortransmin[i] > max(allminyears)) and (1 < len(allminyears)): # surrounded by older pipes installdatefortransmin[i] = max(allminyears) if (pipeidarr[i] == '13692') and (1902 == max(allminyears)): for k in mytransneighs: print 'transflag: installdatefortransmin[k]:', installdatefortransmin[k], ' pipeidarr[k]:', pipeidarr[k] # print "trans pipe ", pipeidarr[i], " having install date of ", installdatefortransmin[i], " should have install date of", installdatefortransmin[i], "based on near neighbors of:", allminyears myreason+= " Neighboring pipes having years within " + str(gapthresh) + " pull down the date of this pipe to " + str(installdatefortransmin[i]) + "." transmission_toonew.writerow([pipeidarr[i], installdatearr[i], installdatefortransmin[i], myreason]) elif (installdatefortransmin[i] < installdatearr[i]): transmission_toonew.writerow([pipeidarr[i], installdatearr[i], installdatefortransmin[i], myreason]) # find min year of neighbors of alreadyseen TRANS starting with j # and change mininstallsofar if you find something less # ??? 
notice that we're looking only at a local neighborhood def findminyear(alreadyseen, j, mininstallsofar): mytransneighs = set(hashedtrans[j]) mininstallsofar = min(mininstallsofar, installdatearr[j]) # print "Node: ", j for k in mytransneighs: if not k in alreadyseen: x = copy.deepcopy(alreadyseen) # print "alreadyseen is: ", alreadyseen x.append(k) # print "x is: ", x, "k is: ", k if (10 > len(x)): # only look at a local neighborhood mininstallsofar = min(mininstallsofar, findminyear(x, k, mininstallsofar)) return mininstallsofar # Depth first search starting from a node and adjusts pathinstalldatearr # Idea here is that pathinstalldatearr represents the minimum year that # a pipe could be installed by assuming that a pipe cannot be installed # before a path to that pipe has been built. # pathinstalldatearr starts with a high value, but depth first search # tries to bring it down. # carriedinstalldate is the date of the current depth first search path. # pipe having latest date will be an id (the one responsible for carriedinstalleddate) # if two paths have the same carriedinstalldate but one path is shorter # by a factor of lengthgapallowed or more, then we change paths. # Carriedinstalldate is not changed if a pipe's installdate is the # badpathinstalldate or if its installdate is at least gapthresh more # than carriedinstalldate. # if the transmission pipe of i has been changed, we have to propagate that # to children # pathcarriedlength is the analogy to pathinstalldatearr and is used # when there are tie-breakers to pathinstalldatearr # The certainflag is set to 1 for the path if we have run across an 'N' # (i.e. non-estimated and therefore correct) pipe. In such a case, the # carriedinstalldate goes to the N pipe and is raised only if there are other # N pipes. # Here is full algorithm: Now, what we do is use normal carried install date # except that whenever we see an N, we reset carriedinstall # date to be the one of that N. 
# We keep going using that N as carried install date # unless modified by another N. # Let's say that afer an N we see a Y and the Y has a # greater date (is younger), we don't change anything. # We make sure that we don't revisit already visited pipes on this path. def depthfirstsearch(i, carriedinstalldate, carriedlength, pipehavinglatestdate, transhaschanged, certainflag, traversedpipes): # invariant: there is a path from a transmission pipe to i. # true on first call. if (i in hashed): myneighs = hashed[i] for j in myneighs: if (not transdistflagarr[j] == 'TRANS') and ((pathinstalldatearr[j] > carriedinstalldate) or ((pathinstalldatearr[j] == carriedinstalldate) and (pathcarriedlength[j] > lengthgapallowed * carriedlength)) or ((pathinstalldatearr[j] == carriedinstalldate) and (pathcarriedlength[j] >= carriedlength) and (transhaschanged == 1) and (not mytransmissionpipe[i] == mytransmissionpipe[j])) or ((pathinstalldatearr[j] == carriedinstalldate) and (pathcarriedlength[j] >= carriedlength) and (mytransmissionpipe[i] == mytransmissionpipe[j]) and (disttotrans[j] > disttotrans[i] + 1))): if (not j in traversedpipes): # without the above we might have a downstream # certain pipe whose date is earlier than an # upstream pipe and then the downstream pipe # will try to change the date of the upstream # pipe. We want to avoid that. 
# The problem is that this copy operation is a killer newtraversedpipes = copy.copy(traversedpipes) newtraversedpipes.append(j) # print "Visiting pipe index", j, " with global id ", pipeidarr[j] # print "having pathinstalldatearr value", pathinstalldatearr[j] # print " and having pathcarriedlength value", pathcarriedlength[j] # print "from pipe index ", i, " with global id ", pipeidarr[i] # print "with carriedinstalledate", carriedinstalldate # print "with carriedlength", carriedlength # print "with transhaschanged", transhaschanged # print "trans of i is", mytransmissionpipe[i] # print "trans of j is", mytransmissionpipe[j] # print "Have already visited:" if (prevpipearr[i] == j) : print "depthfirstsearch: Make parent of ", pipeidarr[j], " be ", pipeidarr[i] print "carriedinstalldate is ", carriedinstalldate print "pathinstalldatearr[j] is ", pathinstalldatearr[j] print "carriedlength is ", carriedlength print "pathcarriedlength[j] is ", pathcarriedlength[j] print "mytransmissionpipe[j] is ", pipeidarr[mytransmissionpipe[j]] print "mytransmissionpipe[i] is ", pipeidarr[mytransmissionpipe[i]] print "disttotrans[i] is ", disttotrans[i] print "disttotrans[j] is ", disttotrans[j] print "certainflag is ", certainflag print "certainarr[j] is ", certainarr[j] print "certainarr[i] is ", certainarr[i] print "installdatearr[j] is ", installdatearr[j] print "installdatearr[i] is ", installdatearr[i] pathinstalldatearr[j] = carriedinstalldate # date of path coming to this node pathcarriedlength[j] = carriedlength # date of path coming to this node prevpipearr[j] = i if mytransmissionpipe[j] == mytransmissionpipe[i]: z = 0 else: z = 1 mytransmissionpipe[j] = mytransmissionpipe[i] disttotrans[j] = disttotrans[i] + 1 # invariant: j is reachable from a transmission pipe if ((certainarr[j] == 0) and (certainflag == 0)) or ((certainarr[j] == 1) and (certainflag == 1)): # either we are in the case that we've seen only # uncertains (Y to estimated) or we have seen a certain # and 
this one is certain. In either case, we can # update this pipe and the carriedinstalldate newcertainflag = certainflag # doesn't change x = pathinstalldatearr[j] if (installdatearr[j] == badpathinstalldate): # pipe without a date minsuggestdatearr[j] = x piperespforchange[j] = pipehavinglatestdate # this the id of the pipe that is responsible # for carriedinstalldate elif (installdatearr[j] >= (gapthresh + x)) and (certainarr[j] == 0): # the installdate is suspiciously large # (so large it looks (for estimated pipes) # like a replacement). # Take the date from the path. piperespforchange[j] = pipehavinglatestdate # do not take current pipe else: y = installdatearr[j] if (x < y): pipehavinglatestdate = j # needed for pipes having no ids piperespforchange[j] = j # going forward x = y if ((certainarr[j] == 1) and (certainflag == 0)): # j is our first certain # we use j's date for carried install date going forward x = installdatearr[j] # Effective date for posterity is # installdate of j. Don't worry about gap. # pathinstalldatearr[j] = min(installdatearr[j], pathinstalldatearr[j]) # THIS ONE IS KEY: otherwise we can have # loops in which the child of j comes back # and changes j using the pathinstalldate[j] # which, until this assignment could be larger # than j. # This is ok, because all upstream pipes from j # will be as old as j or older. # Maybe we don't want though because then # this will cut off other methods of getting # to j. We avoid the problem of j modifying # upstream pipes with our traversedpipes # idea. 
pipehavinglatestdate = j piperespforchange[j] = j # going forward # that will be the current pipe j newcertainflag = 1 if ((certainarr[j] == 0) and (certainflag == 1)): # we have been certain and this pipe is not, so just # use carriedinstalldate x = carriedinstalldate piperespforchange[j] = pipehavinglatestdate newcertainflag = 1 if (prevpipearr[i] == j): print " depthfirstsearch(j, x, carriedlength + pipelengtharr[j], pipehavinglatestdate, z, newcertainflag, newtraversedpipes) ", j, x, carriedlength + pipelengtharr[j], pipehavinglatestdate, z, newcertainflag, newtraversedpipes depthfirstsearch(j, x, carriedlength + pipelengtharr[j], pipehavinglatestdate, z, newcertainflag, newtraversedpipes) # This will find all pipes A that are involved in accusing a given pipe # of being too old (because pipes in A are younger). # i is a pipeid of a previous pipe. Side effect to accusers. def findaccusers(i, myinstalldate, accusedpipe): if(i >= 0): # This indicates that this is a real pipe if (installdatearr[i] > myinstalldate): accusers.append(i) accused.append(accusedpipe) findaccusers(prevpipearr[i], myinstalldate, accusedpipe) # compressprevpipe # Here what we do is look at the prevpipearr and see if we can # cut out edges that needn't be there. # we might have something like (3,4) (5,6); (7,8) (5,6); (6,9) (7,8); (5,6) (10,11). # In that case, we cut out the pipes having (7,8) (5,6); (6,9) (7,8) # We do this pipe by pipe. 
def compressprevpipe(): i = 0 while i < len(pipeidarr): traversedpipes = [] k = prevpipearr[i] if (k >= 0): if not ((mytransmissionpipe[i] == mytransmissionpipe[k]) and (disttotrans[i] == disttotrans[k] + 1)): print "Pipe", pipeidarr[i], "is at distance", disttotrans[i] print "Previous pipe", pipeidarr[k], "is at distance", disttotrans[k] while (k >= 0) and (k not in traversedpipes): # not a transmission path traversedpipes.append(k) j = prevpipearr[k] # interested starting with grandparents of i if (i in hashed): if (pipeidarr[i] == '13166') and (pipeidarr[k] == '13410') and (pipeidarr[j] == '13673'): print 'order is', pipeidarr[i], pipeidarr[k], pipeidarr[j] print 'mytransmissionpipe[i]', mytransmissionpipe[i] print 'mytransmissionpipe[k]', mytransmissionpipe[k] print 'mytransmissionpipe[j]', mytransmissionpipe[j] print 'disttotrans[i]', disttotrans[i] print 'disttotrans[k]', disttotrans[k] print 'disttotrans[j]', disttotrans[j] print 'pipeidarr[j] is a neighbor of pipeidarr[i]', j in hashed[i] if (j in hashed[i]) and (mytransmissionpipe[j] == mytransmissionpipe[i]) and (disttotrans[i] > disttotrans[j]) and (disttotrans[k] > disttotrans[j]) and (prevpipearr[k] == j): print "In compressprevpipe, changed previous pipe of ", pipeidarr[i], " from ", pipeidarr[prevpipearr[i]], " to ", pipeidarr[j] print "In compressprevpipe, changed distance of ", pipeidarr[i], " from ", disttotrans[i], "to",disttotrans[j] + 1 prevpipearr[i] = j disttotrans[i] = disttotrans[j] + 1 k = j # continue try to shortening i+= 1 # As of now, the return value is not really used # pipes on the path to trans or if those certain pipes all have at least # as old a date else return year of newest pipe # (if there are certain pipes having new dates) # side effect: if the date of this pipe (uncertainpipe) # is less than the date of the nearest upstream certain pipe (or trans pipe) # then make sure uncertainpipe's minsuggestdate is at least the age of that # upstream certain pipe's minsuggestdate def 
checkforcertains(parent, uncertainpipe, yoi_accused): flag = 1 while(flag == 1): if(certainarr[parent] == 1) or ((transdistflagarr[parent] == 'TRANS') and (not transfixedflag == 0)): if (minsuggestdatearr[parent] > minsuggestdatearr[uncertainpipe]): # print "Checkforcertains: pipe ", pipeidarr[uncertainpipe], " changed from ", minsuggestdatearr[uncertainpipe], " to ", minsuggestdatearr[parent], " because of pipe ", pipeidarr[parent], " having trans/dist flag", transdistflagarr[parent] minsuggestdatearr[uncertainpipe] = minsuggestdatearr[parent] piperespforchange[uncertainpipe] = parent return 1 flag = 0 elif ((transdistflagarr[parent] == 'TRANS') and (transfixedflag == 0)): flag = 0 # just stop. No reason to change uncertainpipe. parent = prevpipearr[parent] return 0 # check that path upstream from startingpipe is coherent def checkforcoherence(parent, startingpipe, traversedpipes): while(parent >= 0): if (minsuggestdatearr[parent] > minsuggestdatearr[startingpipe]): return parent parent = prevpipearr[parent] # print "Pipe ", pipeidarr[startingpipe], " is coherent." return 0 # if flag is 1 then return 'N' else return 'Y' def convertcertain(flag): if (flag == 1): return 'N' else: return 'Y' # This will ensure that no pipe in the reverse path from an old pipe # to the transmission line will have a date that is more recent # than that old pipe (June 28, 2014). # If there is a gap of gapthresh then the pipe that is gapthresh newer # is a likely replacement. # i is a pipeid of a previous pipe. Side effect to badpaths. # alfreadyseenflag: has accused pipe already been printed. # traversedpipes are Pipes already visited in the march towards a transmission. # make badpath be local and be passed just like traversedpipes. # If transfixedflag == 0 then allow trans pipes to have their # dates changed because of non-certain dists. Otherwise treat trans pipes as # certain. # When encountering a certain pipe, # force all downstream pipes to later dates if necessary. 
# If an upstream pipe is to be changed, then add to the priority of both # the downstream pipe and the upstream pipe. # Give priority to pipes based on how much it accuses or is accused. def findbestbadpath_time(i, accusedpipe, alreadyseenflag, oldestmaterialtype, traversedpipes): if (alreadyseenflag == 0): badpath.append([pipeidarr[accusedpipe], transdistflagarr[accusedpipe], matarr[accusedpipe], diamarr[accusedpipe], originstalldatearr[accusedpipe], minsuggestdatearr[accusedpipe], disttotrans[accusedpipe], convertcertain(estimatedarr[accusedpipe]), ' ', ' ', ' '] ) # eventually will use certainarr alreadyseenflag = 1 if (i in traversedpipes): k = 0 print "Seems to be a cycle in findbestbadpath_time involving pipe ", pipeidarr[i] while (k < len(traversedpipes)): print "findbestbadpath_time: Pipe", pipeidarr[traversedpipes[k]], "has distance", disttotrans[traversedpipes[k]], "and transmission pipe", mytransmissionpipe[traversedpipes[k]] k+= 1 # if((i >= 0) and (i not in traversedpipes)): if((i >= 0) ): # could work for transmission # This indicates that this is a real pipe and not already seen traversedpipes.append(i) # First check if this particular pipe i could be a replacement # If so, adjust its status. 
oldmin = minsuggestdatearr[i] if (minsuggestdatearr[accusedpipe] < minsuggestdatearr[i]) and ((transdistflagarr[i] == 'DIST') or (transfixedflag == 0)): minsuggestdatearr[i] = minsuggestdatearr[accusedpipe] # This could cause a pipe to get an earlier date piperespforchange[i] = accusedpipe priorityvote[i]+= 1 priorityvote[accusedpipe]+= 1 # print "Suggest earlier date for", pipeidarr[i], "due to", pipeidarr[accusedpipe] badpath.append([' ', ' ', ' ', pipeidarr[i], transdistflagarr[i], matarr[i], diamarr[i], originstalldatearr[i], minsuggestdatearr[i], disttotrans[i], convertcertain(estimatedarr[i]) ] ) # eventually will use certainarr if (i > 0): # don't recurse if at trans findbestbadpath_time(prevpipearr[i], accusedpipe, 1, oldestmaterialtype, traversedpipes) else: badpath.append(['Date for', pipeidarr[accusedpipe], ' is ', originstalldatearr[accusedpipe], ' ', ' ', ' ', ' ']) if (pathinstalldatearr[accusedpipe] == badpathinstalldate): badpath.append(['It seems that ', pipeidarr[accusedpipe], ' having diameter ', diamarr[accusedpipe], 'has no path from a transmission pipe', ' ', ' ', ' ']) priorityvote[accusedpipe]+= 1 if (i == accusedpipe): badpath.append(['Pipe ', pipeidarr[i], ' seems to have parent ', pipeidarr[prevpipearr[i]], ' but has no path to transmission.', ' ', ' ', ' ']) # Here we are starting from a certain node (estimated = 'N') and we # can roll all the way up through all nodes including transmissions (except we don't include # transmissions if transfixedflag is 1). 
def findbestbadpath_time_certain(i, certainpipe, alreadyseenflag, oldestmaterialtype, traversedpipes): if (alreadyseenflag == 0): alreadyseenflag = 1 if (i in traversedpipes): k = 0 print "Seems to be a cycle in findbestbadpath_time_certain" while (k < len(traversedpipes)): print "findbestbadpath_time_certain: Pipe", pipeidarr[traversedpipes[k]], "has distance", disttotrans[traversedpipes[k]], "and transmission pipe", pipeidarr[mytransmissionpipe[traversedpipes[k]]], " starting from certain pipe ", pipeidarr[certainpipe] k+= 1 if((i >= 0) ): # could work for transmission # This indicates that this is a real pipe and not already seen traversedpipes.append(i) # First check if this particular pipe i could be a replacement # If so, adjust its status. oldmin = minsuggestdatearr[i] if (minsuggestdatearr[certainpipe] < minsuggestdatearr[i]) and (transdistflagarr[i] == 'DIST' or transfixedflag == 0 or transfixedflag == 2): minsuggestdatearr[i] = minsuggestdatearr[certainpipe] # This could cause a pipe to get an earlier date piperespforchange[i] = certainpipe priorityvote[i]+= 1 print "Suggest earlier date for", pipeidarr[i], "due to", pipeidarr[certainpipe] badpath.append([' ', ' ', ' ', pipeidarr[i], transdistflagarr[i], matarr[i], diamarr[i], originstalldatearr[i], minsuggestdatearr[i], disttotrans[i], convertcertain(estimatedarr[i]) ] ) # eventually will use certainarr if (i > 0): # don't recurse on trans findbestbadpath_time_certain(prevpipearr[i], certainpipe, 1, oldestmaterialtype, traversedpipes) else: badpath.append(['Date for', pipeidarr[certainpipe], ' is ', originstalldatearr[certainpipe], ' ', ' ', ' ', ' ']) if (pathinstalldatearr[certainpipe] == badpathinstalldate): badpath.append(['It seems that ', pipeidarr[certainpipe], ' having diameter ', diamarr[certainpipe], 'has no path from a transmission pipe', ' ', ' ', ' ']) if (i == certainpipe): badpath.append(['Pipe ', pipeidarr[i], ' seems to have parent ', pipeidarr[prevpipearr[i]], ' but has no path to 
transmission.', ' ', ' ', ' ']) # This will find all pipes A that are involved in the path from # the transmission line to a good pipe # i is a pipeid of a previous pipe. Side effect to goodpath. # UNUSED def findbestgoodpath(i, goodpipe, alreadyseenflag, traversedpipes): traversedpipes.append(i) if (alreadyseenflag == 0): goodpath.append([pipeidarr[goodpipe], matarr[goodpipe], diamarr[goodpipe], replacementarr[goodpipe], originstalldatearr[goodpipe], minsuggestdatearr[goodpipe],' ', ' ', ' '] ) if(i >= 0): # This indicates that this is a real pipe goodpath.append([' ', ' ', ' ', pipeidarr[i], matarr[i], diamarr[i], replacementarr[i], originstalldatearr[i],minsuggestdatearr[i]] ) if dateupperboundarr[i] > installdatearr[goodpipe]: # then in order for goodpipe to be good, # pipe i cannot have a later install year dateupperboundarr[i] = installdatearr[goodpipe] pipeupperboundarr[i] = pipeidarr[goodpipe] # print ("Pipe "), pipeidarr[i], (" changes its upper bound to "), dateupperboundarr[i] # goodpipe is responsible for this upper bound if prevpipearr[i] in traversedpipes: goodpath.append('Seem to have a cycle in pipeidarr') else: findbestgoodpath(prevpipearr[i], goodpipe, 1, traversedpipes) else: goodpath.append(' ') # given a bunch of index values, produce a string consisting of # a bunch of space-separated pipe values def spitspace(indexes): mystring = "" for i in indexes: mystring+= pipeidarr[i]+ (" ") return mystring # DATA # Parameters that don't change makeestimatescertain = 0 # if 0 then estimates are left as estimates # if 1, then all estimates are made certain badpathinstalldate = 5000 # a very big year suggestflag = 0 # take suggestions if 1 and 0 otherwise ??? # Always set to 0. 
insuggest = 'suggestedchanges.csv'
minaccusethresh = 1 # interested in accusers if they accuse at least minaccusethresh pipes, probably fixed
replacematerialflag = 0 # we assert that pipes have been replaced based on material
replacetimeflag = 1 # we assert that pipes should be replaced based on gapthresh

# We are going to fill in several arrays, corresponding to these fields.
# All of them are parallel: position i in each array describes the pipe
# read from the i-th input row.
pipeidarr = []
transdistflagarr = []
x1arr = []
y1arr = []
x2arr = []
y2arr = []
diamarr = []
matarr = []
priorityvote = [] # priority for checking
pipelengtharr = []
replacementarr = []
installdatearr = [] # modified for processing
originstalldatearr = [] # kept for suggestedates
installdatefortransmin = [] # really used only for trans but set for all pipes
# Used to set a value in transmission_toonew.csv.
# If transmindateflag is 2, then change installdatearr to this value
installdatefortransmin_tmp = [] # used for updates to transmission pipes
certainarr = []
estimatedarr = []
pathinstalldatearr = []
pathcarriedlength = []
maxsuggestdatearr = [] # recommend later install date
minsuggestdatearr = [] # recommend earlier installdate
piperespforchange = [] # pipe responsible for causing a given pipe to change date
dateupperboundarr = [] # upper bound on date. If a good path requires a date
# that is no greater than x for pipe i then dateupperboundarr[i] <= x
pipeupperboundarr = [] # pipe that caused the dateupperbound to be reduced
prevpipearr = [] # previous edge/pipe along lowest year path
disttotrans = [] # distance to transmission pipe
mytransmissionpipe = [] # transmission pipe that governs me
transids = [] # an array of indexes corresponding to TRANS pipes

# EXECUTION
# hashedsuggest maps a pipe id (column 0 of the suggestions file) to the
# list of values found in column 1 for that id.
hashedsuggest = {}
# if we have suggested install date changes, take them here
# NOTE(review): the file opened here is never explicitly closed.
if (suggestflag == 1) and (os.path.exists(insuggest)):
    suggestin = csv.reader(open(insuggest, 'rb'), delimiter =',', quotechar='"')
    for s in suggestin:
        hashedsuggest.setdefault(s[0],[]).append(s[1])

# Set up given that the input file is read into pipein.
somethingsuggestedflag = 0 # indicates that suggestions have no effect
pipearray = []
i = 0
for r09 in pipein:
    # Raw columns, converted to float where they are coordinates or sizes.
    pipearray.append(r09)
    pipeidarr.append((r09[pipeid]))
    transdistflagarr.append(r09[transdistflag])
    x1arr.append(float(r09[x1]))
    y1arr.append(float(r09[y1]))
    x2arr.append(float(r09[x2]))
    y2arr.append(float(r09[y2]))
    diamarr.append(float(r09[diam]))
    matarr.append((r09[mat]))
    priorityvote.append(0)
    pipelengtharr.append(float(r09[pipelength]))
    # estimatedarr holds 1 when the input says 'N' (not estimated), else 0.
    if('N' == r09[estimated]):
        estimatedarr.append(1)
    else:
        estimatedarr.append(0)
    # fixedarr.append((r09[certainflag])) # ??? for fixed
    # temporarily assign fixed arbitrarily 1 means fixed
    # they believe this date
    replacementarr.append('Orig') # a priori not a replacement
    # The install date is either a three-part "a/b/c" string whose third part
    # goes through convertyear (defined elsewhere), or a string of at least
    # four characters whose first four are taken as the year.
    installsplit = (r09[installdate]).split("/")
    if(3 == len(installsplit)):
        installyear = convertyear(int(installsplit[2]))
    elif (4 <= len(r09[installdate])):
        installyear = int(r09[installdate][0:4]) # ??? first four digits
        if (installyear < 1000):
            print (r09[pipeid]), "warning: installdate with weird format is: ", r09[installdate], " Assume first four digits are year:", installyear
    else:
        print (r09[pipeid]), " Error: installdate is too short: ", r09[installdate]
        installyear = badpathinstalldate # way too late a date
    # A suggestion for this pipe id overrides whatever was parsed above;
    # only the first suggested value is used.
    if (r09[pipeid] in hashedsuggest):
        somethingsuggestedflag = 1
        x = hashedsuggest[r09[pipeid]]
        installyear = int(x[0])
        print (r09[pipeid]), (" has install year changed to "), installyear
    originstalldatearr.append(installyear)
    if(r09[transdistflag] == 'TRANS'):
        installdatearr.append(installyear)
        transids.append(i)
    else:
        # Non-TRANS pipes get their year rounded down to a multiple of floorto.
        # NOTE(review): presumably relies on installyear being an int so that
        # "/" is Python 2 integer division -- confirm convertyear's return type.
        installdatearr.append(int(floorto*math.floor(installyear/floorto)))
    installdatefortransmin.append(installyear)
    installdatefortransmin_tmp.append(installyear)
    maxsuggestdatearr.append(installyear)
    minsuggestdatearr.append(installyear)
    piperespforchange.append(-1) # non-existent pipe
    dateupperboundarr.append(30000) # ?? very big number
    pipeupperboundarr.append('No Pipe') # pipe id that caused the upper
    # bound on date.
    # Because any lower number would cause the date of
    # pipeupperboundarr[i] to be bad
    if(r09[transdistflag] == 'TRANS'):
        pathinstalldatearr.append(installyear) # for transmissions
        pathcarriedlength.append(0) # 0 distance from a trans
        prevpipearr.append(-1) # initial pipe for transmissions
        # This stops the recursive search for previous edges.
        disttotrans.append(0) # i am a transmission
        mytransmissionpipe.append(i) # i am a transmission
    else:
        pathinstalldatearr.append(badpathinstalldate)
        pathcarriedlength.append(999999999.0) # high value of length
        # initialize to high value for distributions
        # If a pipe is completely inaccessible from transmission line
        # then this pipe will be flagged as bad.
        prevpipearr.append(-99) # impossible pipe for dists
        disttotrans.append(10000) # very far
        mytransmissionpipe.append(-90) # impossible transmission
    # if False and (random.random() < 0.1) and (not installyear == badpathinstalldate) and (not r09[transdistflag] == 'TRANS') :
    # NOTE(review): with makeestimatescertain set to 0 this condition is
    # never true, so certainarr is all zeros -- confirm that "and" (rather
    # than "or") is intended.
    if (makeestimatescertain == 1) and ('N' == r09[estimated]):
        certainarr.append(1) # only sometimes
    else:
        certainarr.append(0)
    i+= 1 # just to keep counts

# print pipeidarr
# print x1arr
# print diamarr
# print matarr
# print replacementarr
# print installdatearr

# Find the connections from pipes to pipes. These are the edges of
# the graphs. An edge is defined as myedges1[i], myedges2[i]
myedges1 = []
myedges2 = []
mytransedges1 = []
mytransedges2 = []
# Compare every unordered pair of pipes (i < j).  Two pipes are considered
# connected when any endpoint of one lies within thresh of any endpoint of
# the other in both x and y.
# NOTE(review): adjustedges is defined elsewhere; presumably it records the
# edge in the lists above.  A pair matching on several endpoint combinations
# triggers one call per match.
i = 0
while (i < len(x1arr)):
    j = i+1
    while (j < len(x1arr)):
        # start of i against start of j
        if (thresh >= abs(x1arr[i] - x1arr[j])):
            if (thresh >= abs(y1arr[i] - y1arr[j])):
                adjustedges(i,j)
        # start of i against end of j
        if (thresh >= abs(x1arr[i] - x2arr[j])):
            if (thresh >= abs(y1arr[i] - y2arr[j])):
                adjustedges(i,j)
        # end of i against start of j
        if (thresh >= abs(x2arr[i] - x1arr[j])):
            if (thresh >= abs(y2arr[i] - y1arr[j])):
                adjustedges(i,j)
        # end of i against end of j
        if (thresh >= abs(x2arr[i] - x2arr[j])):
            if (thresh >= abs(y2arr[i] - y2arr[j])):
                adjustedges(i,j)
        j+=1
    i+=1

# print myedges1
# print myedges2

# hash edges so we know how to go from one edge to another.
# hashed maps myedges1[i] to the list of every myedges2[i] paired with it.
hashed = {}
i = 0
while i < len(myedges1):
    hashed.setdefault(myedges1[i],[]).append(myedges2[i])
    i+= 1

# edges among the trans guys
hashedtrans = {}
i = 0
while i < len(mytransedges1):
    hashedtrans.setdefault(mytransedges1[i],[]).append(mytransedges2[i])
    i+= 1

# print hashed

# At this point, every transmission edge has a pathinstalldatearr
# value equal to its installyear value (this is Annie's preprocessing).
# However the distribution pipes all have high values (e.g. year 5000).
# So now we do depth first search from each such pipe.
# Phase 0: reconcile TRANS pipe dates with neighbouring TRANS pipes, then
# run the depth first search from every TRANS pipe.
if (transmindateflag >= 1):
    print "transmission_toonew.csv has the transmission pipes that are surrounded by older transmission pipes in their nearby neighborhood"
    # NOTE(review): the file opened here is never explicitly closed.
    transmission_toonew = csv.writer(open('transmission_toonew.csv', 'wb'), delimiter=',', quotechar = '"', quoting = csv.QUOTE_MINIMAL)
    transmission_toonew.writerow(['pipeid', 'install year', 'suggested year', 'Reason: neighboring transmission pipes'])

# first take care of trans pipes if necessary
# NOTE(review): transchanged is assigned here but not read anywhere in this
# section; presumably transflaggap (defined elsewhere) uses it.
transchanged = 1
needchecks = set(transids)
# Work-list loop: keep checking TRANS pipes until no pipe needs another look.
# NOTE(review): nothing in this section appends to newneedchecks; presumably
# transflaggap does.  Otherwise the loop ends after one pass.
while (transmindateflag >= 1) and (0 < len(needchecks)):
    print "About to do transflaggap"
    print "len(needchecks) is:", len(needchecks)
    transchanged = 0 # no change unless there is a reason to find a change
    newneedchecks = []
    for i in needchecks:
        transflaggap(i) # change installdatefortransmin for pipes if difference greater than gapthresh
    # Publish this round's updates in one step, after every pipe was checked.
    installdatefortransmin = copy.deepcopy(installdatefortransmin_tmp)
    needchecks = set(newneedchecks)

if False: # Annie on March 17 says to not do this part
    for i in transids:
        if (transdistflagarr[i] == 'TRANS') and (transmindateflag >= 1):
            transflag(i, installdatefortransmin[i])

# NOTE(review): this block is placed outside the disabled "if False" block
# above; the nesting was reconstructed from flattened source -- confirm.
if (transmindateflag == 2):
    for i in transids:
        minsuggestdatearr[i] = installdatefortransmin[i]
        maxsuggestdatearr[i] = installdatefortransmin[i]

# print "pathinstalldatearr is: ", pathinstalldatearr
printedtransguys = [] # for debugging
# Start a depth first search from every TRANS pipe, using its (possibly
# adjusted) date rounded down to a multiple of floorto.
for i in transids:
    ifloor = int(floorto*math.floor(installdatefortransmin[i]/floorto))
    installdatearr[i] = ifloor
    pathinstalldatearr[i] = ifloor
    depthfirstsearch(i, pathinstalldatearr[i], 0, i, 1, 0, [i])

installdatearr = copy.deepcopy(originstalldatearr)
# now that we're done finding best paths we can go back to the original dates.
# For Transes, reset installdatearr to be the one we got from installdatefortransmin
for i in transids:
    installdatearr[i] = installdatefortransmin[i]

# print "prevpipearr is ", prevpipearr

# Now all we do is find bad pipes based on having an pathinstalldatearr
# value larger than the installdatearr value
# with total carried length as a tie-breaker
badlist = []
mergelist = [] # take all suggestions
goodlist = []
accusers = []
accused = []
badpath = []
badpath_fixed = []
badpath_fixedall = []
goodpath = []

# Disabled debugging loop (the "0 == 1" guard is never true): it would print
# pipes whose distance to their transmission pipe is not exactly one more
# than their parent's.
i = 0
while (0 == 1) and (i < len(pipeidarr)):
    k = prevpipearr[i]
    if (k >= 0):
        if not (disttotrans[i] == disttotrans[k] + 1) and (mytransmissionpipe[i] == mytransmissionpipe[k]):
            print "Pipe", pipeidarr[i], "has dist", disttotrans[i], "totransmission pipe", pipeidarr[mytransmissionpipe[i]], "and previous pipe", pipeidarr[prevpipearr[i]], "having dist", disttotrans[prevpipearr[i]], "to", pipeidarr[mytransmissionpipe[prevpipearr[i]]]
    i+= 1

# NOTE(review): newprevpipearr is not read anywhere in this section;
# compressprevpipe is defined elsewhere and may have side effects.
newprevpipearr = compressprevpipe()

# Upstream phase, first pass: certain DIST pipes push their date upstream.
i = 0
while i < len(pipeidarr):
    if (certainarr[i] == 1) and (transdistflagarr[i] == 'DIST'):
        print "Pipe ", pipeidarr[i], " is certain."
        findbestbadpath_time_certain(prevpipearr[i], i, 0, matarr[i], [i])
    i+= 1

# now do a pass for uncertain pipes
i = 0
while i < len(pipeidarr):
    # findaccusers(prevpipearr[i], installdatearr[i], i)
    # we are going to use findbestbadpath to find accusers
    # and assign suggested dates.
    # Arguments are parent pipe, accused pipe, has accused pipe
    # already been printed. The running maximum of pipes
    # leading up to the accused pipe.
    # The material type of the oldest pipe in the path
    # Pipes already traversed.
    if (1 == replacematerialflag) and (certainarr[i] == 0):
        findbestbadpath(prevpipearr[i], i, 0, originstalldatearr[i], matarr[i], [i])
    if (1 == replacetimeflag) and (certainarr[i] == 0) and (transdistflagarr[i] == 'DIST'):
        # NOTE(review): alloweddate is not read in this section; the call is
        # presumably made for its side effects.
        alloweddate = checkforcertains(prevpipearr[i],i, originstalldatearr[i])
        # above will perhaps change minsuggested date to be
        # no older than nearest upstream N or TRANS pipe
    i+=1

# second pass: sweep up from adjusted pipes
i = 0
while i < len(pipeidarr):
    if (1 == replacetimeflag) and (certainarr[i] == 0) and (transdistflagarr[i] == 'DIST'):
        findbestbadpath_time(prevpipearr[i],i, 0, matarr[i], [i])
    i+=1

# now check for coherence
# NOTE(review): a result of 0 from checkforcoherence is not reported; if 0
# can be a valid pipe index there, that pipe is skipped -- confirm.
i = 0
while i < len(pipeidarr):
    youngerpipe = checkforcoherence(prevpipearr[i], i, [i])
    if (youngerpipe > 0):
        print "Pipe ", pipeidarr[i], " with suggested date ", minsuggestdatearr[i], " has an incoherent path"
        print " going through pipe ", pipeidarr[youngerpipe], " whose suggested date is ", minsuggestdatearr[youngerpipe], " with trans/dist flag ", transdistflagarr[youngerpipe]
    i+= 1

# Build one status row per pipe.  Row layout matches the header written to
# status.csv below; the last column is a free-text comment that explains
# which case applied.
i = 0
while i < len(pipeidarr):
    # x: 'N' / 'Y' rendering of estimatedarr (1 means the input said 'N').
    if(estimatedarr[i] == 1): # eventually will look at certainarray
        x = 'N' # not estimated
    else:
        x = 'Y' # estimated
    # y: same rendering for certainarr.
    if(certainarr[i] == 1): # certainarray
        y = 'N' # not estimated
    else:
        y = 'Y' # estimated
    # pp: id of the pipe responsible for the change, or -1 when there is
    # none or when the pipe is responsible for itself.
    if (piperespforchange[i] == -1) or (piperespforchange[i] == i):
        pp = -1
    else:
        pp = pipeidarr[piperespforchange[i]]
    if (transdistflagarr[i] == 'DIST'):
        # The branches are tested in order; the first match decides the comment.
        if (installdatearr[i] >= gapthresh + pathinstalldatearr[i]) and (not originstalldatearr[i] == badpathinstalldate) and (minsuggestdatearr[i] == originstalldatearr[i]):
            # Here the "suggested year" column carries the carried path year.
            badlist.append([pipeidarr[i], originstalldatearr[i], pathinstalldatearr[i], pathinstalldatearr[i],diamarr[i], matarr[i], round(pathcarriedlength[i],1), transdistflagarr[i], x, priorityvote[i], abs(minsuggestdatearr[i] - originstalldatearr[i]),y, pp, 'Possible replacement even though coherent'])
        elif (installdatearr[i] >= gapthresh + pathinstalldatearr[i]) and (originstalldatearr[i] == badpathinstalldate):
            badlist.append([pipeidarr[i], originstalldatearr[i], minsuggestdatearr[i], pathinstalldatearr[i],diamarr[i], matarr[i], round(pathcarriedlength[i],1), transdistflagarr[i], x, priorityvote[i], abs(minsuggestdatearr[i] - originstalldatearr[i]),y, pp, 'Install year was missing, default value: 5000'])
        elif (pathinstalldatearr[i] > installdatearr[i]):
            badlist.append([pipeidarr[i], originstalldatearr[i], minsuggestdatearr[i], pathinstalldatearr[i],diamarr[i], matarr[i], round(pathcarriedlength[i],1), transdistflagarr[i], x, priorityvote[i], abs(minsuggestdatearr[i] - originstalldatearr[i]),y, pp, 'Incoherent: older than at least one upstream pipe on best path'])
        elif (originstalldatearr[i] > minsuggestdatearr[i]):
            badlist.append([pipeidarr[i], originstalldatearr[i], minsuggestdatearr[i], pathinstalldatearr[i],diamarr[i], matarr[i], round(pathcarriedlength[i],1), transdistflagarr[i], x, priorityvote[i], abs(minsuggestdatearr[i] - originstalldatearr[i]),y, pp, 'Downstream older pipe suggests a change'])
        elif (originstalldatearr[i] == minsuggestdatearr[i]) and (originstalldatearr[i] >= (gapthresh + pathinstalldatearr[i])):
            badlist.append([pipeidarr[i], originstalldatearr[i], pathinstalldatearr[i], pathinstalldatearr[i],diamarr[i], matarr[i], round(pathcarriedlength[i],1), transdistflagarr[i], x, priorityvote[i], abs(minsuggestdatearr[i] - originstalldatearr[i]),y, pp, 'Possible Replacement even though coherent'])
        elif (originstalldatearr[i] < minsuggestdatearr[i]):
            badlist.append([pipeidarr[i], originstalldatearr[i], minsuggestdatearr[i], pathinstalldatearr[i],diamarr[i], matarr[i], round(pathcarriedlength[i],1), transdistflagarr[i], x, priorityvote[i], abs(minsuggestdatearr[i] - originstalldatearr[i]),y, pp, 'Upstream pipe (either TRANS or certain) suggests this date to be younger.'])
        else: # (originstalldatearr[i] == minsuggestdatearr[i])
            badlist.append([pipeidarr[i], originstalldatearr[i], minsuggestdatearr[i], pathinstalldatearr[i],diamarr[i], matarr[i], round(pathcarriedlength[i],1), transdistflagarr[i], x, priorityvote[i], abs(minsuggestdatearr[i] - originstalldatearr[i]),y, pp, 'Coherent: no change required from upstream or downstream pipes'])
    else: # TRANS
        if (installdatearr[i] == originstalldatearr[i]):
            badlist.append([pipeidarr[i], originstalldatearr[i], minsuggestdatearr[i], pathinstalldatearr[i],diamarr[i], matarr[i], round(pathcarriedlength[i],1), transdistflagarr[i], x, priorityvote[i], abs(minsuggestdatearr[i] - originstalldatearr[i]),y, pp, 'TRANS pipe'])
        else:
            z = 'TRANS pipe changed by neighboring transpipes from ' + str(originstalldatearr[i]) + ' to ' + str(installdatearr[i])
            badlist.append([pipeidarr[i], originstalldatearr[i], minsuggestdatearr[i], pathinstalldatearr[i],diamarr[i], matarr[i], round(pathcarriedlength[i],1), transdistflagarr[i], x, priorityvote[i], abs(minsuggestdatearr[i] - originstalldatearr[i]),y, pp, z])
    # Now fill in the merge file
    # mergelist.append([ pipeidarr[i], transdistflagarr[i], originstalldatearr[i], minsuggestdatearr[i], diamarr[i], matarr[i]])
    i+= 1

# NOTE(review): the files opened for status.csv and gisdata.csv are never
# explicitly closed.
badout = csv.writer(open('status.csv', 'wb'), delimiter=',', quotechar = '"', quoting = csv.QUOTE_MINIMAL)
badout.writerow([ 'pipeid', 'install year', 'suggested year', 'carried year on best path', 'diameter', 'material', 'length to TRANS', 'trans/dist', 'estimated by consultant', 'coherence contribution', 'difference of install year with suggested year','unchecked', 'pipe responsible for suggested year', 'comment' ])
badout_gis = csv.writer(open('gisdata.csv', 'wb'), delimiter=',', quotechar = '"', quoting = csv.QUOTE_MINIMAL)
badout_gis.writerow([ 'pipeid', 'YOI', 'SGD', 'CARRIED', 'EST_D', 'UNCH', 'ACCUSER'])
# Sort descending by column 11 ('unchecked'), then column 9 ('coherence
# contribution'), then column 10 (difference between install and suggested year).
badlistsorted = sorted(badlist, key=itemgetter(11,9,10), reverse=True)
for b in badlistsorted:
    # print b[0], (" having installdate: "), b[1], (" and possible date: "), b[2]
    badout.writerow(b)
    # gisdata.csv gets a seven-column subset of each status row.
    badout_gis.writerow([b[0], b[1], b[2], b[3], b[8], b[11], b[12]])

# mergeout = csv.writer(open('montpellier.csv', 'wb'), delimiter=',', quotechar = '"', quoting = csv.QUOTE_MINIMAL)
# mergeout.writerow([ 'pipeid', 'transdistflag', 'original year of installation', 'suggested year of installation', 'diameter', 'material'])
# for b in mergelist:
#     mergeout.writerow(b)

# Now for each accuser, find all the accused
# NOTE(review): nothing in this section appends to accusers or accused, so
# the tables below stay empty unless code elsewhere fills them.
hashedaccusers = {}
i = 0
while i < len(accusers):
    hashedaccusers.setdefault(accusers[i],[]).append(accused[i])
    i+= 1

# print hashedaccusers

# Now sort the accusers by length and recover the pipeid
# of the biggest accusers and the identities of the accused.
accusertup = []
for mykey in hashedaccusers:
    accusertup.append([mykey, pipeidarr[mykey], diamarr[mykey], matarr[mykey], replacementarr[mykey], len(hashedaccusers[mykey]), spitspace(hashedaccusers[mykey])]) # spitspace converts keys to pipeids
# NOTE(review): the sort key is column 2, which holds the diameter, not the
# number of accused pipes (column 5) -- confirm which is intended.
accusersorted = sorted(accusertup, key=itemgetter(2), reverse=True)
# Disabled: the "0 == 1" guard is never true.
if (0 == 1) and ( (suggestflag == 0) or (somethingsuggestedflag == 0)):
    accuserout = csv.writer(open('accuser.csv', 'wb'), delimiter=',', quotechar = '"', quoting = csv.QUOTE_MINIMAL)
# if(0 < len(badlist)):
#     print "Probably not needed anymore: accuser.csv/accuser_postsuggest.csv has the accusers and their accused."
#     print "Probably not needed anymore: We show only those accusers that accuse at least ", minaccusethresh, " pipes."
# for a in accusersorted:
#     if (a[2] >= minaccusethresh):
#         print (a[1]), (": "), a[3]
#         accuserout.writerow(a[1:])

# Now for each accused find all accusers
hashedaccusees = {}
for mykey in hashedaccusers:
    x = hashedaccusers[mykey]
    for a in x:
        hashedaccusees.setdefault(a,[]).append(mykey)

# print hashedaccusees

# Now sort the accusees by length and recover the pipeid
# of the biggest accusees and the identities of the accuser.
# One row per accused pipe: index, pipe id, diameter, material, number of
# accusers, and the accusers' pipe ids as a space-separated string.
accuseetup = []
for mykey in hashedaccusees:
    accuseetup.append([mykey, pipeidarr[mykey], diamarr[mykey], matarr[mykey],len(hashedaccusees[mykey]), spitspace(hashedaccusees[mykey])])
# NOTE(review): the sort key is column 2, which holds the diameter, not the
# number of accusers (column 4) -- confirm which is intended.
accuseesorted = sorted(accuseetup, key=itemgetter(2), reverse=True)
# Disabled: the "0 == 1" guard is never true.
if (0 == 1) and ((suggestflag == 0) or (somethingsuggestedflag == 0)):
    accuseeout = csv.writer(open('accusee.csv', 'wb'), delimiter=',', quotechar = '"', quoting = csv.QUOTE_MINIMAL)
# if(0 < len(badlist)):
#     print "Probably not needed anymore: accusee.csv/acusee_postsuggest.csv has the accusees and their accusers."
#     print "Probably not needed anymore: We show only those accusees that are accused by at least ", minaccusethresh, " pipes."
# for a in accuseesorted:
#     if (a[2] >= minaccusethresh):
#         print (a[1]), (": "), a[3]
#         accuseeout.writerow(a[1:])

# Now find pure accusers sorted in descending order by accusees
# Disabled: the leading "False" makes the condition never true, so neither
# suggestedreplacement.csv nor suggestedchanges.csv is opened here.
if False and ((suggestflag == 0) or (somethingsuggestedflag == 0)):
    suggestreplace = csv.writer(open('suggestedreplacement.csv', 'wb'), delimiter=',', quotechar = '"', quoting = csv.QUOTE_MINIMAL)
    # suggestout_toaccusers = csv.writer(open('suggestedchanges_toaccusers.csv', 'wb'), delimiter=',', quotechar = '"', quoting = csv.QUOTE_MINIMAL)
    suggestchange = csv.writer(open('suggestedchanges.csv', 'wb'), delimiter=',', quotechar = '"', quoting = csv.QUOTE_MINIMAL)
    # suggestout = csv.writer(open('suggestedchangescontradictory.csv', 'wb'), delimiter=',', quotechar = '"', quoting = csv.QUOTE_MINIMAL)
    # suggestout_toaccusees = csv.writer(open('suggestedchanges_toaccusees.csv', 'wb'), delimiter=',', quotechar = '"', quoting = csv.QUOTE_MINIMAL)
    # print "suggestedchanges.csv has the suggested changes for pipes that should not be replaced in the format: pipeid, transdistflag, material, diameter, given installed date, suggested date, pipe suggesting earlier date, transdist of suggesting pipe"
    # suggestchange.writerow([ 'pipeid', 'transdistflag', 'material', 'diameter', 'given installed date', 'suggested date', 'pipe suggesting different date','transdist of suggesting pipe'])
    # print "suggestedreplacement.csv has the suggested changes for pipes that should be replaced in the same format as above: pipeid, transdistflag, material, diameter, given installed date, suggested date, pipe suggesting different date, transdist of suggesting pipe"
    # print "Note that a given installed date of", badpathinstalldate, "means that the data field was empty or misformatted."
    # suggestreplace.writerow([ 'pipeid', 'transdistflag', 'material', 'diameter', 'given installed date', 'suggested date', 'pipe suggesting different date','transdist of suggesting pipe'])

# NOTE(review): the statements from here to the end of the disabled loop are
# placed at top level; their nesting was reconstructed from flattened source.
# Either way the loop body never runs because of the leading "False".
replacedlist = []
i = 0
while False and (i < len(pipeidarr)):
    if (piperespforchange[i] == -1) and (transdistflagarr[i] == 'DIST') and (transfixedflag == 1):
        x = mytransmissionpipe[i]
        if (minsuggestdatearr[i] == installdatearr[x]):
            piperespforchange[i] = x
            replacedlist.append(i)
    if False and (originstalldatearr[i] >= 1925) and (originstalldatearr[i] >= (gapthresh + minsuggestdatearr[i])):
        suggestreplace.writerow([pipeidarr[i], transdistflagarr[i], matarr[i], diamarr[i], originstalldatearr[i], minsuggestdatearr[i], pipeidarr[piperespforchange[i]], transdistflagarr[piperespforchange[i]]])
        replacedlist.append(i)
    elif False and (not (installdatearr[i] == minsuggestdatearr[i])):
        suggestchange.writerow([pipeidarr[i], transdistflagarr[i], matarr[i], diamarr[i], originstalldatearr[i], minsuggestdatearr[i], pipeidarr[piperespforchange[i]], transdistflagarr[piperespforchange[i]]])
        replacedlist.append(i)
    i+= 1

# Dump every row collected in badpath to bestpath.csv.
# NOTE(review): the header has eight columns while the rows in badpath are
# of varying width (the upstream-walk functions append rows padded with
# blank cells); the file opened here is never explicitly closed.
badpathout = csv.writer(open('bestpath.csv', 'wb'), delimiter=',', quotechar = '"', quoting = csv.QUOTE_MINIMAL)
badpathout.writerow(['pipeid', 'trans/dist', 'material', 'diameter', 'install year', 'suggested year', 'number pipe in best path', 'estimated by consultant'])
for b in badpath:
    badpathout.writerow(b)

# ??? Do something similar for badpath_fixed and badpath_fixedall

# goodpathout = csv.writer(open('goodpath.csv', 'wb'), delimiter=',', quotechar = '"', quoting = csv.QUOTE_MINIMAL)
# i = 0
# while i < len(pipeidarr):
#     if i not in replacedlist:
#         findbestgoodpath(prevpipearr[i], i, 0, [i])
#     i+= 1
# print "goodpath has ", len(goodpath), " elements. "
# for a in goodpath:
#     # print a
#     goodpathout.writerow(a)
# if (suggestflag == 0) or (somethingsuggestedflag == 0):
#     print "Probably not needed: If you rerun with the suggestflag set to 1 and suggestions have ben made, you will see the date incoherencies after these adjustments have been made"
# else:
#     print "Have just run with suggestflag set to 1 and there were suggestions, meaning you have inferred new installdates based on incoherencies. "