-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathclean.py
executable file
·70 lines (55 loc) · 1.87 KB
/
clean.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
#!/usr/bin/python
'''
This script cleans unused records out of our database, keeping it small and easier to query.
'''
from pymongo import MongoClient
import pickle
import re
# Open a connection to the MongoDB server on localhost:40000 and select the
# `furman` database; every step below works through this `db` handle.
client = MongoClient(host='localhost', port=40000)
db = client['furman']
#STEP1: Remove all BK_ and FT_ records
# NOTE: this step is disabled. The statements below sit inside a bare
# triple-quoted string, so Python evaluates them as a string literal and never
# executes them. The text uses Python 2 `print` statements and issues each
# remove() twice per collection.
# NOTE(review): the reason for the duplicated remove() calls is not visible
# here -- confirm before re-enabling.
'''
regex1 = re.compile(r'^FT_|^BK_')
print "Cleaning lot_records..."
db.lot_records.remove({'key':regex1})
db.lot_records.remove({'key':regex1})
print "Cleaning master_records..."
db.master_records.remove({'key':regex1})
db.master_records.remove({'key':regex1})
print "Cleaning party_records..."
db.party_records.remove({'key':regex1})
db.party_records.remove({'key':regex1})
print "Cleaning remarks_records..."
db.remarks_records.remove({'key':regex1})
db.remarks_records.remove({'key':regex1})
'''
# NOTE(review): presumably the `time` output recorded from a run of STEP1;
# like the block above it is only a string literal with no runtime effect.
'''
real 5m22.290s
user 0m0.125s
sys 0m0.072s
'''
#STEP2: Remove the unwanted lots from PLUTO together with their ACRIS records.

# BldgClass codes that are dropped on an exact match.
EXCLUDED_BLDG_CLASSES = ['C6', 'C8', 'D0', 'D4']


def pluto_exclusion_filter():
    '''
    Return the MongoDB filter selecting the PLUTO rows to delete.

    A row matches when UnitsRes <= 3, or BldgClass is one of
    EXCLUDED_BLDG_CLASSES, or BldgClass starts with 'R'.
    '''
    return {'$or': [
        {'UnitsRes': {'$lte': 3}},
        # One $in over the exact codes selects the same rows as one
        # equality clause per code.
        {'BldgClass': {'$in': list(EXCLUDED_BLDG_CLASSES)}},
        {'BldgClass': {'$regex': '^R'}},
    ]}


def distinct_keys(lots):
    '''
    Return the set of 'key' values found in an iterable of lot documents.
    '''
    return {lot['key'] for lot in lots}


def purge_excluded_lots(db, bbl_log, key_log):
    '''
    Delete every PLUTO row matched by pluto_exclusion_filter() and the ACRIS
    records (lot, master, party) that share a key with its lot records.

    db      -- database handle exposing the pluto, lot_records,
               master_records and party_records collections
    bbl_log -- writable text file; receives one processed BBL per line
    key_log -- writable text file; receives one deleted ACRIS key per line
    '''
    # Kept as a fully materialised list (as in the original) so the loop does
    # not walk a live cursor over the collection it is deleting from.
    targets = list(db.pluto.find(
        pluto_exclusion_filter(),
        {'BBL': 1, 'BoroCode': 1, 'Block': 1, 'Lot': 1}))
    for bbl in targets:
        # Log and flush before deleting, so the log already names the BBL
        # being processed if the run is interrupted.
        bbl_log.write(str(bbl['BBL']) + '\n')
        bbl_log.flush()

        #Query Lots
        lots = db.lot_records.find(
            {'borough': bbl['BoroCode'], 'block': bbl['Block'], 'lot': bbl['Lot']},
            {'key': 1})
        keys = distinct_keys(lots)
        if keys:
            key_log.writelines(str(key) + '\n' for key in keys)
            key_log.flush()
            key_list = list(keys)
            #Drop data in ACRIS:
            db.lot_records.remove({'key': {'$in': key_list}})
            db.master_records.remove({'key': {'$in': key_list}})
            db.party_records.remove({'key': {'$in': key_list}})
            # NOTE(review): the disabled STEP1 above also cleans
            # remarks_records, but this step never did -- confirm whether
            # remarks should be purged here as well.

        #Drop data in PLUTO
        # Done last: if the run dies part-way, the PLUTO row still exists and
        # a re-run can find and finish cleaning its ACRIS records.
        db.pluto.remove({'BBL': bbl['BBL']})


def main():
    '''
    Run STEP2 against the module-level `db`, appending to bbls.txt / keys.txt.
    '''
    # `with` guarantees both log files are closed, even if a delete raises.
    with open('bbls.txt', 'a') as fbbl, open('keys.txt', 'a') as fkey:
        purge_excluded_lots(db, fbbl, fkey)


if __name__ == '__main__':
    main()