Downloading DOB Job Application Filings from Socrata

[1]:
# Download the full 'DOB Job Application Filings' dataset.
#
# The download is large, so it is skipped when the output file already exists.
# Delete the file to force a fresh download.

import gzip
import os

from openclean.data.source.socrata import Socrata


# Socrata dataset identifier; used for both the API request and the file name
# so the two cannot drift apart.
DATASET_ID = 'ic3t-wcy2'

outfile = './{}.tsv.gz'.format(DATASET_ID)

if not os.path.exists(outfile):
    # Write to a temporary name and rename only after the download completed,
    # so an interrupted run never leaves a truncated archive at `outfile`.
    partial_file = outfile + '.part'
    try:
        with gzip.open(partial_file, 'wb') as f:
            Socrata().dataset(DATASET_ID).write(f)
        os.replace(partial_file, outfile)
    finally:
        # After a successful rename the temporary file no longer exists; this
        # only removes the leftovers of a failed download.
        if os.path.exists(partial_file):
            os.remove(partial_file)
[2]:
# Sanity-check the download by printing the dataset columns and the number of rows.
# The file is opened with the streaming option so the full data frame is not
# loaded into memory.

from openclean.pipeline import stream

df = stream(outfile)

# Column listing: a header followed by one quoted column name per line.
schema_header = 'Schema\n------'
print(schema_header)
for col in df.columns:
    quoted_name = "  '{}'".format(col)
    print(quoted_name)

# Total number of rows in the downloaded file.
row_count = df.count()
print('\n{} rows.'.format(row_count))
Schema
------
  'Job #'
  'Doc #'
  'Borough'
  'House #'
  'Street Name'
  'Block'
  'Lot'
  'Bin #'
  'Job Type'
  'Job Status'
  'Job Status Descrp'
  'Latest Action Date'
  'Building Type'
  'Community - Board'
  'Cluster'
  'Landmarked'
  'Adult Estab'
  'Loft Board'
  'City Owned'
  'Little e'
  'PC Filed'
  'eFiling Filed'
  'Plumbing'
  'Mechanical'
  'Boiler'
  'Fuel Burning'
  'Fuel Storage'
  'Standpipe'
  'Sprinkler'
  'Fire Alarm'
  'Equipment'
  'Fire Suppression'
  'Curb Cut'
  'Other'
  'Other Description'
  'Applicant's First Name'
  'Applicant's Last Name'
  'Applicant Professional Title'
  'Applicant License #'
  'Professional Cert'
  'Pre- Filing Date'
  'Paid'
  'Fully Paid'
  'Assigned'
  'Approved'
  'Fully Permitted'
  'Initial Cost'
  'Total Est. Fee'
  'Fee Status'
  'Existing Zoning Sqft'
  'Proposed Zoning Sqft'
  'Horizontal Enlrgmt'
  'Vertical Enlrgmt'
  'Enlargement SQ Footage'
  'Street Frontage'
  'ExistingNo. of Stories'
  'Proposed No. of Stories'
  'Existing Height'
  'Proposed Height'
  'Existing Dwelling Units'
  'Proposed Dwelling Units'
  'Existing Occupancy'
  'Proposed Occupancy'
  'Site Fill'
  'Zoning Dist1'
  'Zoning Dist2'
  'Zoning Dist3'
  'Special District 1'
  'Special District 2'
  'Owner Type'
  'Non-Profit'
  'Owner's First Name'
  'Owner's Last Name'
  'Owner's Business Name'
  'Owner's House Number'
  'Owner'sHouse Street Name'
  'City '
  'State'
  'Zip'
  'Owner'sPhone #'
  'Job Description'
  'DOBRunDate'
  'JOB_S1_NO'
  'TOTAL_CONSTRUCTION_FLOOR_AREA'
  'WITHDRAWAL_FLAG'
  'SIGNOFF_DATE'
  'SPECIAL_ACTION_STATUS'
  'SPECIAL_ACTION_DATE'
  'BUILDING_CLASS'
  'JOB_NO_GOOD_COUNT'
  'GIS_LATITUDE'
  'GIS_LONGITUDE'
  'GIS_COUNCIL_DISTRICT'
  'GIS_CENSUS_TRACT'
  'GIS_NTA_NAME'
  'GIS_BIN'

1762407 rows.