diff --git a/00-Example_of_AvallDB_Class.ipynb b/00-Example_of_AvallDB_Class.ipynb new file mode 100644 index 0000000..6847adb --- /dev/null +++ b/00-Example_of_AvallDB_Class.ipynb @@ -0,0 +1,777 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "%load_ext autoreload\n", + "%autoreload 2" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": {}, + "outputs": [], + "source": [ + "from flight_safety.queries import AvallDB" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": {}, + "outputs": [], + "source": [ + "db = AvallDB('data/avall.db', far_parts=['121 '], inc=False, acc=True)" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "['_get_ev_ids_ev_type_query', '_get_ev_ids_far_part_query']\n" + ] + }, + { + "data": { + "text/plain": [ + "\"ev_id IN (SELECT ev_id FROM events WHERE ev_type='ACC') AND ev_id IN (SELECT ev_id FROM aircraft WHERE far_part IN ('121 '))\"" + ] + }, + "execution_count": 15, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "db._get_conditions_query()" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "['_get_ev_ids_ev_type_query', '_get_ev_ids_far_part_query']\n" + ] + } + ], + "source": [ + "db._set_matching_ev_ids()" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "\"'20140507X51410', '20030314X00332', '20020814X01387', '20101124X20432', '20001206X01744', '20160106X80332', '20020917X02404', '20001212X20624', '20050623X00849', '20001205X00295', '20001214X43270', '20001208X08009', '20001206X02233', '20020123X00104', '20040305X00271', '20001212X23372', '20041020X01659', '20001208X05244', '20010112X00292', '20061017X01529', '20001211X10743', '20001213X32184', '20001213X32545', '20140314X21725', '20160827X31134', '20001206X01608', '20010308X00571', '20101103X44221', '20160803X71952', '20040510X00578', '20120807X32623', '20001206X02604', '20001208X07126', '20100507X94749', '20090421X81417', '20110607X73034', '20090921X05830', '20001208X05463', '20001205X00131', '20001213X24948', '20001212X19309', '20001213X28786', '20001205X00555', '20001206X00784', '20050112X00043', '20001208X05238', '20020311X00325', '20150305X42958', '20020520X00704', '20130429X12734', '20010531X01039', '20001213X34548', '20030909X01495', '20001213X25412', '20121005X05550', '20141023X80357', '20030425X00566', '20001212X22266', '20030603X00763', '20110513X41407', '20001204X00044', '20001212X24136', '20020322X00387', '20030514X00656', '20070105X00013', '20041027X01700', '20080713X01025', '20080723X01108', '20001211X11141', '20001208X09107', '20001211X11077', '20001214X45342', '20130723X13256', '20010214X00449', '20001214X39893', '20001208X08351', '20001213X29192', '20091028X64921', '20001213X32134', '20001213X27562', '20120221X32033', '20001211X11582', '20051104X01781', '20050413X00446', '20030423X00554', '20120613X32202', '20130901X95021', '20160222X93713', '20090115X73226', '20060721X00983', '20080513X00660', '20030813X01324', '20100423X01145', '20001212X20788', '20091124X94107', '20001212X24539', '20001214X44795', '20001211X16222', '20001213X30579', '20030501X00608', '20130220X51849', '20020319X00362', '20150506X41316', '20091029X02444', '20090527X25328', '20001213X25510', '20020917X02079', '20001213X27558', '20001213X35149', '20030826X01404', '20001211X11399', '20001213X33262', '20001213X26975', '20080915X01465', '20010307X00558', '20020612X00871', '20001213X27869', '20060510X00542', '20001208X09248', '20001208X05330', '20001208X06027', '20001206X00741', '20060425X00486', '20001211X11475', '20041209X01953', '20001211X11006', '20001214X42387', '20001214X41754', '20001213X33517', '20030605X00800', '20001207X03592', '20001211X13312', '20001213X32749', '20001208X06952', '20001213X32759', '20001213X25438', '20001212X20794', '20060808X01115', '20001212X20729', '20060908X01318', '20001208X07076', '20001208X05409', '20001208X06205', '20070808X01153', '20120224X42146', '20020604X00806', '20001214X45262', '20001211X11043', '20001214X39536', '20010108X00056', '20101203X60119', '20020917X01909', '20001207X03162', '20001214X41967', '20001214X36521', '20151216X41606', '20001207X03670', '20001208X08808', '20030205X00166', '20151107X52955', '20001206X00986', '20051026X01739', '20001211X09672', '20010321X00614', '20060803X01079', '20001214X44103', '20010503X00872', '20060508X00522', '20050713X01012', '20001207X04369', '20080703X00974', '20001213X30693', '20060807X01099', '20001211X14662', '20001213X31706', '20051213X01965', '20060808X01112', '20100312X84023', '20001212X22386', '20091102X82140', '20001212X20571', '20030917X01555', '20001211X15409', '20001207X03786', '20001212X20116', '20050526X00682', '20071231X02012', '20001212X19027', '20001213X25297', '20001212X23667', '20010816X01695', '20110628X11855', '20001211X11924', '20051209X01956', '20091223X50505', '20030729X01210', '20001207X03067', '20001212X20030', '20001213X29023', '20001213X29575', '20001213X32982', '20030304X00276', '20110228X54428', '20030214X00221', '20110401X24330', '20150523X13659', '20001213X26783', '20031010X01706', '20001207X04932', '20001211X09761', '20030110X00049', '20101220X14621', '20001206X02087', '20001213X35054', '20170508X85232', '20151023X50751', '20061117X01683', '20120302X00956', '20100222X84927', '20120514X15148', '20080413X00464', '20050627X00874', '20001214X38384', '20151130X61148', '20001208X08003', '20001211X14322', '20001214X45028', '20001211X12114', '20001213X27404', '20001211X11500', '20101203X90900', '20001208X06964', '20001213X29459', '20011004X02036', '20150212X54957', '20060522X00602', '20001208X09334', '20050207X00140', '20021118X05480', '20001214X42592', '20140814X52852', '20001212X18718', '20010507X00887', '20001207X04273', '20001214X38649', '20001212X19871', '20001213X25439', '20001213X26908', '20001211X12202', '20001214X39020', '20010117X00316', '20001212X20452', '20001211X09740', '20161028X72837', '20001211X10832', '20001212X17092', '20120322X25316', '20001206X02667', '20001212X16907', '20050616X00796', '20001214X44797', '20010712X01409', '20001212X20085', '20001212X19933', '20001212X22245', '20160805X44633', '20141006X73012', '20001212X19842', '20020913X01603', '20001213X24905', '20030812X01306', '20030807X01283', '20130712X61436', '20040825X01286', '20001213X35305', '20090724X34459', '20010501X00841', '20020308X00316', '20100714X63624', '20001212X17570', '20001205X00384', '20001207X04953', '20110407X95823', '20001213X30691', '20090422X95319', '20001214X42096', '20001213X35210', '20010919X01964', '20001213X31105', '20080107X00024', '20010319X00608', '20140212X03156', '20020606X00821', '20020917X04908', '20110328X80923', '20140829X32709', '20001213X32999', '20001212X16433', '20001211X12108', '20001208X05167', '20001208X08922', '20001207X03512', '20001205X00507', '20001208X06712', '20001211X11205', '20100903X54913', '20090323X01952', '20100525X31845', '20001214X43232', '20020917X04219', '20130502X52359', '20001205X00183', '20070501X00494', '20001207X04839', '20170222X80758', '20001214X39980', '20001207X03355', '20090303X74824', '20060623X00811', '20001212X17434', '20130517X14847', '20020917X01910', '20001220X45463', '20021029X05398', '20001213X30412', '20151124X65446', '20001208X08497', '20050310X00294', '20070817X01201', '20001214X45258', '20001208X09084', '20001211X09531', '20001212X19822', '20001208X09076', '20001213X24850', '20001211X11530', '20051028X01749', '20001213X28348', '20150812X63537', '20001212X21701', '20051206X01938', '20001213X30244', '20001212X19442', '20001212X21929', '20001214X38844', '20001213X35148', '20140110X15754', '20001213X24978', '20051213X01964', '20030905X01467', '20001211X09452', '20120118X91324', '20140828X43935', '20001212X19817', '20001206X01727', '20081003X16303', '20001212X20463', '20050323X00350', '20001211X09935', '20071005X01522', '20041104X01757', '20001211X11341', '20040813X01219', '20001208X05206', '20001211X12029', '20010108X00063', '20001212X22101', '20001211X11023', '20080304X00260', '20101208X40921', '20001214X38179', '20001212X22066', '20001211X10520', '20031021X01788', '20001213X32842', '20001208X07667', '20001204X00109', '20001208X07893', '20001208X06346', '20020313X00335', '20060111X00048', '20001213X30197', '20101116X51712', '20001211X10292', '20001208X06148', '20010531X01037', '20001213X31926', '20040121X00082', '20011018X02112', '20071130X01878', '20001212X19260', '20070625X00787', '20070409X00387', '20001214X41968', '20041028X01714', '20001214X39331', '20111018X03714', '20110621X32040', '20020917X04570', '20040902X01347', '20050322X00346', '20040319X00351', '20001208X06132', '20001213X28889', '20001208X07943', '20040408X00430', '20001214X41276', '20001213X34081', '20001208X07495', '20161216X60544', '20051025X01717', '20060425X00482', '20031122X01939', '20001212X21540', '20001211X12113', '20151022X65901', '20001214X40201', '20001212X19286', '20010223X00504', '20060720X00974', '20070426X00458', '20100606X02721', '20001212X23320', '20001214X35494', '20051210X01960', '20050609X00744', '20001212X18299', '20001208X08067', '20001212X21253', '20001211X11594', '20001214X38546', '20011022X02123', '20001214X37434', '20081229X80551', '20001213X32679', '20001208X07619', '20030109X00039', '20150422X71127', '20001207X03934', '20091020X14635', '20011130X02321', '20001214X35493', '20001206X02120', '20050118X00061', '20001211X12079', '20001212X22314', '20060618X00758', '20001212X18456', '20001208X07854', '20001213X32929', '20001212X20164', '20081027X75039', '20001208X07780', '20020917X03653', '20081113X92722', '20030128X00117', '20100520X02527', '20111020X14740', '20110329X33129', '20001208X08993', '20001208X07168', '20001213X32472', '20160811X81653', '20001207X02895', '20001206X02570', '20001208X07810', '20001213X27867', '20001213X27734', '20001208X08814', '20041019X01653', '20030910X01507', '20001212X21128', '20010507X00899', '20020506X00632', '20001211X10824', '20001212X18177', '20001211X15478', '20031121X01938', '20020114X00069', '20090806X21108', '20001208X06203', '20001212X21909', '20030409X00468', '20001211X16147', '20170224X03359', '20130128X11324', '20020411X00491', '20160927X51937', '20001206X01026', '20120511X73555', '20001211X12347', '20001212X20472', '20101027X04916', '20130220X85257', '20130519X11737', '20161028X93712', '20001212X21365', '20001207X04988', '20090805X42956', '20050104X00010', '20001213X27315', '20001213X32769', '20020917X03515', '20150701X91130', '20160126X75122', '20001211X10847', '20150818X14949', '20001213X28113', '20020731X01263', '20001212X18378', '20020125X00135', '20150311X03628', '20090114X61852', '20001211X11993', '20001212X20517', '20140624X40942', '20001212X21038', '20140814X52213', '20001212X22163', '20080728X01116', '20001213X33259', '20130930X75500', '20010608X01136', '20001213X30396', '20001212X20564', '20020510X00653', '20010416X00753', '20001212X22375', '20001207X03511', '20001226X45475', '20001212X22320', '20001211X09454', '20001213X26646', '20001213X35287', '20001208X08117', '20001208X07726', '20011003X02029', '20141121X15631', '20001211X13127', '20001208X05677', '20001212X22410', '20030409X00464', '20001213X30439', '20001214X45026', '20070503X00504', '20001213X27351', '20001208X07272', '20001212X18361', '20001208X09214', '20010904X01867', '20001212X21264', '20061006X01478', '20031105X01865', '20080118X00073', '20030129X00123', '20120924X32520', '20110730X60307', '20001214X38298', '20121017X14720', '20010830X01825', '20001211X15436', '20020104X00035', '20081124X34243', '20120612X01249', '20001211X12369', '20001213X29815', '20001212X21738', '20001212X20606', '20080620X00890', '20001213X32183', '20001212X16938', '20070712X00920', '20090522X80352', '20001214X36569', '20001213X27707', '20001208X05743', '20091030X64143', '20070504X00513', '20001208X06204', '20041027X01709', '20001212X20217', '20080722X01094', '20001213X32835', '20001214X43089', '20020328X00415', '20001207X04990', '20011129X02310', '20100811X20501', '20170703X71412', '20001212X20029', '20001208X07192', '20001211X09821', '20050318X00330', '20001212X22400', '20001205X00234', '20001213X33518', '20001212X22065', '20010402X00689', '20020606X00822', '20001208X06344', '20020429X00592', '20150824X02325', '20030305X00289', '20101130X64248', '20001212X23082', '20001214X37757', '20050621X00819', '20001212X18366', '20040630X00887', '20011106X02201', '20071217X01939', '20001213X34942', '20001212X24506', '20001208X07343', '20020801X01282', '20001208X07185', '20001212X23524', '20001208X09368', '20120229X70237', '20001213X31759', '20001211X11566', '20001213X35405', '20001211X14270', '20001212X20693', '20110417X11718', '20120915X93442', '20001207X04894', '20120302X22622', '20110418X12310', '20150219X14806', '20020729X01242', '20040618X00835', '20001212X24012', '20001212X22460', '20001212X18874', '20001212X21079', '20020917X02737', '20010727X01542', '20001211X11485', '20001211X09413', '20100809X75512', '20100216X20235', '20001212X19398', '20030509X00638', '20001212X22742', '20051110X01830', '20001208X07825', '20020718X01166', '20001211X09482', '20140110X14059', '20001212X24751', '20001208X08536', '20031017X01759', '20020625X00969', '20120719X94744', '20131220X70905', '20110701X11957', '20001213X30626', '20001212X16912', '20001208X07558', '20120713X01334', '20001208X09047', '20010713X01428', '20110912X61152', '20070418X00436', '20001212X22138', '20001211X12985', '20001212X19015', '20001213X32346', '20001213X29335', '20001211X13331', '20001214X36375', '20140320X53321', '20001208X05584', '20001212X19008', '20001207X04726', '20081003X17330', '20001208X06963', '20001207X04870', '20001211X13617', '20001212X16675', '20001214X36732', '20150717X41748', '20001207X03655', '20140211X00047', '20001211X10601', '20001212X19589', '20001206X02536', '20001212X18876', '20001214X36115', '20001206X02285', '20011030X02159', '20141003X84008', '20090724X12005', '20030410X00473', '20070520X00598', '20001211X13714', '20001208X05462', '20110930X20857', '20090828X61756', '20001212X19345', '20001212X16577', '20001213X26528', '20110715X40120', '20020703X01048', '20031209X02011', '20001207X03362', '20001212X18073', '20001211X09494', '20021204X05561', '20150701X91338', '20001213X27868', '20001211X13939', '20001212X20700', '20110525X42547', '20120530X64155', '20060308X00278', '20050825X01311', '20001212X19804', '20001211X10043', '20001212X21420', '20001212X17517', '20070705X00879', '20001211X15125', '20101006X00507', '20001211X09578', '20060807X01100', '20110412X23201', '20110907X15938', '20150611X74851', '20001213X30257', '20001212X20208', '20001212X20339', '20001208X07000', '20130314X15433', '20001212X23595', '20001212X19792', '20020919X05176', '20050901X01366', '20001208X05988', '20001212X21686', '20060329X00359', '20090712X94532', '20020621X00955', '20001212X20503', '20001212X23762', '20001207X04356', '20081228X25156', '20160129X24835', '20081221X14648', '20001206X02586', '20001211X09708', '20001211X11159', '20081003X62950', '20001211X11597', '20001214X43286', '20001214X35671', '20001212X18074', '20051229X02027', '20060131X00140', '20020306X00309', '20001211X11972', '20001213X26163', '20001212X17522', '20020923X05198', '20001211X12996', '20020123X00105', '20080701X00963', '20001211X14022', '20020419X00545', '20120207X34555', '20051103X01780', '20001212X20894', '20020717X01153', '20001212X20972', '20001213X28483', '20070423X00446', '20001212X24215', '20050620X00811', '20070911X01360', '20030505X00618', '20050929X01547', '20050921X01501', '20051026X01729', '20040812X01200', '20130314X14422', '20001212X20388', '20010110X00148', '20071029X01680', '20001213X29982', '20001208X08078', '20020917X01911', '20140529X04134', '20141211X62917', '20001214X36171', '20100719X80209', '20120417X44353', '20001208X06588', '20001207X02821', '20060918X01362', '20001214X39783', '20001205X00149', '20170308X73155', '20101103X34855', '20030624X00944', '20070323X00325', '20070111X00042', '20140620X45424', '20001208X07395', '20001212X23760', '20001213X28497', '20011204X02343', '20001207X04196', '20001211X11298', '20100616X60229', '20001213X30279', '20001213X25535', '20140806X21104', '20001212X16566', '20001207X03623', '20100701X93344', '20060106X00018', '20060828X01244', '20001213X26494', '20001208X09273', '20030423X00557', '20001208X05207', '20001212X16302', '20001213X24955', '20001213X32752', '20030514X00654', '20001211X10040', '20030707X01028', '20001208X08607', '20001214X41550', '20081216X41655', '20001208X08714', '20021030X05404', '20070720X00966', '20060622X00799', '20070718X00958', '20050323X00356', '20001213X27705', '20090401X82602', '20151213X84149', '20001213X27706', '20001206X02420', '20001206X01808', '20160112X24550', '20030612X00865', '20020124X00119', '20091223X12805', '20001208X05168', '20001211X11147', '20061015X01521', '20030606X00808', '20100423X02633', '20020501X00603', '20001207X02869', '20001208X07906', '20080720X01081', '20110323X24235', '20001211X10013', '20001213X30416', '20150211X14605', '20001213X32573', '20140826X64917', '20160330X72920', '20030715X01106', '20001212X20825', '20071231X02013', '20010821X01745', '20130814X15751', '20001211X10120', '20020104X00022', '20060627X00840', '20001211X09692', '20090220X30535', '20001206X02584', '20001213X26803', '20111208X41332', '20001211X14301', '20010423X00790', '20051229X02026', '20020306X00308', '20020917X02937', '20001211X13132', '20010910X01911', '20001211X10221', '20110219X32035', '20070601X00676', '20090505X04553', '20001212X20714', '20040601X00708', '20020412X00510', '20001213X27446', '20040803X01126', '20001207X03295', '20020917X04699', '20020402X00443', '20001211X12558', '20021112X05460', '20001213X33229', '20001214X39535', '20170317X92137', '20141217X43728', '20020214X00216', '20001214X37681', '20001214X36734', '20040205X00163', '20030305X00291', '20001214X41751', '20040421X00488', '20001214X44799', '20080723X01111', '20031027X01814', '20010110X00270', '20001212X18961', '20001208X07632', '20150116X52840', '20010216X00462', '20001212X20609', '20001212X19796', '20001211X09910', '20001211X11299', '20001211X11549', '20001212X18968', '20001214X36851', '20001207X03292', '20001211X15987', '20010330X00671', '20051020X01700', '20121018X62210', '20001214X35492', '20001208X05312', '20020110X00051', '20001213X26437', '20130408X10949', '20001211X12582', '20001213X24862', '20001212X20847', '20001211X14503', '20001213X31236', '20160723X91246', '20001212X18967', '20010226X00510', '20001204X00105', '20001213X32186', '20001212X21656', '20001208X07347', '20001212X18677', '20020123X00106', '20050617X00804', '20001211X15920', '20131205X11728', '20001213X25043', '20001212X17381', '20001213X29315', '20090728X71721', '20001207X05039', '20001208X08233', '20150224X45723', '20001208X08354', '20001214X45366', '20001211X16023', '20090127X92950', '20001211X11734', '20001206X02322', '20001205X00124', '20001207X03788', '20001212X23970', '20001214X36685', '20001213X30543', '20010911X01915', '20001214X43392', '20041118X01838', '20001206X00889', '20001207X04748', '20001212X16434', '20001207X04665', '20050317X00323', '20001208X07191', '20001213X27403', '20080220X00214', '20001212X22040', '20001212X18850', '20001220X45467', '20001213X25705', '20101108X84714', '20030530X00743', '20001213X29997', '20001206X01590', '20041109X01790', '20070309X00265', '20001213X29969', '20020925X05213', '20001212X19989', '20001205X00380', '20001208X06595', '20001207X04323', '20001212X16583', '20001212X19142', '20001212X18295', '20001208X08916', '20001207X04545', '20001213X30451', '20030902X01433', '20001213X32505', '20001208X07286', '20001211X11489', '20001205X00272', '20130103X44411', '20150608X90730', '20151026X81321', '20001211X11863', '20080125X00103', '20001207X03300', '20050111X00033', '20001212X22748', '20001214X35621', '20001213X28286', '20100410X34740', '20001208X08661', '20040730X01113', '20020702X01033', '20060329X00364', '20001212X18945', '20040226X00241', '20001213X26819', '20020917X04018', '20031219X02063', '20001212X22938', '20001213X28785', '20030624X00945', '20131023X24855', '20120409X05702', '20001208X07896', '20001214X45309', '20001212X19162', '20001211X15117', '20001213X34240', '20001213X25883', '20020917X01907', '20090714X83900', '20050120X00079', '20001213X29644', '20120608X43116', '20060711X00910', '20030702X00998', '20140203X35450', '20090102X60836', '20001211X14094', '20110510X84704', '20001206X00888', '20050825X01313', '20001212X22037', '20151029X44249', '20020917X03104', '20140619X52159', '20001214X39459', '20110502X93814', '20030813X01321', '20001214X43381', '20031022X01799', '20020917X04534', '20001208X09291', '20110510X01046', '20060504X00511', '20111206X05636', '20070614X00724', '20001213X34943', '20090507X00926', '20121231X42414', '20090213X13613', '20001208X07429', '20130515X53650', '20001214X35471', '20070223X00215', '20100405X40816', '20031105X01861', '20001214X45346', '20020123X00103', '20001211X10090', '20001208X06738', '20081229X40417', '20160805X51304', '20001213X32341', '20001208X07738', '20131121X13345', '20001212X17657', '20001212X23119', '20160805X50153', '20080107X00025', '20130614X14844', '20110426X03427'\"" + ] + }, + "execution_count": 17, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "db._ev_ids" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "metadata": { + "scrolled": true + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "Index: 1095 entries, 20140507X51410 to 20110426X03427\n", + "Data columns (total 70 columns):\n", + "ntsb_no 1095 non-null object\n", + "ev_type 1095 non-null object\n", + "ev_date 1095 non-null object\n", + "ev_dow 1095 non-null object\n", + "ev_time 1095 non-null object\n", + "ev_tmzn 1095 non-null object\n", + "ev_city 1095 non-null object\n", + "ev_state 1095 non-null object\n", + "ev_country 1095 non-null object\n", + "ev_site_zipcode 1095 non-null object\n", + "ev_year 1095 non-null int64\n", + "ev_month 1095 non-null int64\n", + "mid_air 1095 non-null object\n", + "on_ground_collision 1095 non-null object\n", + "latitude 1095 non-null object\n", + "longitude 1095 non-null object\n", + "latlong_acq 1095 non-null object\n", + "apt_name 1095 non-null object\n", + "ev_nr_apt_id 1095 non-null object\n", + "ev_nr_apt_loc 1095 non-null object\n", + "apt_dist 1095 non-null object\n", + "apt_dir 1095 non-null object\n", + "apt_elev 1095 non-null object\n", + "wx_brief_comp 1095 non-null object\n", + "wx_src_iic 1095 non-null object\n", + "wx_obs_time 1095 non-null object\n", + "wx_obs_dir 1095 non-null object\n", + "wx_obs_fac_id 1095 non-null object\n", + "wx_obs_elev 1095 non-null object\n", + "wx_obs_dist 1095 non-null object\n", + "wx_obs_tmzn 1095 non-null object\n", + "light_cond 1095 non-null object\n", + "sky_cond_nonceil 1095 non-null object\n", + "sky_nonceil_ht 1095 non-null object\n", + "sky_ceil_ht 1095 non-null object\n", + "sky_cond_ceil 1095 non-null object\n", + "vis_rvr 1095 non-null object\n", + "vis_rvv 1095 non-null object\n", + "vis_sm 1095 non-null object\n", + "wx_temp 1095 non-null object\n", + "wx_dew_pt 1095 non-null object\n", + "wind_dir_deg 1095 non-null object\n", + "wind_dir_ind 1095 non-null object\n", + "wind_vel_kts 1095 non-null object\n", + "wind_vel_ind 1095 non-null object\n", + "gust_ind 1095 non-null object\n", + "gust_kts 1095 non-null object\n", + "altimeter 1095 non-null object\n", + "wx_dens_alt 1095 non-null object\n", + "wx_int_precip 1095 non-null object\n", + "metar 1095 non-null object\n", + "ev_highest_injury 1095 non-null object\n", + "inj_f_grnd 1095 non-null object\n", + "inj_m_grnd 1095 non-null object\n", + "inj_s_grnd 1095 non-null object\n", + "inj_tot_f 1095 non-null object\n", + "inj_tot_m 1095 non-null object\n", + "inj_tot_n 1095 non-null object\n", + "inj_tot_s 1095 non-null object\n", + "inj_tot_t 1095 non-null object\n", + "invest_agy 1095 non-null object\n", + "ntsb_docket 1095 non-null object\n", + "ntsb_notf_from 1095 non-null object\n", + "ntsb_notf_date 1095 non-null object\n", + "ntsb_notf_tm 1095 non-null object\n", + "fiche_number 1095 non-null object\n", + "lchg_date 1095 non-null object\n", + "lchg_userid 1095 non-null object\n", + "wx_cond_basic 1095 non-null object\n", + "faa_dist_office 1095 non-null object\n", + "dtypes: int64(2), object(68)\n", + "memory usage: 607.4+ KB\n" + ] + } + ], + "source": [ + "db._execute_query(\"SELECT * FROM events\").info()" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "Index: 1154 entries, 20140507X51410 to 20110426X03427\n", + "Data columns (total 92 columns):\n", + "Aircraft_Key 1154 non-null int64\n", + "regis_no 1154 non-null object\n", + "ntsb_no 1154 non-null object\n", + "acft_missing 1154 non-null object\n", + "far_part 1154 non-null object\n", + "flt_plan_filed 1154 non-null object\n", + "flight_plan_activated 1154 non-null object\n", + "damage 1154 non-null object\n", + "acft_fire 1154 non-null object\n", + "acft_expl 1154 non-null object\n", + "acft_make 1154 non-null object\n", + "acft_model 1154 non-null object\n", + "acft_series 1154 non-null object\n", + "acft_serial_no 1154 non-null object\n", + "cert_max_gr_wt 1154 non-null object\n", + "acft_category 1154 non-null object\n", + "acft_reg_cls 1154 non-null object\n", + "homebuilt 1154 non-null object\n", + "fc_seats 1154 non-null object\n", + "cc_seats 1154 non-null object\n", + "pax_seats 1154 non-null object\n", + "total_seats 1154 non-null object\n", + "num_eng 1154 non-null object\n", + "fixed_retractable 1154 non-null object\n", + "type_last_insp 1154 non-null object\n", + "date_last_insp 1154 non-null object\n", + "afm_hrs_last_insp 1154 non-null object\n", + "afm_hrs 1154 non-null object\n", + "elt_install 1154 non-null object\n", + "elt_oper 1154 non-null object\n", + "elt_aided_loc_ev 1154 non-null object\n", + "elt_type 1154 non-null object\n", + "owner_acft 1154 non-null object\n", + "owner_street 1154 non-null object\n", + "owner_city 1154 non-null object\n", + "owner_state 1154 non-null object\n", + "owner_country 1154 non-null object\n", + "owner_zip 1154 non-null object\n", + "oper_individual_name 1154 non-null object\n", + "oper_name 1154 non-null object\n", + "oper_same 1154 non-null object\n", + "oper_dba 1154 non-null object\n", + "oper_addr_same 1154 non-null object\n", + "oper_street 1154 non-null object\n", + "oper_city 1154 non-null object\n", + "oper_state 1154 non-null object\n", + "oper_country 1154 non-null object\n", + "oper_zip 1154 non-null object\n", + "oper_code 1154 non-null object\n", + "certs_held 1154 non-null object\n", + "oprtng_cert 1154 non-null object\n", + "oper_cert 1154 non-null object\n", + "oper_cert_num 1154 non-null object\n", + "oper_sched 1154 non-null object\n", + "oper_dom_int 1154 non-null object\n", + "oper_pax_cargo 1154 non-null object\n", + "type_fly 1154 non-null object\n", + "second_pilot 1154 non-null object\n", + "dprt_pt_same_ev 1154 non-null object\n", + "dprt_apt_id 1154 non-null object\n", + "dprt_city 1154 non-null object\n", + "dprt_state 1154 non-null object\n", + "dprt_country 1154 non-null object\n", + "dprt_time 1154 non-null object\n", + "dprt_timezn 1154 non-null object\n", + "dest_same_local 1154 non-null object\n", + "dest_apt_id 1154 non-null object\n", + "dest_city 1154 non-null object\n", + "dest_state 1154 non-null object\n", + "dest_country 1154 non-null object\n", + "phase_flt_spec 1154 non-null object\n", + "report_to_icao 1154 non-null object\n", + "evacuation 1154 non-null object\n", + "lchg_date 1154 non-null object\n", + "lchg_userid 1154 non-null object\n", + "afm_hrs_since 1154 non-null object\n", + "rwy_num 1154 non-null object\n", + "rwy_len 1154 non-null object\n", + "rwy_width 1154 non-null object\n", + "site_seeing 1154 non-null object\n", + "air_medical 1154 non-null object\n", + "med_type_flight 1154 non-null object\n", + "acft_year 1154 non-null object\n", + "fuel_on_board 1154 non-null object\n", + "commercial_space_flight 1154 non-null int64\n", + "unmanned 1154 non-null int64\n", + "ifr_equipped_cert 1154 non-null int64\n", + "elt_mounted_aircraft 1154 non-null int64\n", + "elt_connected_antenna 1154 non-null int64\n", + "elt_manufacturer 1154 non-null object\n", + "elt_model 1154 non-null object\n", + "elt_reason_other 1154 non-null object\n", + "dtypes: int64(6), object(86)\n", + "memory usage: 838.5+ KB\n" + ] + } + ], + "source": [ + "db._execute_query(\"SELECT * FROM aircraft\").info()" + ] + }, + { + "cell_type": "code", + "execution_count": 20, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "{'acc': True, 'far_parts': ['121 '], 'inc': True}" + ] + }, + "execution_count": 20, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "db.set_filtering_conditions(inc=True)" + ] + }, + { + "cell_type": "code", + "execution_count": 21, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "['_get_ev_ids_ev_type_query', '_get_ev_ids_far_part_query']\n" + ] + }, + { + "data": { + "text/plain": [ + "\"ev_id IN (SELECT ev_id FROM events WHERE ev_type='ACC' OR ev_type='INC') AND ev_id IN (SELECT ev_id FROM aircraft WHERE far_part IN ('121 '))\"" + ] + }, + "execution_count": 21, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "db._get_conditions_query()" + ] + }, + { + "cell_type": "code", + "execution_count": 22, + "metadata": { + "scrolled": true + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "['_get_ev_ids_ev_type_query', '_get_ev_ids_far_part_query']\n", + "\n", + "Index: 2176 entries, 20140507X51410 to 20110426X03427\n", + "Data columns (total 70 columns):\n", + "ntsb_no 2176 non-null object\n", + "ev_type 2176 non-null object\n", + "ev_date 2176 non-null object\n", + "ev_dow 2176 non-null object\n", + "ev_time 2176 non-null object\n", + "ev_tmzn 2176 non-null object\n", + "ev_city 2176 non-null object\n", + "ev_state 2176 non-null object\n", + "ev_country 2176 non-null object\n", + "ev_site_zipcode 2176 non-null object\n", + "ev_year 2176 non-null int64\n", + "ev_month 2176 non-null int64\n", + "mid_air 2176 non-null object\n", + "on_ground_collision 2176 non-null object\n", + "latitude 2176 non-null object\n", + "longitude 2176 non-null object\n", + "latlong_acq 2176 non-null object\n", + "apt_name 2176 non-null object\n", + "ev_nr_apt_id 2176 non-null object\n", + "ev_nr_apt_loc 2176 non-null object\n", + "apt_dist 2176 non-null object\n", + "apt_dir 2176 non-null object\n", + "apt_elev 2176 non-null object\n", + "wx_brief_comp 2176 non-null object\n", + "wx_src_iic 2176 non-null object\n", + "wx_obs_time 2176 non-null object\n", + "wx_obs_dir 2176 non-null object\n", + "wx_obs_fac_id 2176 non-null object\n", + "wx_obs_elev 2176 non-null object\n", + "wx_obs_dist 2176 non-null object\n", + "wx_obs_tmzn 2176 non-null object\n", + "light_cond 2176 non-null object\n", + "sky_cond_nonceil 2176 non-null object\n", + "sky_nonceil_ht 2176 non-null object\n", + "sky_ceil_ht 2176 non-null object\n", + "sky_cond_ceil 2176 non-null object\n", + "vis_rvr 2176 non-null object\n", + "vis_rvv 2176 non-null object\n", + "vis_sm 2176 non-null object\n", + "wx_temp 2176 non-null object\n", + "wx_dew_pt 2176 non-null object\n", + "wind_dir_deg 2176 non-null object\n", + "wind_dir_ind 2176 non-null object\n", + "wind_vel_kts 2176 non-null object\n", + "wind_vel_ind 2176 non-null object\n", + "gust_ind 2176 non-null object\n", + "gust_kts 2176 non-null object\n", + "altimeter 2176 non-null object\n", + "wx_dens_alt 2176 non-null object\n", + "wx_int_precip 2176 non-null object\n", + "metar 2176 non-null object\n", + "ev_highest_injury 2176 non-null object\n", + "inj_f_grnd 2176 non-null object\n", + "inj_m_grnd 2176 non-null object\n", + "inj_s_grnd 2176 non-null object\n", + "inj_tot_f 2176 non-null object\n", + "inj_tot_m 2176 non-null object\n", + "inj_tot_n 2176 non-null object\n", + "inj_tot_s 2176 non-null object\n", + "inj_tot_t 2176 non-null object\n", + "invest_agy 2176 non-null object\n", + "ntsb_docket 2176 non-null object\n", + "ntsb_notf_from 2176 non-null object\n", + "ntsb_notf_date 2176 non-null object\n", + "ntsb_notf_tm 2176 non-null object\n", + "fiche_number 2176 non-null object\n", + "lchg_date 2176 non-null object\n", + "lchg_userid 2176 non-null object\n", + "wx_cond_basic 2176 non-null object\n", + "faa_dist_office 2176 non-null object\n", + "dtypes: int64(2), object(68)\n", + "memory usage: 1.2+ MB\n" + ] + } + ], + "source": [ + "db._execute_query(\"SELECT * FROM events\").info()" + ] + }, + { + "cell_type": "code", + "execution_count": 23, + "metadata": {}, + "outputs": [], + "source": [ + "events = db.get_events()" + ] + }, + { + "cell_type": "code", + "execution_count": 24, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "Index: 2176 entries, 20140507X51410 to 20110426X03427\n", + "Data columns (total 63 columns):\n", + "ntsb_no 2176 non-null object\n", + "ev_type 2176 non-null category\n", + "ev_tmzn 2176 non-null category\n", + "ev_city 2176 non-null object\n", + "ev_state 2176 non-null category\n", + "ev_country 2176 non-null object\n", + "ev_site_zipcode 2176 non-null object\n", + "mid_air 2176 non-null category\n", + "on_ground_collision 2176 non-null category\n", + "latitude 604 non-null float64\n", + "longitude 601 non-null float64\n", + "latlong_acq 2176 non-null category\n", + "apt_name 2176 non-null object\n", + "ev_nr_apt_id 2176 non-null object\n", + "ev_nr_apt_loc 2176 non-null category\n", + "apt_dist 1039 non-null float64\n", + "apt_dir 876 non-null float64\n", + "apt_elev 1327 non-null float64\n", + "wx_brief_comp 2176 non-null category\n", + "wx_src_iic 2176 non-null category\n", + "wx_obs_time 1896 non-null float64\n", + "wx_obs_dir 1596 non-null float64\n", + "wx_obs_fac_id 2176 non-null object\n", + "wx_obs_elev 1808 non-null float64\n", + "wx_obs_dist 1638 non-null float64\n", + "wx_obs_tmzn 2176 non-null object\n", + "light_cond 2176 non-null category\n", + "sky_cond_nonceil 2176 non-null category\n", + "sky_nonceil_ht 1662 non-null float64\n", + "sky_ceil_ht 1640 non-null float64\n", + "sky_cond_ceil 2176 non-null category\n", + "vis_rvr 1324 non-null float64\n", + "vis_rvv 1316 non-null float64\n", + "vis_sm 1940 non-null float64\n", + "wx_temp 1702 non-null float64\n", + "wx_dew_pt 1607 non-null float64\n", + "wind_dir_deg 1889 non-null float64\n", + "wind_dir_ind 2176 non-null category\n", + "wind_vel_kts 2176 non-null object\n", + "wind_vel_ind 2176 non-null category\n", + "gust_ind 2176 non-null category\n", + "gust_kts 1461 non-null float64\n", + "altimeter 1596 non-null float64\n", + "wx_dens_alt 484 non-null float64\n", + "wx_int_precip 2176 non-null category\n", + "metar 2176 non-null object\n", + "ev_highest_injury 2176 non-null category\n", + "inj_f_grnd 1363 non-null float64\n", + "inj_m_grnd 1369 non-null float64\n", + "inj_s_grnd 1370 non-null float64\n", + "inj_tot_f 101 non-null float64\n", + "inj_tot_m 459 non-null float64\n", + "inj_tot_n 2048 non-null float64\n", + "inj_tot_s 548 non-null float64\n", + "inj_tot_t 837 non-null float64\n", + "invest_agy 2176 non-null category\n", + "ntsb_docket 1925 non-null float64\n", + "ntsb_notf_from 2176 non-null object\n", + "fiche_number 2176 non-null object\n", + "wx_cond_basic 2176 non-null category\n", + "faa_dist_office 2176 non-null object\n", + "ev_date_time 2176 non-null datetime64[ns]\n", + "ntsb_notf_date_tm 1501 non-null datetime64[ns]\n", + "dtypes: category(19), datetime64[ns](2), float64(29), object(13)\n", + "memory usage: 812.6+ KB\n" + ] + } + ], + "source": [ + "events.info()" + ] + }, + { + "cell_type": "code", + "execution_count": 25, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "Index: 2303 entries, 20140507X51410 to 20110426X03427\n", + "Data columns (total 91 columns):\n", + "Aircraft_Key 2303 non-null int64\n", + "regis_no 2303 non-null object\n", + "ntsb_no 2303 non-null object\n", + "acft_missing 2303 non-null category\n", + "far_part 2303 non-null category\n", + "flt_plan_filed 2303 non-null category\n", + "flight_plan_activated 2303 non-null category\n", + "damage 2303 non-null category\n", + "acft_fire 2303 non-null category\n", + "acft_expl 2303 non-null category\n", + "acft_make 2303 non-null category\n", + "acft_model 2303 non-null category\n", + "acft_series 2303 non-null category\n", + "acft_serial_no 2303 non-null object\n", + "cert_max_gr_wt 1992 non-null float64\n", + "acft_category 2303 non-null category\n", + "acft_reg_cls 2303 non-null object\n", + "homebuilt 2303 non-null category\n", + "date_last_insp 1258 non-null datetime64[ns]\n", + "afm_hrs 1392 non-null float64\n", + "afm_hrs_last_insp 1046 non-null float64\n", + "commercial_space_flight 2303 non-null category\n", + "unmanned 2303 non-null category\n", + "ifr_equipped_cert 2303 non-null category\n", + "elt_mounted_aircraft 2303 non-null category\n", + "elt_connected_antenna 2303 non-null category\n", + "afm_hrs_since 2303 non-null category\n", + "air_medical 2303 non-null category\n", + "certs_held 2303 non-null category\n", + "dest_apt_id 2303 non-null category\n", + "dest_country 2303 non-null category\n", + "dest_same_local 2303 non-null category\n", + "dest_state 2303 non-null category\n", + "dprt_apt_id 2303 non-null category\n", + "dprt_country 2303 non-null category\n", + "dprt_pt_same_ev 2303 non-null category\n", + "dprt_state 2303 non-null category\n", + "dprt_timezn 2303 non-null category\n", + "elt_aided_loc_ev 2303 non-null category\n", + "elt_install 2303 non-null category\n", + "elt_oper 2303 non-null category\n", + "elt_type 2303 non-null category\n", + "evacuation 2303 non-null category\n", + "fixed_retractable 2303 non-null category\n", + "oper_addr_same 2303 non-null category\n", + "oper_cert 2303 non-null category\n", + "oper_code 2303 non-null category\n", + "oper_country 2303 non-null category\n", + "oper_dom_int 2303 non-null category\n", + "oper_individual_name 2303 non-null category\n", + "oper_pax_cargo 2303 non-null category\n", + "oper_same 2303 non-null category\n", + "oper_sched 2303 non-null category\n", + "oper_state 2303 non-null category\n", + "oprtng_cert 2303 non-null category\n", + "owner_country 2303 non-null category\n", + "owner_state 2303 non-null category\n", + "report_to_icao 2303 non-null category\n", + "second_pilot 2303 non-null category\n", + "site_seeing 2303 non-null category\n", + "type_fly 2303 non-null category\n", + "type_last_insp 2303 non-null category\n", + "dest_city 2303 non-null category\n", + "dprt_city 2303 non-null category\n", + "med_type_flight 2303 non-null category\n", + "oper_cert_num 2303 non-null category\n", + "oper_city 2303 non-null category\n", + "oper_dba 2303 non-null category\n", + "oper_name 2303 non-null category\n", + "oper_street 2303 non-null category\n", + "oper_zip 2303 non-null category\n", + "owner_acft 2303 non-null category\n", + "owner_city 2303 non-null category\n", + "owner_street 2303 non-null category\n", + "owner_zip 2303 non-null category\n", + "rwy_num 2303 non-null category\n", + "fuel_on_board 2303 non-null category\n", + "elt_manufacturer 2303 non-null category\n", + "elt_model 2303 non-null category\n", + "elt_reason_other 2303 non-null category\n", + "cc_seats 190 non-null float64\n", + "fc_seats 214 non-null float64\n", + "pax_seats 241 non-null float64\n", + "phase_flt_spec 1931 non-null category\n", + "rwy_len 1027 non-null float64\n", + "rwy_width 1025 non-null float64\n", + "acft_year 33 non-null float64\n", + "dprt_time 2013 non-null float64\n", + "total_seats 1926 non-null float64\n", + "num_eng 2145 non-null float64\n", + "phase_flt_spec_gross 1931 non-null category\n", + "dtypes: category(73), datetime64[ns](1), float64(12), int64(1), object(4)\n", + "memory usage: 986.6+ KB\n" + ] + } + ], + "source": [ + "db.get_aircrafts().info()" + ] + }, + { + "cell_type": "code", + "execution_count": 26, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "Index: 2356 entries, 20030314X00332 to 20080107X00025\n", + "Data columns (total 6 columns):\n", + "Aircraft_Key 2356 non-null int64\n", + "Occurrence_No 2356 non-null int64\n", + "Occurrence_Code 2356 non-null category\n", + "Phase_of_Flight 2356 non-null category\n", + "Altitude 2356 non-null int64\n", + "phase_flt_spec_gross 2356 non-null category\n", + "dtypes: category(3), int64(3)\n", + "memory usage: 84.5+ KB\n" + ] + } + ], + "source": [ + "db.get_occurrences().info()" + ] + }, + { + "cell_type": "code", + "execution_count": 27, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "Index: 25273 entries, 20140507X51410 to 20110426X03427\n", + "Data columns (total 5 columns):\n", + "Aircraft_Key 25273 non-null int64\n", + "crew_no 25273 non-null int64\n", + "flight_type 25273 non-null category\n", + "flight_craft 25273 non-null category\n", + "flight_hours 25228 non-null float64\n", + "dtypes: category(2), float64(1), int64(2)\n", + "memory usage: 839.9+ KB\n" + ] + } + ], + "source": [ + "db.get_flight_time_accidents().info()" + ] + }, + { + "cell_type": "code", + "execution_count": 28, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "Index: 6791 entries, 20030314X00332 to 20080107X00025\n", + "Data columns (total 8 columns):\n", + "Aircraft_Key 6791 non-null int64\n", + "Occurrence_No 6791 non-null int64\n", + "seq_event_no 6791 non-null int64\n", + "group_code 6791 non-null category\n", + "Subj_Code 6791 non-null category\n", + "Cause_Factor 6791 non-null category\n", + "Modifier_Code 6791 non-null category\n", + "Person_Code 6791 non-null category\n", + "dtypes: category(5), int64(3)\n", + "memory usage: 298.9+ KB\n" + ] + } + ], + "source": [ + "db.get_seq_of_events().info()" + ] + }, + { + "cell_type": "code", + "execution_count": 29, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "Index: 2947 entries, 20140507X51410 to 20110426X03427\n", + "Data columns (total 5 columns):\n", + "Aircraft_Key 2947 non-null int64\n", + "crew_no 2947 non-null int64\n", + "crew_category 2947 non-null category\n", + "crew_age 2553 non-null float64\n", + "crew_sex 2947 non-null category\n", + "dtypes: category(2), float64(1), int64(2)\n", + "memory usage: 98.4+ KB\n" + ] + } + ], + "source": [ + "db.get_flight_crew().info()" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.6.2" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/flight_safety/queries.py b/flight_safety/queries.py index 377cc4b..60e9f88 100644 --- a/flight_safety/queries.py +++ b/flight_safety/queries.py @@ -2,15 +2,15 @@ Queries to retrieve data from database """ +import sqlite3 import pandas as pd -from .utils import convert_lat, convert_lon, rename_categories +from .utils import convert_lat, convert_lon, rename_categories, \ + combine_date_time DATE_FORMAT = '%m/%d/%y %H:%M:%S' TIME_FORMAT = '%H%M' -FAR_PARTS = "'121 ', '125 '" - EVENTS_COLUMNS = ( 'ev_id', 'ntsb_no', @@ -92,7 +92,6 @@ "inj_tot_n", "inj_tot_s", "inj_tot_t", - "ntsb_notf_tm", "vis_rvv", "wind_dir_deg", "wx_obs_dist", @@ -405,246 +404,265 @@ ) -def get_codes_meaning(con, table, column): - query = ( - "select distinct code_iaids, meaning from eADMSPUB_DataDictionary " - f"where \"Table\"='{table}' and \"Column\"='{column}'" - ) - return pd.read_sql(query, con, index_col='code_iaids') +class AvallDB: + + def __init__(self, file, acc=True, inc=False, far_parts='ALL'): + + self.con = sqlite3.connect(file) + + self._acc = acc + self._inc = inc + self._far_parts = far_parts + + # Index matching conditions above. If None when executing a query, + # they will be gathered. + self._ev_ids = None + + def set_filtering_conditions(self, acc=None, inc=None, far_parts=None): + + if acc: + self._acc = acc + if inc: + self._inc = inc + if far_parts: + self._far_parts = far_parts + + # If conditions have changed, ev_ids must be gathered again + self._ev_ids = None + + current_filters = {'acc': self._acc, + 'inc': self._inc, + 'far_parts': self._far_parts} + + return current_filters + def _get_ev_ids_ev_type_query(self): -def get_events_accidents(con): - query = ("SELECT {cols} FROM events " - "WHERE ev_type='ACC' AND ev_date IS NOT NULL " - "AND ev_id IN (SELECT ev_id FROM aircraft WHERE " - "far_part IN ({far_parts}))".format( - cols=", ".join(EVENTS_COLUMNS), - far_parts=FAR_PARTS - ) - ) - events = pd.read_sql_query(query, con, - index_col='ev_id', - parse_dates={'ev_date': DATE_FORMAT, - 'ev_time': TIME_FORMAT, - 'ntsb_notf_date': DATE_FORMAT, - 'ntsb_notf_tm': TIME_FORMAT - } - ) + if self._acc and self._inc: + return ("SELECT ev_id FROM events WHERE ev_type='ACC' OR " + "ev_type='INC'") - for c in EVENTS_NUMERIC: - events[c] = pd.to_numeric(events[c], errors='coerce') + elif self._acc: + return "SELECT ev_id FROM events WHERE ev_type='ACC'" - for c in EVENTS_CATEGORICAL: - events[c] = events[c].astype('category') + else: + return "SELECT ev_id FROM events WHERE ev_type='INC'" - events['latitude'] = events['latitude'].apply(convert_lat) - events['longitude'] = events['longitude'].apply(convert_lon) + def _get_ev_ids_far_part_query(self): - return events + if isinstance(self._far_parts, str) and self._far_parts.upper() == 'ALL': + return "SELECT ev_id FROM aircraft" + else: + # Convert to string and add trailing space and enclose with '' + far_parts_ = ["'" + str(fp).strip() + ' ' + "'" for fp in + self._far_parts] -def get_events_all(con): - query = ("SELECT {cols} FROM events " - .format( - cols=", ".join(EVENTS_COLUMNS) - ) - ) - events = pd.read_sql_query(query, con, - index_col='ev_id', - parse_dates={'ev_date': DATE_FORMAT, - 'ev_time': TIME_FORMAT, - 'ntsb_notf_date': DATE_FORMAT, - 'ntsb_notf_tm': TIME_FORMAT - } - ) + # transform list to a string separated by , + far_parts_ = "(" + ", ".join(far_parts_) + ")" + return ("SELECT ev_id FROM aircraft WHERE far_part IN " + f"{far_parts_}") - for c in EVENTS_NUMERIC: - events[c] = pd.to_numeric(events[c], errors='coerce') + def _get_conditions_query(self): + """Executes any method starting with _get_ev_ids so all the + filtering queries will be gathered here + """ + sel_methods = [ii for ii in dir(self) if ii.startswith("_get_ev_ids")] - for c in EVENTS_CATEGORICAL: - events[c] = events[c].astype('category') + selections = [getattr(self, ii)() for ii in sel_methods] - events['latitude'] = events['latitude'].apply(convert_lat) - events['longitude'] = events['longitude'].apply(convert_lon) + conditions = " AND ".join([f"ev_id IN ({sel})" for sel in selections]) - return events + return conditions + def _set_matching_ev_ids(self): -def get_aircrafts_accidents(con): - ac_columns = ", ".join(AIRCRAFT_COLUMNS) + # Get conditions query to be written after WHERE + conds = self._get_conditions_query() + # Write query + query = f"SELECT ev_id FROM events WHERE {conds}" + # Get events matching conditions + ev_ids = pd.read_sql(query, self.con)['ev_id'].values - query = ( - f"SELECT {ac_columns} FROM aircraft WHERE ev_id IN " - "(SELECT ev_id FROM events WHERE ev_type='ACC' AND " - f"ev_date IS NOT NULL) AND far_part IN ({FAR_PARTS})" - ) + # Transform strings matching to string + self._ev_ids = "'" + "', '".join(ev_ids) + "'" - aircrafts = pd.read_sql(query, con, - parse_dates={'date_last_insp': DATE_FORMAT} - ) + def _execute_query(self, query, **kwargs): - for c in AIRCRAFT_NUMERIC: - aircrafts[c] = pd.to_numeric(aircrafts[c], errors='coerce') + if self._ev_ids is None: + self._set_matching_ev_ids() - # phase_flt_spec is parsed as numeric and this is used to get phases of - # flight with less detail (ie. "Takeoff - roll/run" -> "Takeoff) - aircrafts['phase_flt_spec_gross'] = ((aircrafts.phase_flt_spec // 10) * 10) + query = query + f" WHERE ev_id in ({self._ev_ids})" - new_categorical_cols = ['phase_flt_spec_gross', 'phase_flt_spec'] - for c in list(AIRCRAFT_CATEGORICAL) + new_categorical_cols: - aircrafts[c] = aircrafts[c].astype('category') + return pd.read_sql(query, self.con, index_col='ev_id', **kwargs) - PHASE_FLT_SPEC_DICT = get_codes_meaning(con, 'aircraft', 'phase_flt_spec') + def get_codes_meaning(self, table, column): + query = ( + "select distinct code_iaids, meaning from eADMSPUB_DataDictionary " + f"where \"Table\"='{table}' and \"Column\"='{column}'" + ) + return pd.read_sql(query, self.con, index_col='code_iaids') - # Change codes for names (ie. 570 to Landing) - cats = rename_categories(aircrafts['phase_flt_spec_gross'].cat.categories, - PHASE_FLT_SPEC_DICT) - aircrafts['phase_flt_spec_gross'].cat.rename_categories(cats, inplace=True) + def get_events(self): + ev_cols_ = ", ".join(EVENTS_COLUMNS) + query = (f"SELECT {ev_cols_} FROM events ") - cats = rename_categories(aircrafts['phase_flt_spec'].cat.categories, - PHASE_FLT_SPEC_DICT) - aircrafts['phase_flt_spec'].cat.rename_categories(cats, inplace=True) + events = self._execute_query( + query, + parse_dates={'ev_date': DATE_FORMAT, + 'ev_time': TIME_FORMAT, + 'ntsb_notf_date': DATE_FORMAT, + 'ntsb_notf_tm': TIME_FORMAT + } + ) - return aircrafts + # Substitute ev_date and ev_time for a datetime col: ev_date_time + events['ev_date_time'] = events.apply( + combine_date_time, 1, args=('ev_date', 'ev_time') + ) + events.drop(['ev_date', 'ev_time'], axis=1, inplace=True) + # Idem with ntsb_notf_date_tm + events['ntsb_notf_date_tm'] = events.apply( + combine_date_time, 1, args=('ntsb_notf_date', 'ntsb_notf_tm') + ) + events.drop(['ntsb_notf_date', 'ntsb_notf_tm'], axis=1, inplace=True) -def get_aircrafts_all(con): - ac_columns = ", ".join(AIRCRAFT_COLUMNS) + for c in EVENTS_NUMERIC: + events[c] = pd.to_numeric(events[c], errors='coerce') - query = ( - f"SELECT {ac_columns} FROM aircraft " - ) + for c in EVENTS_CATEGORICAL: + events[c] = events[c].astype('category') - aircrafts = pd.read_sql(query, con, - parse_dates={'date_last_insp': DATE_FORMAT} - ) + events['latitude'] = events['latitude'].apply(convert_lat) + events['longitude'] = events['longitude'].apply(convert_lon) - for c in AIRCRAFT_NUMERIC: - aircrafts[c] = pd.to_numeric(aircrafts[c], errors='coerce') + return events - # phase_flt_spec is parsed as numeric and this is used to get phases of - # flight with less detail (ie. "Takeoff - roll/run" -> "Takeoff) - aircrafts['phase_flt_spec_gross'] = ((aircrafts.phase_flt_spec // 10) * 10) + def get_aircrafts(self): + ac_columns = ", ".join(AIRCRAFT_COLUMNS) - new_categorical_cols = ['phase_flt_spec_gross', 'phase_flt_spec'] - for c in list(AIRCRAFT_CATEGORICAL) + new_categorical_cols: - aircrafts[c] = aircrafts[c].astype('category') + query = f"SELECT {ac_columns} FROM aircraft" - PHASE_FLT_SPEC_DICT = get_codes_meaning(con, 'aircraft', 'phase_flt_spec') + aircrafts = self._execute_query( + query, parse_dates={'date_last_insp': DATE_FORMAT} + ) - # Change codes for names (ie. 570 to Landing) - cats = rename_categories(aircrafts['phase_flt_spec_gross'].cat.categories, - PHASE_FLT_SPEC_DICT) - aircrafts['phase_flt_spec_gross'].cat.rename_categories(cats, inplace=True) + for c in AIRCRAFT_NUMERIC: + aircrafts[c] = pd.to_numeric(aircrafts[c], errors='coerce') - cats = rename_categories(aircrafts['phase_flt_spec'].cat.categories, - PHASE_FLT_SPEC_DICT) - aircrafts['phase_flt_spec'].cat.rename_categories(cats, inplace=True) + # phase_flt_spec is parsed as numeric and this is used to get phases of + # flight with less detail (ie. "Takeoff - roll/run" -> "Takeoff) + aircrafts['phase_flt_spec_gross'] = ( + (aircrafts.phase_flt_spec // 10) * 10) - return aircrafts + new_categorical_cols = ['phase_flt_spec_gross', 'phase_flt_spec'] + for c in list(AIRCRAFT_CATEGORICAL) + new_categorical_cols: + aircrafts[c] = aircrafts[c].astype('category') + PHASE_FLT_SPEC_DICT = self.get_codes_meaning( + 'aircraft', 'phase_flt_spec' + ) -def get_occurrences_accidents(con): - occurrence_cols = ", ".join(OCCURRENCES_COLUMNS) + # Change codes for names (ie. 570 to Landing) + cats = rename_categories( + aircrafts['phase_flt_spec_gross'].cat.categories, + PHASE_FLT_SPEC_DICT) + aircrafts['phase_flt_spec_gross'].cat.rename_categories(cats, + inplace=True) - query = ( - f"SELECT {occurrence_cols} FROM Occurrences WHERE ev_id IN " - "(SELECT ev_id FROM events WHERE ev_type='ACC' AND " - "ev_date IS NOT NULL) AND ev_id IN (SELECT ev_id FROM aircraft " - f"WHERE far_part in ({FAR_PARTS}))" - ) + cats = rename_categories(aircrafts['phase_flt_spec'].cat.categories, + PHASE_FLT_SPEC_DICT) + aircrafts['phase_flt_spec'].cat.rename_categories(cats, inplace=True) - occurrences = pd.read_sql(query, con) + return aircrafts - for c in OCCURENCES_NUMERIC: - occurrences[c] = pd.to_numeric(occurrences[c], errors='coerce') + def get_occurrences(self): + occurrence_cols = ", ".join(OCCURRENCES_COLUMNS) - # phase_flt_spec is parsed as numeric and this is used to get phases of - # flight with less detail (ie. "Takeoff - roll/run" -> "Takeoff) - occurrences['phase_flt_spec_gross'] = ((occurrences.Phase_of_Flight // 10) * 10) + query = f"SELECT {occurrence_cols} FROM Occurrences" + occurrences = self._execute_query(query) - for c in list(OCCURRENCE_CATEGORICAL) + ['phase_flt_spec_gross']: - occurrences[c] = occurrences[c].astype('category') + for c in OCCURENCES_NUMERIC: + occurrences[c] = pd.to_numeric(occurrences[c], errors='coerce') - PHASE_FLT_SPEC = get_codes_meaning(con, 'Occurrences', 'Phase_of_Flight') - OCCU_CODE_SPEC = get_codes_meaning(con, 'Occurrences', 'Occurrence_Code') + # phase_flt_spec is parsed as numeric and this is used to get phases of + # flight with less detail (ie. "Takeoff - roll/run" -> "Takeoff) + occurrences['phase_flt_spec_gross'] = ( + (occurrences.Phase_of_Flight // 10) * 10) - cats = rename_categories(occurrences['Phase_of_Flight'].cat.categories, - PHASE_FLT_SPEC) - occurrences['Phase_of_Flight'].cat.rename_categories(cats, inplace=True) + for c in list(OCCURRENCE_CATEGORICAL) + ['phase_flt_spec_gross']: + occurrences[c] = occurrences[c].astype('category') - cats = rename_categories(occurrences['phase_flt_spec_gross'].cat.categories, - PHASE_FLT_SPEC) - occurrences['phase_flt_spec_gross'].cat.rename_categories(cats, inplace=True) + PHASE_FLT_SPEC = self.get_codes_meaning( + 'Occurrences', 'Phase_of_Flight' + ) + OCCU_CODE_SPEC = self.get_codes_meaning( + 'Occurrences', 'Occurrence_Code' + ) - cats = rename_categories(occurrences['Occurrence_Code'].cat.categories, - OCCU_CODE_SPEC) - occurrences['Occurrence_Code'].cat.rename_categories(cats, inplace=True) + cats = rename_categories(occurrences['Phase_of_Flight'].cat.categories, + PHASE_FLT_SPEC) + occurrences['Phase_of_Flight'].cat.rename_categories(cats, + inplace=True) - return occurrences + cats = rename_categories( + occurrences['phase_flt_spec_gross'].cat.categories, + PHASE_FLT_SPEC) + occurrences['phase_flt_spec_gross'].cat.rename_categories(cats, + inplace=True) + cats = rename_categories(occurrences['Occurrence_Code'].cat.categories, + OCCU_CODE_SPEC) + occurrences['Occurrence_Code'].cat.rename_categories(cats, + inplace=True) -def get_flight_time_accidents(con): - flight_time_cols = ", ".join(FLIGHT_TIME_COLS) + return occurrences - query = ( - f"SELECT {flight_time_cols} FROM flight_time WHERE ev_id IN " - "(SELECT ev_id FROM events WHERE ev_type='ACC' AND " - "ev_date IS NOT NULL) AND ev_id IN (SELECT ev_id FROM aircraft WHERE " - f"far_part in ({FAR_PARTS}))" - ) + def get_flight_time(self): + flight_time_cols = ", ".join(FLIGHT_TIME_COLS) - flight_time = pd.read_sql(query, con) + query = f"SELECT {flight_time_cols} FROM flight_time" - for c in FLIGHT_TIME_NUMERIC: - flight_time[c] = pd.to_numeric(flight_time[c], errors='coerce') + flight_time = self._execute_query(query) - for c in FLIGHT_TIME_CATEGORICAL: - flight_time[c] = flight_time[c].astype('category') + for c in FLIGHT_TIME_NUMERIC: + flight_time[c] = pd.to_numeric(flight_time[c], errors='coerce') - return flight_time + for c in FLIGHT_TIME_CATEGORICAL: + flight_time[c] = flight_time[c].astype('category') + return flight_time -def get_seq_of_events_accidents(con): - seq_of_events_cols = ", ".join(SEQ_OF_EVETNS_COLUMNS) + def get_seq_of_events(self): + seq_of_events_cols = ", ".join(SEQ_OF_EVETNS_COLUMNS) - query = ( - f"SELECT {seq_of_events_cols} FROM seq_of_events WHERE ev_id IN " - "(SELECT ev_id FROM events WHERE ev_type='ACC' AND " - "ev_date IS NOT NULL) AND ev_id IN (SELECT ev_id FROM aircraft WHERE " - f"far_part IN ({FAR_PARTS}))" - ) + query = f"SELECT {seq_of_events_cols} FROM seq_of_events" - seq_of_events = pd.read_sql(query, con) + seq_of_events = self._execute_query(query) - # DROP GROUP_CODE = 0 because it is not codified - # seq_of_events = seq_of_events[seq_of_events.group_code != 0] + # DROP GROUP_CODE = 0 because it is not codified + # seq_of_events = seq_of_events[seq_of_events.group_code != 0] - for c in SEQ_OF_EVENTS_NUMERIC: - seq_of_events[c] = pd.to_numeric(seq_of_events[c], errors='coerce') + for c in SEQ_OF_EVENTS_NUMERIC: + seq_of_events[c] = pd.to_numeric(seq_of_events[c], errors='coerce') - for c in SEQ_OF_EVENTS_CATEGORICAL: - seq_of_events[c] = seq_of_events[c].astype('category') + for c in SEQ_OF_EVENTS_CATEGORICAL: + seq_of_events[c] = seq_of_events[c].astype('category') - return seq_of_events + return seq_of_events + def get_flight_crew(self): -def get_flight_crew_accidents(con): + flight_crew_cols = ', '.join(FLIGHT_CREW_COLS) - flight_crew_cols = ', '.join(FLIGHT_CREW_COLS) - - query = ( - f"SELECT {flight_crew_cols} FROM Flight_Crew WHERE ev_id IN " - "(SELECT ev_id FROM events WHERE ev_type='ACC' AND " - "ev_date IS NOT NULL) AND ev_id IN (SELECT ev_id FROM aircraft " - f"WHERE far_part in ({FAR_PARTS}))" - ) - flight_crew = pd.read_sql_query(query, con) + query = f"SELECT {flight_crew_cols} FROM Flight_Crew" + flight_crew = self._execute_query(query) - for c in FLIGHT_CREW_NUMERIC: - flight_crew[c] = pd.to_numeric(flight_crew[c], errors='coerce') + for c in FLIGHT_CREW_NUMERIC: + flight_crew[c] = pd.to_numeric(flight_crew[c], errors='coerce') - for c in FLIGHT_CREW_CATEGORICAL: - flight_crew[c] = flight_crew[c].astype('category') + for c in FLIGHT_CREW_CATEGORICAL: + flight_crew[c] = flight_crew[c].astype('category') - return flight_crew + return flight_crew diff --git a/flight_safety/utils.py b/flight_safety/utils.py index fb46d10..8b96977 100644 --- a/flight_safety/utils.py +++ b/flight_safety/utils.py @@ -3,6 +3,7 @@ """ import numpy as np +import pandas as pd def convert_lat(string): @@ -47,3 +48,15 @@ def rename_categories(old_categories, codes_meaning): else: new_categories.append(cat) return new_categories + + +def combine_date_time(df, date_col, time_col): + + date = df[date_col].date() + + try: + time = df[time_col].time() + except: + return date + + return pd.datetime.combine(date, time) \ No newline at end of file