PHD Project - Driver energy prediction
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

1078 lines
459 KiB

2 years ago
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Setup\n",
"\n",
"`TRAINING_RANGE` und `TEST_RANGE` müssen je nach Länge des Datensatzes angepasst werden.\n",
"\n",
"**Keine Anpassung erforderlich** (Siehe Datensatz herunterladen)\n",
"- 30% Training-Daten (0-30%)\n",
"- 70% Test-Daten (30%-100%)"
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"['curvature', 'radius', 'phiSegment', 'flt_DB_counter', 'flt_setup_id', 'flt_altitude', 'flt_go_elevation', 'flt_go_eleResolution', 'flt_osm_trafficSignal', 'flt_osm_w_wood', 'flt_join_idx', 'flt_curvature', 'flt_radius', 'flt_phiSegment', 'hr_latitude', 'hr_longitude', 'hr_elevation', 'hr_SpeedLimit', 'hr_LinkID', 'hr_shapeFirstPoint', 'hr_shapeLastPoint', 'hr_lengthSegemnt', 'hr_actualManeuver', 'hr_traficSpeed', 'hr_traficTime', 'hr_baseSpeed', 'hr_baseTime', 'hr_JamFactor', 'hr_FunctionalRoadClass', 'hr_consumption', 'hr_mTravelTime', 'hr_mLenght', 'hr_mFirstPoint', 'hr_mLastPoint', 'hr_mNextManeuver', 'hr_mTrafficTime', 'hr_mStartAngle', 'hr_leg_firtPoint', 'hr_leg_lastPoint', 'hr_leg_length', 'hr_leg_travelTime', 'hr_leg_trafficTime', 'hr_leg_baseTime', 'hr_leg_spot', 'hr_leg_shapeIndex', 'hr_IdxNP', 'hr_NearestPoint_1', 'hr_NearestPoint_2', 'hr_PointOnRoute_1', 'hr_PointOnRoute_2', 'hr_Dist2Origin', 'hr_Dist2Route', 'hr_osm_trafficSignal', 'hr_osm_w_wood', 'hr_join_idx', 'hr_curvature', 'hr_radius', 'hr_phiSegment', 'go_start_latitude', 'go_start_longitude', 'go_end_latitude', 'go_end_longitude', 'go_duration', 'go_latitude', 'go_longitude', 'go_routing_flag', 'go_mean_velocity_calc_pre', 'go_mean_velocity_calc', 'go_IdxNP', 'go_NearestPoint_1', 'go_NearestPoint_2', 'go_PointOnRoute_1', 'go_PointOnRoute_2', 'go_Dist2Origin', 'go_Dist2Route', 'go_join_idx', 'go_curvature', 'go_radius', 'go_phiSegment', 'osrm_latitude', 'osrm_longitude', 'osrm_seg_datasources', 'osrm_seg_weight', 'osrm_seg_duration', 'osrm_seg_nodeID', 'osrm_step_weight', 'osrm_step_duration', 'osrm_mn_bearing_before', 'osrm_mn_bearing_after', 'osrm_mn_exit', 'osrm_i_lanes_valid_1', 'osrm_i_lanes_valid_2', 'osrm_i_lanes_valid_3', 'osrm_i_lanes_valid_4', 'osrm_i_lanes_valid_5', 'osrm_i_lanes_valid_6', 'osrm_i_bearings_1', 'osrm_i_bearings_2', 'osrm_i_bearings_3', 'osrm_i_bearings_4', 'osrm_i_bearings_5', 'osrm_i_bearings_6', 'osrm_i_entry_1', 'osrm_i_entry_2', 'osrm_i_entry_3', 
'osrm_i_entry_4', 'osrm_i_entry_5', 'osrm_i_entry_6', 'osrm_i_in', 'osrm_i_out', 'osrm_i_laneNumber', 'osrm_seg_speed', 'osrm_IdxNP', 'osrm_NearestPoint_1', 'osrm_NearestPoint_2', 'osrm_PointOnRoute_1', 'osrm_PointOnRoute_2', 'osrm_Dist2Origin', 'osrm_Dist2Route', 'osrm_join_idx', 'osrm_curvature', 'osrm_radius', 'osrm_phiSegment', 'ors_latitude', 'ors_longitude', 'ors_elevation', 'ors_long_duration', 'ors_ascent_route', 'ors_descent_route', 'ors_detourfactor', 'ors_percentage', 'ors_avgspeed', 'ors_seg_duration', 'ors_type', 'ors_maneuver_bearing_before', 'ors_maneuver_bearing_after', 'ors_seg_speed', 'ors_long_speed', 'ors_IdxNP', 'ors_NearestPoint_1', 'ors_NearestPoint_2', 'ors_PointOnRoute_1', 'ors_PointOnRoute_2', 'ors_Dist2Origin', 'ors_Dist2Route', 'ors_join_idx', 'ors_curvature', 'ors_radius', 'ors_phiSegment', 'osm_w_lanes', 'osm_w_lanes_forward', 'osm_w_lanes_backward', 'osm_w_maxspeed', 'osm_w_maxspeed_forward', 'osm_w_maxspeed_backward', 'osm_Node_ID_osrm', 'osm_Way_ID', 'osm_Way_direction', 'osm_Calc_Lanes', 'osm_w_maxspeed_new', 'osm_latitude', 'osm_longitude', 'osm_IdxNP', 'osm_NearestPoint_1', 'osm_NearestPoint_2', 'osm_PointOnRoute_1', 'osm_PointOnRoute_2', 'osm_Dist2Origin', 'osm_Dist2Route', 'osm_f_filt', 'osm_join_idx', 'osm_curvature', 'osm_radius', 'osm_phiSegment', 'tt_latitude', 'tt_longitude', 'tt_sec_Motorway', 'tt_sec_traffic', 'tt_calc_speedInKmPerH', 'tt_IdxNP', 'tt_NearestPoint_1', 'tt_NearestPoint_2', 'tt_PointOnRoute_1', 'tt_PointOnRoute_2', 'tt_Dist2Origin', 'tt_Dist2Route', 'tt_join_idx', 'tt_curvature', 'tt_radius', 'tt_phiSegment', 'weat_precipIntensity', 'weat_visibility', 'weat_cloudCover', 'weat_sunriseTime', 'weat_sunsetTime', 'weat_curvature', 'weat_radius', 'weat_phiSegment', 'mb_latitude', 'mb_longitude', 'mb_seg_speed', 'mb_seg_duration', 'mb_step_duration', 'mb_mn_bearing_before', 'mb_mn_bearing_after', 'mb_mn_exit', 'mb_mn_modifier', 'mb_i_bearings_1', 'mb_i_bearings_2', 'mb_i_bearings_3', 'mb_i_bearings_4', 
'mb_i_bearings_5', 'mb_i_bearings_6', 'mb_i_bearings_7', 'mb_i_bearings_8', 'mb_i_entry_1', 'mb_i_entry_2', 'mb_i_ent
]
}
],
"source": [
"INPUT_FILE = 'data.csv'\n",
"TARGET_COLUMN = 'flt_obd_speed'\n",
"# Still contains positional information and acceleration; however we currently train\n",
"# sample by sample without knowledge of previous or other data, so it should not be\n",
"# possible for the Regressor to simply \"calculate\" the speed.\n",
"EXCLUDED_COLUMNS = ('flt_gps_speed', 'flt_obd_engine_load', 'flt_obd_engine_rpm',\n",
"    'flt_obd_maf', 'flt_obd_accelerator_pedal','flt_time','flt_time_system_clock',\n",
"    'flt_time_utc','flt_ax','flt_ay','flt_az','flt_gx','flt_gy','flt_gz','flt_compass',\n",
"    'flt_number_of_satelites','flt_accuracy','flt_gps_bearing','flt_calc_dist_gps',\n",
"    'flt_calc_dist_vt','flt_calc_ax_vt','flt_timeIP',\n",
"    'weat_latitude','weat_longitude','weat_distanceIP','weat_timeIP','weat_join_idx',\n",
"    'hAccel_1','hAccel_2','hAccel_3','flt_mAccel_1','flt_mAccel_2','flt_mAccel_3',\n",
"    'flt_mGier_1','flt_mGier_2','flt_mGier_3','rot_Accel_1','rot_Accel_2','rot_Accel_3',\n",
"    'rot_Gier_1','rot_Gier_2','rot_Gier_3','rot_Accel_flt_1','rot_Accel_flt_2','rot_Accel_flt_3',\n",
"    'rot_Gier_flt_1','rot_Gier_flt_2','rot_Gier_flt_3'\n",
"    )\n",
"# See explanation below the feature importance plot\n",
"OVERFITTING_COLUMNS = ('weat_temperature', 'weat_humidity', 'join_idx', 'weat_windBearing', 'weat_windSpeed',\n",
"    'latitude', 'longitude', 'flt_latitude', 'flt_longitude',\n",
"    'ors_percentage_cumsum', 'flt_obd_air_temperature',\n",
"    'mb_step_weight')\n",
"# Many fields contain one of these substrings, so they are excluded by substring match\n",
"# instead of being listed one by one.\n",
"# Note: This breaks the map plotting\n",
"OVERFITTING_SUBWORDS = ('distance', 'remainDistance', 'remainDistanze', 'cumsumDistance', 'segDistance', 'time', 'remainTime')\n",
"\n",
"from runsql import runsql\n",
"\n",
"# Feature columns: every column of `computeddata` whose type is 'double' and that is\n",
"# neither the target nor excluded above (by exact name or by substring).\n",
"DATA_COLUMNS = [c['Field']\n",
"                for c in runsql('show columns from computeddata')\n",
"                if c['Type'] == 'double'\n",
"                and c['Field'] != TARGET_COLUMN\n",
"                and c['Field'] not in EXCLUDED_COLUMNS\n",
"                and c['Field'] not in OVERFITTING_COLUMNS\n",
"                and not any(w in c['Field'] for w in OVERFITTING_SUBWORDS)]\n",
"# A bare `len(DATA_COLUMNS)` in the middle of a cell is never displayed, so print the count.\n",
"print('{} feature columns'.format(len(DATA_COLUMNS)))\n",
"print(DATA_COLUMNS)"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"# Hand-picked feature subset (presumably the top features of an earlier decision-tree\n",
"# run - TODO confirm). 'flt_DB_counter' is deliberately left out: it caused overfitting.\n",
"DECISION_TREE_IMPORTANT = (\n",
"    'gh_avgspeed', 'mq_avgspeed', 'osm_w_maxspeed',\n",
"    'hr_traficSpeed', 'bg_avgspeed_leg', 'mb_seg_speed',\n",
"    'osm_w_maxspeed_new', 'hr_baseSpeed', 'hr_SpeedLimit',\n",
"    'gh_elevation', 'ors_seg_speed', 'osm_w_lanes',\n",
"    'osrm_step_weight', 'mb_seg_speed_calc', 'hr_elevation',\n",
"    'bg_avgspeed', 'osrm_seg_speed', 'bg_avgspeed_subleg',\n",
"    'go_mean_velocity_calc_pre', 'mb_i_laneNumber', 'osrm_i_entry_1',\n",
"    'ors_long_speed', 'mb_radius', 'ei_avgspeed',\n",
")"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Datensatz herunterladen\n",
"\n",
"`SETUP_ID` anpassen, Rest läuft automatisch"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
"SETUP_ID = 868\n",
"import csv\n",
"from runsql import runsql\n",
"# The query is assembled by string formatting, so force SETUP_ID to an integer first:\n",
"# anything that is not a plain number fails here instead of ending up inside the SQL text.\n",
"query = 'select * from computeddata where setup_id = {} order by distance asc'.format(int(SETUP_ID))\n",
"reader = runsql(query)\n",
"reader_data = list(reader) # list(...) so that following cells can be repeated"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [],
"source": [
"import math\n",
"\n",
"# One feature vector per row; empty cells become NaN and are imputed in a later cell.\n",
"data = [[math.nan if row[c] == '' else float(row[c]) for c in DATA_COLUMNS]\n",
"        for row in reader_data]\n",
"# No NaN fallback here on purpose: an empty TARGET_COLUMN value raises an error.\n",
"target = [float(row[TARGET_COLUMN]) for row in reader_data]"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [],
"source": [
"# First 30% of the rows are used for training, the remaining 70% for testing.\n",
"n_samples = len(data)\n",
"tr_st = 0\n",
"tr_ed = math.floor(n_samples * 0.3)\n",
"TRAINING_RANGE = (tr_st, tr_ed)\n",
"# Alternative kept from the original: TEST_RANGE = (len(data)-tr_ed, len(data))\n",
"TEST_RANGE = (tr_ed, n_samples)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Leere Zellen füllen\n",
"\n",
"Da nicht alle Datensätze alle Spalten haben – gäbe sicherlich bessere Strategien, aber das funktioniert erstaunlich gut (wahrscheinlich sind die \"wichtigen\" Spalten immer vorhanden)."
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"(7228, 311)"
]
},
"execution_count": 6,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"import numpy as np\n",
"from sklearn.impute import SimpleImputer\n",
"\n",
"# Fill every missing value with the constant 0.\n",
"# Other strategies remove fully null columns.\n",
"imp = SimpleImputer(fill_value=0, strategy='constant')\n",
"data = imp.fit_transform(data)\n",
"np.shape(data)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"CSV-Export, für MATLAB o.Ä."
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [],
"source": [
"# Write the imputed feature matrix and the target vector as comma-separated files,\n",
"# one pair of files per SETUP_ID.\n",
"imputed_path = 'imputed-{}.csv'.format(SETUP_ID)\n",
"target_path = 'target-{}.csv'.format(SETUP_ID)\n",
"np.savetxt(imputed_path, data, delimiter=',')\n",
"np.savetxt(target_path, target, delimiter=',')"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Analyze INPUT DATA\n",
"Eingangsdaten analysieren"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {
"scrolled": false
},
"outputs": [
{
"ename": "ModuleNotFoundError",
"evalue": "No module named 'astropy'",
"output_type": "error",
"traceback": [
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[0;31mModuleNotFoundError\u001b[0m Traceback (most recent call last)",
"\u001b[0;32m<ipython-input-8-ed5ca62780b2>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[0;31m# Convert to Table\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 2\u001b[0m \u001b[0;32mimport\u001b[0m \u001b[0msys\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 3\u001b[0;31m \u001b[0;32mfrom\u001b[0m \u001b[0mastropy\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mtable\u001b[0m \u001b[0;32mimport\u001b[0m \u001b[0mTable\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 4\u001b[0m \u001b[0mt\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mTable\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mdata\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mnames\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mDATA_COLUMNS\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 5\u001b[0m \u001b[0;31m#lat = t['latitude']\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;31mModuleNotFoundError\u001b[0m: No module named 'astropy'"
]
}
],
"source": [
"# Convert to Table\n",
"import sys\n",
"from astropy.table import Table\n",
"t = Table(data, names=DATA_COLUMNS)\n",
"\n",
"# 'latitude' and 'longitude' are filtered out of DATA_COLUMNS (see OVERFITTING_COLUMNS),\n",
"# so they are not columns of `t`. The map cells below still need the coordinates and\n",
"# their bounds, so take them from the raw rows instead.\n",
"# NOTE(review): assumes every row of reader_data has 'latitude' and 'longitude' fields - confirm.\n",
"coords = [(float(row['latitude']), float(row['longitude']))\n",
"          for row in reader_data\n",
"          if row['latitude'] != '' and row['longitude'] != '']\n",
"# Subsampling: keep every 10th point for plotting (the original note says this\n",
"# corresponds to one point every 50m)\n",
"lat = [point[0] for point in coords[::10]]\n",
"lng = [point[1] for point in coords[::10]]\n",
"\n",
"# determine range to print based on min, max lat and lon of the data\n",
"margin = 0 # buffer to add to the range\n",
"lat_min = min(lat) - margin\n",
"lat_max = max(lat) + margin\n",
"lon_min = min(lng) - margin\n",
"lon_max = max(lng) + margin\n",
"t"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {},
"outputs": [
{
"ename": "NameError",
"evalue": "name 'lon_min' is not defined",
"output_type": "error",
"traceback": [
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[0;31mNameError\u001b[0m Traceback (most recent call last)",
"\u001b[0;32m<ipython-input-9-d80dd05874af>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m\u001b[0m\n\u001b[1;32m 6\u001b[0m \u001b[0;31m# Converts given lat/lon in WGS84 Datum to XY in Spherical Mercator EPSG:900913\"\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 7\u001b[0m \u001b[0moriginShift\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;36m2\u001b[0m \u001b[0;34m*\u001b[0m \u001b[0mmath\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mpi\u001b[0m \u001b[0;34m*\u001b[0m \u001b[0;36m6378137\u001b[0m\u001b[0;34m/\u001b[0m\u001b[0;36m2.0\u001b[0m\u001b[0;34m;\u001b[0m \u001b[0;31m# 20037508.342789244\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 8\u001b[0;31m \u001b[0mxExtent_min\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mlon_min\u001b[0m \u001b[0;34m*\u001b[0m \u001b[0moriginShift\u001b[0m \u001b[0;34m/\u001b[0m \u001b[0;36m180\u001b[0m\u001b[0;34m;\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 9\u001b[0m \u001b[0myExtent_min\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mmath\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mlog\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mmath\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mtan\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;36m90\u001b[0m \u001b[0;34m+\u001b[0m \u001b[0mlat_min\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;34m*\u001b[0m \u001b[0mmath\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mpi\u001b[0m \u001b[0;34m/\u001b[0m \u001b[0;36m360\u001b[0m \u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;34m/\u001b[0m \u001b[0;34m(\u001b[0m\u001b[0mmath\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mpi\u001b[0m \u001b[0;34m/\u001b[0m \u001b[0;36m180\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m;\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 10\u001b[0m \u001b[0myExtent_min\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0myExtent_min\u001b[0m \u001b[0;34m*\u001b[0m \u001b[0moriginShift\u001b[0m \u001b[0;34m/\u001b[0m 
\u001b[0;36m180\u001b[0m\u001b[0;34m;\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;31mNameError\u001b[0m: name 'lon_min' is not defined"
]
}
],
"source": [
"# Calculation ZOOM LEVEL\n",
"width = 640\n",
"height = 640\n",
"tileSize = 256*4\n",
"MAX_ZOOM = 19\n",
"\n",
"originShift = 2 * math.pi * 6378137 / 2.0 # 20037508.342789244\n",
"\n",
"def _lonlat_to_mercator(lon, lat):\n",
"    \"\"\"Convert lon/lat in WGS84 datum to XY in Spherical Mercator EPSG:900913.\"\"\"\n",
"    x = lon * originShift / 180\n",
"    y = math.log(math.tan((90 + lat) * math.pi / 360)) / (math.pi / 180)\n",
"    return x, y * originShift / 180\n",
"\n",
"xExtent_min, yExtent_min = _lonlat_to_mercator(lon_min, lat_min)\n",
"xExtent_max, yExtent_max = _lonlat_to_mercator(lon_max, lat_max)\n",
"\n",
"# Resolution (projected units per pixel) needed to fit the track into width x height\n",
"minResX = (xExtent_max - xExtent_min) / width\n",
"minResY = (yExtent_max - yExtent_min) / height\n",
"minRes = max(minResX, minResY)\n",
"initialResolution = 2 * math.pi * 6378137 / tileSize # 156543.03392804062 for tileSize 256 pixels\n",
"\n",
"if minRes > 0:\n",
"    # Enforce valid zoom levels (0..MAX_ZOOM)\n",
"    zoomlevel = max(0, min(MAX_ZOOM, math.floor(math.log2(initialResolution / minRes))))\n",
"else:\n",
"    # Degenerate extent (all points share the same coordinates): the division above\n",
"    # would fail, so zoom in as far as allowed.\n",
"    zoomlevel = MAX_ZOOM"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {},
"outputs": [
{
"ename": "ModuleNotFoundError",
"evalue": "No module named 'plotly'",
"output_type": "error",
"traceback": [
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[0;31mModuleNotFoundError\u001b[0m Traceback (most recent call last)",
"\u001b[0;32m<ipython-input-10-546f749e9267>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[0;31m# Analyze Data\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 2\u001b[0;31m \u001b[0;32mimport\u001b[0m \u001b[0mplotly\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 3\u001b[0m \u001b[0;32mimport\u001b[0m \u001b[0mplotly\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mgraph_objs\u001b[0m \u001b[0;32mas\u001b[0m \u001b[0mgo\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 4\u001b[0m \u001b[0;32mimport\u001b[0m \u001b[0mplotly\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mplotly\u001b[0m \u001b[0;32mas\u001b[0m \u001b[0mpy\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 5\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;31mModuleNotFoundError\u001b[0m: No module named 'plotly'"
]
}
],
"source": [
"# Analyze Data\n",
"import os\n",
"\n",
"import plotly\n",
"import plotly.graph_objs as go\n",
"import plotly.plotly as py\n",
"\n",
"# Credentials must never be stored in the notebook: they are read from the environment.\n",
"# Set PLOTLY_USERNAME, PLOTLY_API_KEY and MAPBOX_ACCESS_TOKEN before starting Jupyter.\n",
"# The key and token that used to be hardcoded here were published with the notebook and\n",
"# should be revoked.\n",
"plotly.tools.set_credentials_file(username=os.environ['PLOTLY_USERNAME'],\n",
"                                  api_key=os.environ['PLOTLY_API_KEY'])\n",
"mapbox_access_token = os.environ['MAPBOX_ACCESS_TOKEN']\n",
"\n",
"# One marker per (subsampled) GPS position\n",
"data_p = [\n",
"    go.Scattermapbox(\n",
"        lat=lat,\n",
"        lon=lng,\n",
"        mode='markers',\n",
"        marker=dict(size=6))\n",
"]\n",
"\n",
"# Center the map on the middle of the bounding box of the track\n",
"layout = go.Layout(\n",
"    title='OBD-II GPS Logging',\n",
"    autosize=True,\n",
"    hovermode='closest',\n",
"    mapbox=dict(\n",
"        accesstoken=mapbox_access_token,\n",
"        bearing=0,\n",
"        center=dict(\n",
"            lon=(lon_max-lon_min)/2+lon_min,\n",
"            lat=(lat_max-lat_min)/2+lat_min,\n",
"        ),\n",
"        style='dark',\n",
"        pitch=0,\n",
"        zoom=zoomlevel\n",
"    ),\n",
")\n",
"\n",
"fig = dict(data=data_p, layout=layout)\n",
"#plotly.offline.plot(fig, filename='Mapbox.html')\n",
"py.iplot(fig, filename='Mapbox.html')"
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {},
"outputs": [
{
"ename": "NameError",
"evalue": "name 't' is not defined",
"output_type": "error",
"traceback": [
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[0;31mNameError\u001b[0m Traceback (most recent call last)",
"\u001b[0;32m<ipython-input-11-6fad4a6b6b96>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[0;32mimport\u001b[0m \u001b[0mmatplotlib\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mpyplot\u001b[0m \u001b[0;32mas\u001b[0m \u001b[0mplot\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 2\u001b[0;31m \u001b[0mtemp_d\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mt\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'distance'\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 3\u001b[0m \u001b[0mxaxis\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mtemp_d\u001b[0m \u001b[0;31m# range(int(temp_d[0]), int(temp_d[-1]))\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 4\u001b[0m \u001b[0mplot\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mfigure\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mfigsize\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;36m15\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;36m10\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 5\u001b[0m \u001b[0mplot\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0maxvline\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mx\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mtemp_d\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mTRAINING_RANGE\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;31mNameError\u001b[0m: name 't' is not defined"
]
}
],
"source": [
"import matplotlib.pyplot as plot\n",
"# 'distance' matches OVERFITTING_SUBWORDS and is therefore not a column of `t`;\n",
"# read it from the raw rows, which the query returns ordered by distance.\n",
"temp_d = np.array([float(row['distance']) for row in reader_data])\n",
"xaxis = temp_d # range(int(temp_d[0]), int(temp_d[-1]))\n",
"plot.figure(figsize=(15,10))\n",
"# Vertical lines mark the start of the training range and the start of the test range\n",
"plot.axvline(x=temp_d[TRAINING_RANGE[0]])\n",
"plot.axvline(x=temp_d[TEST_RANGE[0]])\n",
"# hr_traficSpeed is multiplied by 3.6 (m/s -> km/h, presumably - confirm the unit)\n",
"plot.plot(temp_d[TEST_RANGE[0]:TEST_RANGE[1]], target[TEST_RANGE[0]:TEST_RANGE[1]], 'b',\n",
"          xaxis, t['hr_traficSpeed']*3.6, 'r',\n",
"          xaxis, t['hr_SpeedLimit'],\n",
"          )\n",
"plot.legend(['Training','Test','OBD Speed','HERE Traffic Speed', 'HERE Speed Limint'])"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Training\n",
"\n",
"Bei großen Datensätzen kann es zur Fehlerausgabe \"UserWarning: A worker stopped while some jobs were given to the executor. This can be caused by a too short worker timeout or by a memory leak.\" kommen. Scheint vereinzelt am Ergebnis aber nicht viel zu ändern.\n",
"\n",
"Es werden alle gegebenen Parameterkombinationen mittels Cross-Validation getestet; die besten für die Vorhersage verwendet und dann auch ausgegeben."
]
},
{
"cell_type": "code",
"execution_count": 61,
"metadata": {},
"outputs": [],
"source": [
"import tensorflow as tf\n",
"from tensorflow.keras import layers\n",
"\n",
"n_features = np.shape(data)[1]\n",
"\n",
"model = tf.keras.Sequential()\n",
"# Hacky no-op layer for reshaping: it only declares the fixed input shape\n",
"# (batch of 1, one entry per feature column, 1 value each).\n",
"model.add(layers.Lambda(lambda x: x, batch_input_shape=(1, n_features, 1)))\n",
"model.add(layers.LSTM(256, stateful=True))\n",
"model.add(layers.Dense(1))\n",
"\n",
"model.compile(loss='mse',\n",
"              metrics=['mae'],\n",
"              optimizer=tf.keras.optimizers.Adam(0.001))"
]
},
{
"cell_type": "code",
"execution_count": 62,
"metadata": {},
"outputs": [],
"source": [
"from sklearn.preprocessing import StandardScaler\n",
"\n",
"# The scaler is fitted on the training rows only and then applied to all rows.\n",
"train_rows = slice(TRAINING_RANGE[0], TRAINING_RANGE[1])\n",
"scaler = StandardScaler()\n",
"scaled_training_data = scaler.fit_transform(data[train_rows])\n",
"scaled_data = scaler.transform(data)\n",
"# Target is scaled by 0.01; the evaluation cell multiplies predictions by 100 again.\n",
"scaled_target = np.multiply(target, 0.01)"
]
},
{
"cell_type": "code",
"execution_count": 63,
"metadata": {},
"outputs": [],
"source": [
"# PCA is currently disabled: the scaled features are passed through unchanged under the\n",
"# pca_* names used by the following cells. To enable it, use these lines instead:\n",
"#from sklearn.decomposition import PCA\n",
"#pca = PCA(n_components = 50)\n",
"#pca_training_data = pca.fit_transform(scaled_training_data)\n",
"#pca_data = pca.transform(scaled_data)\n",
"pca_training_data, pca_data = scaled_training_data, scaled_data"
]
},
{
"cell_type": "code",
"execution_count": 64,
"metadata": {
"scrolled": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Train on 2168 samples\n",
"Epoch 1/4\n",
"2168/2168 [==============================] - 187s 86ms/sample - loss: 0.1007 - mae: 0.2363\n",
"Epoch 2/4\n",
"2168/2168 [==============================] - 182s 84ms/sample - loss: 0.0789 - mae: 0.2323\n",
"Epoch 3/4\n",
"2168/2168 [==============================] - 183s 84ms/sample - loss: 0.0702 - mae: 0.2182\n",
"Epoch 4/4\n",
"2168/2168 [==============================] - 179s 82ms/sample - loss: 0.0423 - mae: 0.1636\n"
]
},
{
"data": {
"text/plain": [
"<tensorflow.python.keras.callbacks.History at 0x7fa202816610>"
]
},
"execution_count": 64,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"import numpy as np\n",
"\n",
"# Add a trailing axis of size 1 so the inputs match the model's (1, n_features, 1) input shape\n",
"x_train = np.array(pca_training_data).reshape(*np.shape(pca_training_data), 1)\n",
"y_train = np.array(scaled_target[TRAINING_RANGE[0]:TRAINING_RANGE[1]]).reshape(len(pca_training_data), 1)\n",
"# 4 epochs: seems to be the most common point of diminishing returns\n",
"model.fit(x_train, y_train, epochs=4, batch_size=1)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Testen und Plotten\n",
"\n",
"Wenn mit anderem Datensatz getestet werden soll:\n",
"- Neuen Datensatz herunterladen und einlesen\n",
"- Eventuell `TEST_RANGE` anpassen\n",
"- Untere Zelle ausführen"
]
},
{
"cell_type": "code",
"execution_count": 65,
"metadata": {
"scrolled": false
},
"outputs": [
{
"data": {
"text/plain": [
"<matplotlib.legend.Legend at 0x7fa202866580>"
]
},
"execution_count": 65,
"metadata": {},
"output_type": "execute_result"
},
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAA3AAAAJCCAYAAACBLyXFAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4xLjEsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy8QZhcZAAAgAElEQVR4nOzdeXxTVfo/8M/tXnaFDjuCFZA9LW0VB5TIIsgiKptKQUctVMcFRQZ+3ynrjEBZnBEZUGcQBQSqCCKKsjQKiEipLbIJtVgVBKtFlK3Q5fz+eHp7b9oUuiTNwuf9euWV5ia5OUmTnDz3POc5mlIKRERERERE5Pn83N0AIiIiIiIiKh8GcERERERERF6CARwREREREZGXYABHRERERETkJRjAEREREREReQkGcERERERERF4i4Go30DRtKYCBALKVUh0dXP8CgIdM+2sHIEwpdVrTtCwAZwEUAMhXSkU5q+FERERERETXGu1q68BpmnY7gHMA3nIUwJW47SAA45VSdxZdzgIQpZT61TnNJSIiIiIiunZdNYVSKbUdwOly7u8BAKuq1CIiIiIiIiJy6KoplOWlaVoNAP0A/NW0WQHYrGmaAvCqUuq18uyrQYMGqmXLls5qmlMc++U8AODGsJpubgkR8fNIREREvi41NfVXpVRYye1OC+AADALwuVLKPFr3Z6XUT5qm/QnAFk3Tvika0StF07Q4AHEA0KJFC+zdu9eJTau6Ea9+AQBYM7abm1tCRPw8EhERka/TNO17R9udWYVyJEqkTyqlfio6zwawDkBMWXdWSr2mlIpSSkWFhZUKNImIiIiIiK55TgngNE2rC+AOAO+bttXUNK22/jeAvgAOOOPxiIiIiIiIrkXlWUZgFYCeABpomnYcwFQAgQCglFpSdLN7AWxWSp033bUhgHWapumP87ZS6mPnNZ2IiIiIiOjactUATin1QDluswzAshLbjgHoUtmGERERERF5gry8PBw/fhy5ubnubgr5oJCQEDRr1gyBgYHlur0zi5gQEREREfmc48ePo3bt2mjZsiWKssuInEIphZycHBw/fhytWrUq132cWcSEiIiIiMjn5Obmon79+gzeyOk0TUP9+vUrNLrLAI6IiIiI6CoYvJGrVPS9xQCOiIiIiIjISzCAIyIiIiLyYDk5ObBYLLBYLGjUqBGaNm1afPny5cvl2scjjzyCI0eOXPE2ixYtwsqVK53RZHIhFjEhIiIiIvJg9evXR3p6OgBg2rRpqFWrFiZMmGB3G6UUlFLw83M8PvPGG29c9XGefPLJqjeWXI4jcEREREREXujbb79Fx44dMW7cOERGRuLkyZOIi4tDVFQUOnTogBkzZhTftnv37khPT0d+fj7q1auHSZMmoUuXLujWrRuys7MBAH//+9/xr3/9q/j2kyZNQkxMDNq2bYtdu3YBAM6fP4/7778fXbp0wQMPPICoqKji4JKqB0fgiIiIiIjKafoHB3Hopz+cus/2Tepg6qAOlbrvoUOH8MYbb2DJkiUAgNmzZ+P6669Hfn4+rFYrhg4divbt29vd5/fff8cdd9yB2bNn47nnnsPSpUsxadKkUvtWSmHPnj3YsGEDZsyYgY8//hgLFy5Eo0aNsHbtWuzbtw+RkZGVajdVHkfgiIiIiIi8VHh4OKKjo4svr1q1CpGRkYiMjMThw4dx6NChUvcJDQ1F//79AQBdu3ZFVlaWw33fd999pW6zc+dOjBw5EgDQpUsXdOhQucCTKo8jcERERERE5VTZkTJXqVmzZvHfGRkZ+Pe//409e/agXr16GDVqlMP1xYKCgor/9vf3R35+vsN9BwcHl7qNUsqZzadK4AgcEREREZEP+OOPP1C7dm3UqVMHJ0+exCeffOL0x+jevTuSkpIAAPv373c4wkeuxRE4IiIiIiIfEBkZifbt26Njx4648cYb8ec//9npj/HUU09h9OjR6Ny5My
IjI9GxY0fUrVvX6Y9DZdM8cRg0KipK7d27193NsDPi1S8AAGvGdnNzS4iIn0ciIqpOhw8fRrt27dzdDI+Qn5+P/Px8hISEICMjA3379kVGRgYCAjguVBWO3mOapqUqpaJK3pavNBERERERlcu5c+fQq1cv5OfnQymFV199lcFbNeOrTURERERE5VKvXj2kpqa6uxnXNBYxISIiIiIi8hIM4IiIiIiIdImJgM1mvy03Fzh1yj3tISqBARwRERERkS46Ghg+3AjibDbgl1+AGjXc2y6iIgzgiIiIiIh0ViuQlCRB3JQpch4WBtSp4+6WEQFgAEdEREREZM9qBeLjgZkz5TwkxK3NycnJgcVigcViQaNGjdC0adPiy5cvXy73fpYuXYpTTAX1eqxCSURERERkZrMBixcDCQlyfu+9bm1O/fr1kZ6eDgCYNm0aatWqhQkTJlR4P0uXLkVkZCQaNWrk7CZSNWIAR0RERESks9kkbTIpSUbirFaZA/fHHx6ZRvnmm29i0aJFuHz5Mm677Ta88sorKCwsxCOPPIL09HQopRAXF4eGDRsiPT0dI0aMQGhoKPbs2YOgoCB3N58qgQEcEREREZEuJcUI3gA5T0sDLlwA6tTB9A8O4tBPfzj1Ids3qYOpgzpU+H4HDhzAunXrsGvXLgQEBCAuLg6rV69GeHg4fv31V+zfvx8AcObMGdSrVw8LFy7EK6+8AovF4tT2U/ViAEdERERE5IW2bt2KlJQUREVFAQAuXryI5s2b46677sKRI0fwzDPP4O6770bfvn3d3FJyJgZwREREREQ6fRkBfRTOZgPy8oDwcACo1EiZqyil8Je//AUzZ84sdd3XX3+NTZs24eWXX8batWvx2muvuaGF5AqsQklEREREpPOiZQR69+6NpKQk/PrrrwCkWuUPP/yAX375BUopDBs2DNOnT8dXX30FAKhduzbOnj3rziaTE3AEjoiIiIjIzLyMQEKC25cRKEunTp0wdepU9O7dG4WFhQgMDMSSJUvg7++PRx99FEopaJqGOXPmAAAeeeQRPPbYYyxi4uUYwBERERERmXnYMgJm06ZNs7v84IMP4sEHHyx1u7S0tFLbhg8fjuHDh7uqaVRNGMAREREREem8bBkBuvZwDhwRERERkc7RMgJhYbKMAJEH4AgcEREREZFu4sTS20JCgEaNqr8tRA5wBI6IiIiIiMhLMIAjIiIiIiLyEgzgiIiIiIh0iYlSyMQsNxc4dco97SEqgQEcEREREZEuOlqqUOpBnM0mVShr1HBbk3JycmCxWGCxWNCoUSM0bdq0+PLly5exbt06aJqGb775pvg+WVlZCA0NhcViQfv27TF69Gjk5eUVX79nzx707NkTrVu3RmRkJAYMGID9+/cDkKUKzI9hsViwZs2a4r9r1aqFtm3bwmKxYPTo0aXae/DgQdx5551o06YNWrdujZkzZ0IpBQBYtmwZwsLCYLFY0KFDBwwdOhQXigrEmB+3devWuO+++3Do0CGHr8nu3btxyy23wGKxoF27dqWWV3C2ZcuW4a9//atLH6O8GMAREREREemsVqlCOXw4MGWKnIeFuXUJgfr16yM9PR3p6ekYN24cxo8fX3w5KCgIq1atQvfu3bF69Wq7+4WHhyM9PR379+/H8ePHkZSUBAD4+eefMXz4cLz44ovIyMjAV199hcmTJyMzM7P4vubHSE9Px4gRI4r/joqKwsqVK5Geno633nrL7jEvXryIwYMHY9KkSTh69Cj27duHXbt24T//+U/xbfR9HTx4EEFBQVizZk2px83IyMCIESNw55134pdffin1mowZMwavvfYa0tPTceDAgWtqfTsGcEREREREZlYrEB8PzJwp5yEh7m5Rmc6dO4fPP/8c//vf/0oFcDp/f3/ExMTgxIkTAIBXXnkFY8aMwW233VZ8m+7du2PIkCFVbs/bb7+NP//5z+jbty8AoEaNGnjllVcwe/bsUrfNz8/H+fPncd111z
nc14gRI9C3b1+8/fbbpa7Lzs5G48aNi59f+/btAcgoXmxsLO688060bt0ar7/+evF95s6di+joaHTu3BlTp04t3r5
"text/plain": [
"<Figure size 1080x720 with 1 Axes>"
]
},
"metadata": {
"needs_background": "light"
},
"output_type": "display_data"
}
],
"source": [
"from scipy.signal import savgol_filter\n",
"\n",
"# Predict over all rows (training and test part), still in the 0.01-scaled target unit\n",
"predicted = model.predict(pca_data.reshape(*np.shape(pca_data), 1))\n",
"# savgol_filter for smoothing (params are window size and polynomial degree), currently disabled:\n",
"#predicted = savgol_filter(predicted, 51, 3, axis=0)\n",
"\n",
"xaxis = range(0, TEST_RANGE[1])\n",
"plot.figure(figsize=(15,10))\n",
"# Vertical lines at the start and at the end of the training range\n",
"for boundary in TRAINING_RANGE:\n",
"    plot.axvline(x=boundary)\n",
"plot.plot(xaxis, scaled_target, 'b', xaxis, predicted, 'rx')\n",
"plot.legend(['Training','Test','TARGET OBD Speed','PREDICTED OBD Speed'])"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Gütekriterium - Prädiktion\n",
"\n",
"Berechnung des Gütekriteriums\n",
"- Root-mean-square deviation RMSE\n",
"- NRMSE Normalized root-mean-square deviation\n",
"- Mean absolute error MAE\n",
"- Mean absolute percentage error MAP\n",
"- Symmetric mean absolute percentage error\n",
"- https://en.wikipedia.org/wiki/Least_absolute_deviations\n",
"- https://en.wikipedia.org/wiki/Mean_signed_deviation\n",
"- Pearson Correlation Coefficient\n",
"- Accuracy (Interval of given size; absolute and relative)\n",
"- Median Absolute Deviation\n",
"\n",
"BITTE weitere Kriterien ergänzen\n"
]
},
{
"cell_type": "code",
"execution_count": 66,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"RMSE = 23.21 km/h\n",
"NRMSE = -1.11 %\n",
"MAE = 19.08 km/h\n",
"MAP = 25.80 %\n",
"SMAPE = 23.35 %\n",
"MSD = 3.52 km/h\n",
"CORR = 0.44\n",
"ACC_A = 27.94 %\n",
"ACC_R = 24.19 %\n",
"MAD = 17.29 km/h\n"
]
}
],
"source": [
"# ta: measured target values of the test range; pr: model predictions scaled back by 100\n",
"# (the target was scaled by 0.01 for training).\n",
"ta = np.asarray(target[TEST_RANGE[0]:TEST_RANGE[1]], dtype=float)\n",
"pr = np.squeeze(np.multiply(model.predict(pca_data[TEST_RANGE[0]:TEST_RANGE[1]].reshape(TEST_RANGE[1]-TEST_RANGE[0], np.shape(pca_data)[1], 1)), 100))\n",
"err = ta - pr\n",
"abs_err = np.abs(err)\n",
"\n",
"RMSE = np.sqrt(np.mean(err**2))\n",
"print(\"RMSE = %.2f km/h\" %RMSE)\n",
"# 1 - sqrt(SSE)/sqrt(SST): negative when the prediction is worse than predicting the mean\n",
"NRMSE = 1 - np.sqrt(np.sum(err**2)) / np.sqrt(np.sum((ta - np.mean(ta))**2))\n",
"print(\"NRMSE = %.2f %%\" %(NRMSE*100))\n",
"MAE = np.mean(abs_err)\n",
"print(\"MAE = %.2f km/h\" %MAE)\n",
"# Samples with a target of 0 contribute 0 (also for 0/0, which used to give NaN)\n",
"map_elements = np.divide(abs_err, np.abs(ta), out=np.zeros_like(abs_err), where=ta != 0)\n",
"MAP = np.sum(map_elements) / len(ta)\n",
"print(\"MAP = %.2f %%\" % (MAP*100))\n",
"# SMAPE uses (|target| + |prediction|) / 2 as denominator; without the absolute values a\n",
"# negative prediction shrinks the denominator or makes the term negative.\n",
"smape_denominator = (np.abs(ta) + np.abs(pr)) / 2\n",
"smape_elements = np.divide(abs_err, smape_denominator, out=np.zeros_like(abs_err), where=smape_denominator != 0)\n",
"SMAPE = np.sum(smape_elements) / len(ta)\n",
"print(\"SMAPE = %.2f %%\" % (SMAPE*100))\n",
"MSD = np.sum(err) / len(ta)\n",
"print(\"MSD = %.2f km/h\" % MSD)\n",
"CORR = np.corrcoef(ta, pr)[1][0]\n",
"print(\"CORR = %.2f\" % CORR)\n",
"ACC_A_THRESHOLD = 10\n",
"ACC_A = (abs_err < ACC_A_THRESHOLD).sum() / len(ta)\n",
"print(\"ACC_A = %.2f %%\" % (ACC_A*100))\n",
"ACC_R_THRESHOLD = 0.1\n",
"# A prediction of 0 gives inf/NaN here, which never passes the threshold test\n",
"with np.errstate(divide='ignore', invalid='ignore'):\n",
"    ACC_R = (np.abs(ta / pr - 1) < ACC_R_THRESHOLD).sum() / len(ta)\n",
"print(\"ACC_R = %.2f %%\" % (ACC_R*100))\n",
"MAD = np.median(abs_err)\n",
"print(\"MAD = %.2f km/h\" % MAD)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Vergleich mit HERE Maps Traffic Speed\n",
"Kann zum Vergleich sehr gut herangezogen werden ;)"
]
},
{
"cell_type": "code",
"execution_count": 67,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"RMSE = 16.82 km/h\n",
"NRMSE = 26.73 %\n",
"MAE = 14.06 km/h\n",
"MAP = 15.87 %\n",
"SMAPE = 16.62 %\n",
"MSD = 9.82 km/h\n",
"CORR = 0.81\n",
"ACC_A = 40.18 %\n",
"ACC_R = 32.00 %\n",
"MAD = 12.84 km/h\n"
]
}
],
"source": [
"# ta: measured target values of the test range; pr: HERE traffic speed of the same rows,\n",
"# multiplied by 3.6. Missing HERE values are replaced by 0.\n",
"ta = np.asarray(target[TEST_RANGE[0]:TEST_RANGE[1]], dtype=float)\n",
"pr = [d['hr_traficSpeed'] for d in reader_data] #t['hr_traficSpeed']\n",
"pr = np.array([float(d) if d != '' else 0.0 for d in pr])\n",
"pr = pr[TEST_RANGE[0]:TEST_RANGE[1]] * 3.6\n",
"err = ta - pr\n",
"abs_err = np.abs(err)\n",
"\n",
"RMSE = np.sqrt(np.mean(err**2))\n",
"print(\"RMSE = %.2f km/h\" %RMSE)\n",
"# 1 - sqrt(SSE)/sqrt(SST): negative when the prediction is worse than predicting the mean\n",
"NRMSE = 1 - np.sqrt(np.sum(err**2)) / np.sqrt(np.sum((ta - np.mean(ta))**2))\n",
"print(\"NRMSE = %.2f %%\" %(NRMSE*100))\n",
"MAE = np.mean(abs_err)\n",
"print(\"MAE = %.2f km/h\" %MAE)\n",
"# Samples with a target of 0 contribute 0 (also for 0/0, which used to give NaN)\n",
"map_elements = np.divide(abs_err, np.abs(ta), out=np.zeros_like(abs_err), where=ta != 0)\n",
"MAP = np.sum(map_elements) / len(ta)\n",
"print(\"MAP = %.2f %%\" % (MAP*100))\n",
"# SMAPE uses (|target| + |prediction|) / 2 as denominator; samples where both are 0\n",
"# contribute 0 instead of NaN.\n",
"smape_denominator = (np.abs(ta) + np.abs(pr)) / 2\n",
"smape_elements = np.divide(abs_err, smape_denominator, out=np.zeros_like(abs_err), where=smape_denominator != 0)\n",
"SMAPE = np.sum(smape_elements) / len(ta)\n",
"print(\"SMAPE = %.2f %%\" % (SMAPE*100))\n",
"MSD = np.sum(err) / len(ta)\n",
"print(\"MSD = %.2f km/h\" % MSD)\n",
"CORR = np.corrcoef(ta, pr)[1][0]\n",
"print(\"CORR = %.2f\" % CORR)\n",
"ACC_A_THRESHOLD = 10\n",
"ACC_A = (abs_err < ACC_A_THRESHOLD).sum() / len(ta)\n",
"print(\"ACC_A = %.2f %%\" % (ACC_A*100))\n",
"ACC_R_THRESHOLD = 0.1\n",
"# Missing HERE speeds are 0, so ta / pr is inf/NaN there; such samples never pass the test\n",
"with np.errstate(divide='ignore', invalid='ignore'):\n",
"    ACC_R = (np.abs(ta / pr - 1) < ACC_R_THRESHOLD).sum() / len(ta)\n",
"print(\"ACC_R = %.2f %%\" % (ACC_R*100))\n",
"MAD = np.median(abs_err)\n",
"print(\"MAD = %.2f km/h\" % MAD)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Generalisieren"
]
},
{
"cell_type": "code",
"execution_count": 68,
"metadata": {},
"outputs": [],
"source": [
"model2 = tf.keras.Sequential([\n",
" layers.Lambda(lambda x: x, batch_input_shape = (1, np.shape(data)[1], 1)), # Hacky No-op layer for reshaping\n",
" layers.LSTM(256, stateful = True),\n",
" layers.Dense(1)\n",
"])\n",
"\n",
"model2.compile(optimizer = tf.keras.optimizers.Adam(0.001),\n",
" loss = 'mse',\n",
" metrics = ['mae'])"
]
},
{
"cell_type": "code",
"execution_count": 69,
"metadata": {
"scrolled": true
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Train on 7228 samples\n",
"Epoch 1/4\n",
"7228/7228 [==============================] - 609s 84ms/sample - loss: 0.0670 - mae: 0.1998\n",
"Epoch 2/4\n",
"7228/7228 [==============================] - 584s 81ms/sample - loss: 0.0729 - mae: 0.2219\n",
"Epoch 3/4\n",
"7228/7228 [==============================] - 576s 80ms/sample - loss: 0.0709 - mae: 0.2189\n",
"Epoch 4/4\n",
"7228/7228 [==============================] - 577s 80ms/sample - loss: 0.0690 - mae: 0.2162\n"
]
},
{
"data": {
"text/plain": [
"<tensorflow.python.keras.callbacks.History at 0x7fa2023aebe0>"
]
},
"execution_count": 69,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"model2.fit(np.array(pca_data).reshape(*np.shape(pca_data), 1), np.array(scaled_target).reshape(len(pca_data), 1),\n",
" epochs = 4, batch_size = 1)"
]
},
{
"cell_type": "code",
"execution_count": 75,
"metadata": {},
"outputs": [],
"source": [
"SETUP_ID_2 = 450\n",
"\n",
"reader2 = runsql('select * from computeddata where setup_id = {} order by distance asc'.format(SETUP_ID_2))\n",
"reader_data2 = list(reader2) # list(...) so that following cells can be repeated"
]
},
{
"cell_type": "code",
"execution_count": 76,
"metadata": {},
"outputs": [],
"source": [
"data2 = []\n",
"target2 = []\n",
"for row in reader_data2:\n",
" data2 += [[float(row[c]) if row[c] != '' else math.nan for c in DATA_COLUMNS]]\n",
" target2 += [float(row[TARGET_COLUMN]) if row[TARGET_COLUMN] != '' else math.nan]"
]
},
{
"cell_type": "code",
"execution_count": 77,
"metadata": {},
"outputs": [],
"source": [
"from sklearn.impute import SimpleImputer\n",
"imp = SimpleImputer(strategy='constant', fill_value=0) # Other strategies remove fully null columns\n",
"data2 = imp.fit_transform(data2)"
]
},
{
"cell_type": "code",
"execution_count": 78,
"metadata": {},
"outputs": [],
"source": [
"scaled_data2 = scaler.transform(data2)\n",
"scaled_target2 = np.multiply(target2, 0.01)"
]
},
{
"cell_type": "code",
"execution_count": 79,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"<matplotlib.legend.Legend at 0x7fa2028475e0>"
]
},
"execution_count": 79,
"metadata": {},
"output_type": "execute_result"
},
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAA3AAAAI/CAYAAAA2kzvaAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4xLjEsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy8QZhcZAAAgAElEQVR4nOydeZgU1dXG38swqwMIwy4IiuwgA7KIioILbgFRUVwiajRGRY0kRiWJwheNMWpComDQREVFAY2KUTQiEVRwQZBhXwYQlEWWGdZh9qnvj8Odqu6unqnuru6u7n5/zzPPrbq13emurrrvPeeeowzDACGEEEIIIYQQ79Mg3g0ghBBCCCGEEOIMCjhCCCGEEEIISRAo4AghhBBCCCEkQaCAI4QQQgghhJAEgQKOEEIIIYQQQhIECjhCCCGEEEIISRAaxrsBdjRv3tzo2LFjvJtBCCGEEEIIIXFh2bJl+wzDaOFf70kB17FjRyxdujTezSCEEEIIIYSQuKCU2mZXTxdKQgghhBBCCEkQKOAIIYQQQgghJEGggCOEEEIIIYSQBMGTc+DsqKysxPbt21FWVhbvppAkIysrC+3atUN6enq8m0IIIYQQQkidJIyA2759Oxo1aoSOHTtCKRXv5pAkwTAMFBUVYfv27TjppJPi3RxCCCGEEELqJGFcKMvKypCXl0fxRlxFKYW8vDxadgkhhBBCSEKQMAIOAMUbiQq8rwghhBBCSKKQUAIuXhQVFSE/Px/5+flo3bo1TjjhhNr1iooKvPPOO1BKYf369bXHbN26FdnZ2cjPz0ePHj0wduxYVFZW1m5fsmQJhg4dis6dO6Nfv3649NJLsWrVKgDApEmTfK6Rn5+P2bNn1y7n5uaia9euyM/Px9ixYwPau2bNGpx77rno0qULOnfujEceeQSGYQAApk+fjhYtWiA/Px89e/bE6NGjcfTo0YDrdu7cGVdccQXWrl1r+5l89dVXGDRoEPLz89G9e3dMmjTJrY/blunTp+Ouu+6K6jUIIYQQQgjxOhRwDsjLy0NBQQEKCgpw++23Y/z48bXrGRkZmDlzJs466yzMmjXL57hOnTqhoKAAq1atwvbt2/HGG28AAHbv3o2rr74ajz32GAoLC/Htt99iwoQJ2Lx5c+2x1msUFBRgzJgxtcv9+/fHa6+9hoKCArzyyis+1ywtLcXIkSPx4IMPYuPGjVixYgW++OILPPvss7X76HOtWbMGGRkZmD17dsB1CwsLMWbMGJx77rnYu3dvwGdy44034vnnn0dBQQFWr16Nq6++2pXPmhBCCCGEEBIcCrgIOXLkCBYvXowXXnghQMBp0tLSMHDgQOzYsQMAMGXKFNx4440444wzavc566yzMGrUqIjb8/rrr+PMM8/E8OHDAQA5OTmYMmUKHn/88YB9q6qqUFJSgqZNm9qea8yYMRg+fDhef/31gG179uxBmzZtav+/Hj16ABAr3g033IBzzz0XnTt3xj//+c/aY5588kkMGDAAp556KiZOnFhbP2PGDAwcOBD5+fn4xS9+gerqagDASy+9hC5duuCcc87B4sWLw/xECCGEEEIISR4o4CJkzpw5uOiii9ClSxc0a9YM3377bcA+ZWVl+Prrr3HRRRcBEBfHfv361XneyZMn17pMDhs2zHF71qxZg9NOO82nrlOnTjhy5AgOHToEALXumCeccAKKi4sxYsSIoOfr16+fj2uoZvz48ejatSsuv/xyPPfccz5BQFauXIm5c+fiyy+/xB/+8Afs3LkT8+bNQ2FhIZYsWYKCggIsW7YMn332GdatW4fZs2dj8eLFKCgoQFpaGl577TXs2rULEydOxOLFi/Hxxx8HdeUkhBBCCCEklUiYNAJW7r0XKChw95z5+cDf/hb6cTNnzsS9994LALjmmmswc+bMWnG2efNm5Ofno7CwEKNHj8app55qe45Bgwbh0KFDGD58OP7+97
8DEIF03333hdwewzCCBuXQ9WPGjMGUKVNgGAbGjRuHJ598Eg8++GDQ89nx8MMP4/rrr8e8efPw+uuvY+bMmVi4cCEA4LLLLkN2djays7MxbNgwLFmyBIsWLcK8efPQt29fAGK5LCwsxMqVK7Fs2TIMGDAAgLiAtmzZEl9//TWGDh2KFi1a1LZ548aNIX8ehBBCCCGEJBO0wEVAUVERPvnkE9x6663o2LEjnnzyScyePbtW9Og5cJs2bcJXX32F//znPwCAnj17+ljqvv76azzyyCM4ePBgxG3q2bMnli5d6lO3ZcsW5ObmolGjRj71SimMGDECn332WdDzLV++HN27d7fd1qlTJ9xxxx343//+hxUrVqCoqKj2vP7XMQwDEyZMqJ3Ht2nTJtxyyy0wDAM33nhjbf2GDRtqA6IwOiQhhBBCCCG+JKQFLhxLWTT497//jbFjx+K5556rrTvnnHOwaNEitG/fvrauTZs2ePzxx/GnP/0JI0eOxLhx4zBo0CBceOGFtfPgdCTISLn++uvx2GOPYf78+Tj//PNRWlqKe+65B/fff7/t/osWLUKnTp1st7311luYN28e/vKXvwRsmzt3Li655BIopVBYWIi0tDQcf/zxAIB3330XEyZMQElJCRYuXIjHH38c2dnZeOihh3D99dcjNzcXO3bsQHp6Os477zxcdtllGD9+PFq2bIni4mIcPnwYgwYNwi9/+UsUFRWhcePGePPNN9GnTx9XPiNCCCGEEEISlYQUcF5h5syZAa6HV155JV5//XU88MADPvWjRo3CpEmT8Pnnn2PIkCGYPXs2HnjgAezYsQMtW7ZE8+bN8fDDD9fuP3nyZMyYMaN2fc6cOejYsWO9bcrOzsa7776Lu+++G+PGjUN1dTVuuOEGnxD8s2fPxqJFi1BTU4N27dph+vTpAdctKSlBr1698Mknn9S6MVp59dVXMX78eOTk5KBhw4Z47bXXkJaWBgAYOHAgLr30Unz//fd46KGH0LZtW7Rt2xbr1q3D4MGDAQC5ubmYMWMGevTogUcffRTDhw9HTU0N0tPTMXXqVJx++umYNGkSBg8ejDZt2qBfv361wU0IIYQQQghJVVSwOU7xpH///oa/G+C6deuCuvIR7zBp0iTk5uaGNX8vnvD+IoQQQgghXkIptcwwjP7+9ZwDRwghhBBCCCEJAl0oiavoACSEEEIIIYQQ96EFjhBCCCGEEEISBAo4QgghhBBCCEkQKOAIIYQQQgghJEGggCOEEEIIIYSQBIECLgTS0tKQn5+PXr164aqrrqpNvm2tHzFiBA4cOAAA2Lp1K7Kzs5Gfn1/798orrwAAOnbsiN69e6N3797o0aMHfv/736O8vLz2uF69etVed8mSJTj77LPRtWtXdOvWDbfeeiumTp1ae86MjAz07t0b+fn5ePDBBzF9+nS0aNHC57pr166tbU/fvn3RvXt3DBw4EC+//HLQ/3fRokUYOHAgunXrhm7duuH555+v3TZp0iSccMIJyM/PR7du3XDHHXegpqYGAHDTTTfhpJNOQp8+fdClSxeMHTsWO3bssL3G+++/j759+6JPnz7o0aOHT1L0aDBp0iQ89dRTUb0GIYQQQogTli8HfvIT4FiXkhBHUMCFQHZ2NgoKCrB69WpkZGRg2rRpAfXNmjXD1KlTa4/p1KkTCgoKav/Gjh1bu23BggVYtWoVlixZgi1btuC2224LuObu3btx1VVX4c9//jM2bNiAdevW4aKLLsLo0aNrz9m2bVssWLAABQUFePzxxwEAY8aM8blujx49atuzfPlyrFu3DrNmzcLkyZPx0ksvBVz3xx9/xHXXXYdp06Zh/fr1WLRoEZ577jnMnTu3dp/x48ejoKAAa9euxapVq/Dpp5/WbnvyySexYsUKbNiwAX379sWwYcNQUVHhc43KykrcdttteO+997BixQosX74cQ4cODeObIYQQQghJPO69F5g7FygoiHdLSCJRr4BTSrVXSi
1QSq1TSq1RSv3SZh+llHpaKbVJKbVSKdXPsu0ipdSGY9sedPsfsOWJJ4AFC3zrFiyQepcYMmQINm3aFFA/ePDgoNa
"text/plain": [
"<Figure size 1080x720 with 1 Axes>"
]
},
"metadata": {
"needs_background": "light"
},
"output_type": "display_data"
}
],
"source": [
"xaxis = range(0, len(target2))\n",
"plot.figure(figsize=(15,10))\n",
"plot.plot(xaxis, scaled_target2, 'b', xaxis, model2.predict(scaled_data2.reshape(*np.shape(scaled_data2), 1)), 'rx')\n",
"plot.legend(['TARGET OBD Speed','PREDICTED OBD Speed'])"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Ab hier Überbleibsel von vorherigem Test, schien mir aber keine wirklich neuen Erkenntnisse zu liefern und wurde daher mit anderer Architektur nicht weiter verfolgt.\n",
"\n",
"---\n",
"\n",
"Wie auch Regression Trees scheinen unbekannte Geschwindigkeiten starke Probleme zu machen, von daher nochmal (vgl. anderes Notebook)"
]
},
{
"cell_type": "code",
"execution_count": 35,
"metadata": {},
"outputs": [],
"source": [
"model3 = tf.keras.Sequential([\n",
" layers.Lambda(lambda x: x, batch_input_shape = (1, np.shape(data)[1], 1)),\n",
" layers.LSTM(128, stateful = True),\n",
" layers.Reshape((1, 128)),\n",
" layers.LSTM(64, stateful = True),\n",
" layers.Dense(1)\n",
"])\n",
"\n",
"model3.compile(optimizer = tf.keras.optimizers.Adam(0.001),\n",
" loss = 'mse',\n",
" metrics = ['mae'])"
]
},
{
"cell_type": "code",
"execution_count": 36,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Train on 49135 samples\n",
"Epoch 1/10\n",
"49135/49135 [==============================] - 1888s 38ms/sample - loss: 0.0782 - mae: 0.2148\n",
"Epoch 2/10\n",
"49135/49135 [==============================] - 1858s 38ms/sample - loss: 0.0248 - mae: 0.1156\n",
"Epoch 3/10\n",
"49135/49135 [==============================] - 1853s 38ms/sample - loss: 0.0158 - mae: 0.0902\n",
"Epoch 4/10\n",
"49135/49135 [==============================] - 1849s 38ms/sample - loss: 0.0118 - mae: 0.0762\n",
"Epoch 5/10\n",
"49135/49135 [==============================] - 1917s 39ms/sample - loss: 0.0090 - mae: 0.0650\n",
"Epoch 6/10\n",
"49135/49135 [==============================] - 2046s 42ms/sample - loss: 0.0074 - mae: 0.0580\n",
"Epoch 7/10\n",
"49135/49135 [==============================] - 2023s 41ms/sample - loss: 0.0071 - mae: 0.0556\n",
"Epoch 8/10\n",
"49135/49135 [==============================] - 1841s 37ms/sample - loss: 0.0058 - mae: 0.0501\n",
"Epoch 9/10\n",
"49135/49135 [==============================] - 1982s 40ms/sample - loss: 0.0053 - mae: 0.0477\n",
"Epoch 10/10\n",
"49135/49135 [==============================] - 1960s 40ms/sample - loss: 0.0051 - mae: 0.0456\n"
]
},
{
"data": {
"text/plain": [
"<tensorflow.python.keras.callbacks.History at 0x7fa219b94df0>"
]
},
"execution_count": 36,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"model3.fit(np.array(scaled_data2).reshape(*np.shape(scaled_data2), 1), np.array(scaled_target2).reshape(len(scaled_data2), 1),\n",
" epochs = 10, batch_size = 1)"
]
},
{
"cell_type": "code",
"execution_count": 37,
"metadata": {},
"outputs": [],
"source": [
"SETUP_ID_3 = 888\n",
"\n",
"reader3 = runsql('select * from computeddata where setup_id = {} order by distance asc'.format(SETUP_ID_3))\n",
"reader_data3 = list(reader3)"
]
},
{
"cell_type": "code",
"execution_count": 38,
"metadata": {},
"outputs": [],
"source": [
"data3 = []\n",
"target3 = []\n",
"for row in reader_data3:\n",
" data3 += [[float(row[c]) if row[c] != '' else math.nan for c in DATA_COLUMNS]]\n",
" target3 += [float(row[TARGET_COLUMN]) if row[TARGET_COLUMN] != '' else math.nan]"
]
},
{
"cell_type": "code",
"execution_count": 39,
"metadata": {},
"outputs": [],
"source": [
"data3 = imp.transform(data3)"
]
},
{
"cell_type": "code",
"execution_count": 40,
"metadata": {},
"outputs": [],
"source": [
"scaled_data3 = scaler.transform(data3)\n",
"scaled_target3 = np.multiply(target3, 0.01)"
]
},
{
"cell_type": "code",
"execution_count": 41,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"<matplotlib.legend.Legend at 0x7fa21db42ac0>"
]
},
"execution_count": 41,
"metadata": {},
"output_type": "execute_result"
},
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAA3AAAAI/CAYAAAA2kzvaAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4xLjEsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy8QZhcZAAAgAElEQVR4nOyde3gU1fnHv7ObzRUTlIR7EKRAuGm4JCAVSuQi4SZYBVoMttZfMNpakYvQNmi1lmuwtVqhaouAGrQoBZUWMauCVolIEBDlrgIKGqpAIJDsnt8fb87O7O7s7uzubHaTvJ/nmWd2ztzOzJyZPe95b4oQAgzDMAzDMAzDMEzsY4l2BRiGYRiGYRiGYRhjsADHMAzDMAzDMAzTQGABjmEYhmEYhmEYpoHAAhzDMAzDMAzDMEwDgQU4hmEYhmEYhmGYBgILcAzDMAzDMAzDMA2EuGhXQI/09HTRsWPHaFeDYRiGYRiGYRgmKuzYseNbIUSGZ3lMCnAdO3bEhx9+GO1qMAzDMAzDMAzDRAVFUT7XK2cTSoZhGIZhGIZhmAYCC3AMwzAMwzAMwzANBBbgGIZhGIZhGIZhGggx6QOnR01NDY4dO4bq6upoV4VpZCQmJqJ9+/aw2WzRrgrDMAzDMAzD+KXBCHDHjh3DZZddho4dO0JRlGhXh2kkCCFQWVmJY8eOoVOnTtGuDsMwDMMwDMP4pcGYUFZXV6NFixYsvDGmoigKWrRowZpdhmEYhmEYpkHQYAQ4ACy8MRGB2xXDMAzDMAzTUGhQAly0qKysRHZ2NrKzs9G6dWu0a9fOtXzp0iW88sorUBQFn376qWufo0ePIikpCdnZ2ejRowemTZuGmpoa1/rt27dj6NCh6NKlC/r27YsxY8Zg9+7dAIAHH3zQ7RzZ2dlYu3at63ezZs3QrVs3ZGdnY9q0aV713bt3L66//np07doVXbp0wcMPPwwhBABg5cqVyMjIQHZ2Nnr27Imbb74Z58+f9zpvly5dcNNNN+GTTz7RvSfvv/8+BgwYgOzsbHTv3h0PPvigWbdbl5UrV+KXv/xlRM/BMAzDMAzDMLEOC3AGaNGiBSoqKlBRUYE777wTM2bMcC3Hx8fjhRdewHXXXYfS0lK3/Tp37oyKigrs3r0bx44dw4svvggAOHnyJCZNmoQ//vGPOHDgAD766CPMmzcPhw4dcu2rPUdFRQUmT57s+t2/f38899xzqKiowKpVq9zOeeHCBYwfPx5z587F/v37sWvXLrz33nv461//6tpGHmvv3r2Ij4/H2rVrvc574MABTJ48Gddffz2++eYbr3ty22234W9/+xsqKiqwZ88eTJo0yZR7zTAMwzAMwzCMb1iAC5Nz587h3XffxTPPPOMlwEmsVityc3Nx/PhxAMDjjz+O2267DYMGDXJtc91112HChAlh1+f555/HD3/4Q4wcORIAkJycjMcffxwLFy702ra2thZVVVW4/PLLdY81efJkjBw5Es8//7zXulOnTqFNmzau6+vRowcA0uIVFBTg+uuvR5cuXfDUU0+59lmyZAlycnJw9dVX44EHHnCVr1mzBrm5ucjOzsb06dPhcDgAAP/4xz/QtWtX/OhHP8K7774b4h1hGIZhGIZhmMYDC3Bhsn79eowaNQpdu3bFFVdcgY8++shrm+rqanzwwQcYNWoUADJx7Nu3r9/jPvrooy6Tyby8PMP12bt3L/r16+dW1rlzZ5w7dw5nzpwBAJc5Zrt27XD69GmMGzfO5/H69u3rZhoqmTFjBrp164aJEydixYoVbkFAPv74Y7z22mv473//i4ceeggnTpzA5s2bceDAAWzfvh0VFRXYsWMH3nnnHezbtw9r167Fu+++i4qKClitVjz33HP46quv8MADD+Ddd9/FG2+84dOUk2EYhmEYhmGaEg0mjYCWe+8FKirMPWZ2NvCnPwW/3wsvvIB7770XADBlyhS88M
ILLuHs0KFDyM7OxoEDB3DzzTfj6quv1j3GgAEDcObMGYwcORJ//vOfAZCANGvWrKDrI4TwGZRDlk+ePBmPP/44hBC4++67sWTJEsydO9fn8fSYP38+pk6dis2bN+P555/HCy+8gLfeegsAcOONNyIpKQlJSUnIy8vD9u3bsW3bNmzevBl9+vQBQJrLAwcO4OOPP8aOHTuQk5MDgExAW7ZsiQ8++ABDhw5FRkaGq8779+8P+n4wDMMwDMMwTGOCNXBhUFlZibKyMtxxxx3o2LEjlixZgrVr17qEHukDd/DgQbz//vvYsGEDAKBnz55umroPPvgADz/8ML7//vuw69SzZ098+OGHbmWHDx9Gs2bNcNlll7mVK4qCcePG4Z133vF5vJ07d6J79+666zp37oyioiK8+eab2LVrFyorK13H9TyPEALz5s1z+fEdPHgQv/jFLyCEwG233eYq/+yzz1wBUTg6JMMwDMMwDMO40yA1cKFoyiLBP//5T0ybNg0rVqxwlf3oRz/Ctm3bkJmZ6Spr06YNFi5ciAULFmD8+PG4++67MWDAANxwww0uPzgZCTJcpk6dij/+8Y/YsmULhg8fjgsXLuCee+7BnDlzdLfftm0bOnfurLtu3bp12Lx5M0pKSrzWvfbaaxg9ejQURcGBAwdgtVrRvHlzAMC//vUvzJs3D1VVVXjrrbewcOFCJCUlobi4GFOnTkWzZs1w/Phx2Gw2DBs2DDfeeCNmzJiBli1b4vTp0zh79iwGDBiAX//616isrERqaipeeuklXHPNNabcI4ZhGIZhGIZpqDRIAS5WeOGFF7xMD3/84x/j+eefx/333+9WPmHCBDz44IPYunUrBg8ejLVr1+L+++/H8ePH0bJlS6Snp2P+/Pmu7R999FGsWbPGtbx+/Xp07NgxYJ2SkpLwr3/9C7/61a9w9913w+FwoKCgwC0E/9q1a7Ft2zY4nU60b98eK1eu9DpvVVUVevXqhbKyMpcZo5bVq1djxowZSE5ORlxcHJ577jlYrVYAQG5uLsaMGYMvvvgCxcXFaNu2Ldq2bYt9+/bh2muvBQA0a9YMa9asQY8ePfCHP/wBI0eOhNPphM1mwxNPPIGBAwfiwQcfxLXXXos2bdqgb9++ruAmDMMwDMMwDNNUUXz5OEWT/v37C08zwH379vk05WNihwcffBDNmjULyX8vmnD7YhiGYRiGYWIJRVF2CCH6e5azDxzDMAzDMAzDMEwDgU0oGVORAUgYhmEYhmEYhjEf1sAxDMMwDMMwDMM0EFiAYxiGYRiGYRiGaSCwAMcwDMMwTYXFiwG73b3MbqdyhmEYpkHAAhzDMAzDNBVycoBJk1Qhzm6n5Zyc6NaLYRiGMQwLcEFgtVqRnZ2NXr164ZZbbnEl39aWjxs3Dt999x0A4OjRo0hKSkJ2drZrWrVqFQCgY8eO6N27N3r37o0ePXrgd7/7HS5evOjar1evXq7zbt++HUOGDEG3bt2QlZWFO+64A0888YTrmPHx8ejduzeys7Mxd+5crFy5EhkZGW7n/eSTT1z16dOnD7p3747c3Fw8++yzPq9327ZtyM3NRVZWFrKysvC3v/3Nte7BBx9Eu3btkJ2djaysLBQVFcHpdAIAfvazn6FTp0645ppr0LVrV0ybNg3Hjx/XPcerr76KPn364JprrkGPHj3ckqJHggcffBBLly6N6DkYhmFilrw84MUXSWibP5/mL75I5QzDMEyDgAW4IEhKSkJFRQX27NmD+Ph4LF++3Kv8iiuuwBNPPOHap3PnzqioqHBN06ZNc62z2+3YvXs3tm/fjsOHD6OwsNDrnCdPnsQtt9yCRYsW4bPPPsO+ffswatQo3Hzzza5jtm3bFna7HRUVFVi4cCEAYPLkyW7n7dGjh6s+O3fuxL59+1BaWopHH30U//jHP7zO+/XXX+OnP/0pli9fjk8//RTbtm3DihUr8Nprr7m2mTFjBioqKvDJJ59g9+7dePvtt13rli
xZgl27duGzzz5Dnz59kJeXh0uXLrmdo6amBoWFhdi4cSN27dqFnTt3YujQoSE8GYZhGMYQ0lSyqAh4+GGaa8sZhmG
"text/plain": [
"<Figure size 1080x720 with 1 Axes>"
]
},
"metadata": {
"needs_background": "light"
},
"output_type": "display_data"
}
],
"source": [
"xaxis = range(0, len(target3))\n",
"plot.figure(figsize=(15,10))\n",
"plot.plot(xaxis, scaled_target3, 'b', xaxis, model3.predict(scaled_data3.reshape(*np.shape(scaled_data3), 1)), 'rx')\n",
"plot.legend(['TARGET OBD Speed','PREDICTED OBD Speed'])"
]
},
{
"cell_type": "code",
"execution_count": 42,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"RMSE = 26.72 km/h\n",
"NRMSE = 6.82 %\n",
"MAE = 22.02 km/h\n",
"MAP = 21.77 %\n",
"SMAPE = 20.89 %\n",
"MSD = -5.56 km/h\n",
"CORR = 0.70\n",
"ACC_A = 24.34 %\n",
"ACC_R = 24.88 %\n",
"MAD = 19.98 km/h\n"
]
}
],
"source": [
"ta = target3\n",
"pr = np.squeeze(np.multiply(model3.predict(scaled_data3.reshape(*np.shape(scaled_data3), 1)), 100))\n",
"RMSE = np.sqrt(sum((ta-pr)**2)/len(ta))\n",
"print(\"RMSE = %.2f km/h\" %RMSE)\n",
"NRMSE = 1-math.sqrt(sum((ta-pr)**2))/math.sqrt(sum( (ta-np.mean(ta) )**2 ))\n",
"print(\"NRMSE = %.2f %%\" %(NRMSE*100))\n",
"MAE = sum(((ta-pr)**2)**(1/2))/len(ta)\n",
"print(\"MAE = %.2f km/h\" %MAE)\n",
"with np.errstate(divide = 'ignore'): map_elements = np.abs((ta - pr) / ta)\n",
"map_elements[map_elements == np.inf] = 0\n",
"MAP = np.sum(map_elements) / len(ta)\n",
"print(\"MAP = %.2f %%\" % (MAP*100))\n",
"SMAPE = np.sum(np.abs(ta - pr) / ((ta + pr) / 2)) / len(ta)\n",
"print(\"SMAPE = %.2f %%\" % (SMAPE*100))\n",
"MSD = np.sum(ta - pr) / len(ta)\n",
"print(\"MSD = %.2f km/h\" % MSD)\n",
"CORR = np.corrcoef(ta, pr)[1][0]\n",
"print(\"CORR = %.2f\" % CORR)\n",
"ACC_A_THRESHOLD = 10\n",
"ACC_A = (np.abs(ta - pr) < ACC_A_THRESHOLD).sum() / len(ta)\n",
"print(\"ACC_A = %.2f %%\" % (ACC_A*100))\n",
"ACC_R_THRESHOLD = 0.1\n",
"ACC_R = (np.abs(ta / pr - 1) < ACC_R_THRESHOLD).sum() / len(ta)\n",
"print(\"ACC_R = %.2f %%\" % (ACC_R*100))\n",
"MAD = np.median(np.abs(ta - pr))\n",
"print(\"MAD = %.2f km/h\" % MAD)"
]
},
{
"cell_type": "code",
"execution_count": 44,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"RMSE = 21.57 km/h\n",
"NRMSE = 24.75 %\n",
"MAE = 16.79 km/h\n",
"MAP = 14.52 %\n",
"SMAPE = 15.78 %\n",
"MSD = 12.98 km/h\n",
"CORR = 0.81\n",
"ACC_A = 42.47 %\n",
"ACC_R = 35.90 %\n",
"MAD = 12.00 km/h\n"
]
}
],
"source": [
"ta = target3\n",
"pr = [d['hr_traficSpeed'] for d in reader_data3] #t['hr_traficSpeed']\n",
"pr = np.array([float(d) if d != '' else 0.0 for d in pr])\n",
"pr = pr * 3.6\n",
"RMSE = math.sqrt(sum((ta-pr)**2)/len(ta))\n",
"print(\"RMSE = %.2f km/h\" %RMSE)\n",
"NRMSE = 1-math.sqrt(sum((ta-pr)**2))/math.sqrt(sum( (ta-np.mean(ta) )**2 ))\n",
"print(\"NRMSE = %.2f %%\" %(NRMSE*100))\n",
"MAE = sum(((ta-pr)**2)**(1/2))/len(ta)\n",
"print(\"MAE = %.2f km/h\" %MAE)\n",
"with np.errstate(divide = 'ignore'): map_elements = np.abs((ta - pr) / ta)\n",
"map_elements[map_elements == np.inf] = 0\n",
"MAP = np.sum(map_elements) / len(ta)\n",
"print(\"MAP = %.2f %%\" % (MAP*100))\n",
"SMAPE = np.sum(np.abs(ta - pr) / ((ta + pr) / 2)) / len(ta)\n",
"print(\"SMAPE = %.2f %%\" % (SMAPE*100))\n",
"MSD = np.sum(ta - pr) / len(ta)\n",
"print(\"MSD = %.2f km/h\" % MSD)\n",
"CORR = np.corrcoef(ta, pr)[1][0]\n",
"print(\"CORR = %.2f\" % CORR)\n",
"ACC_A_THRESHOLD = 10\n",
"ACC_A = (np.abs(ta - pr) < ACC_A_THRESHOLD).sum() / len(ta)\n",
"print(\"ACC_A = %.2f %%\" % (ACC_A*100))\n",
"ACC_R_THRESHOLD = 0.1\n",
"ACC_R = (np.abs(ta / pr - 1) < ACC_R_THRESHOLD).sum() / len(ta)\n",
"print(\"ACC_R = %.2f %%\" % (ACC_R*100))\n",
"MAD = np.median(np.abs(ta - pr))\n",
"print(\"MAD = %.2f km/h\" % MAD)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.0"
}
},
"nbformat": 4,
"nbformat_minor": 2
}