the-last-thing/code/parse_hue.py

133 lines
3.6 KiB
Python
Raw Permalink Normal View History

2021-09-24 10:18:57 +02:00
#!/usr/bin/env python3
import sys
sys.path.insert(1, 'lib')
import argparse
import ast
import csv
from datetime import datetime
from geopy.distance import distance
import io
import itertools
import lmdk_lib
import numpy as np
import os
import matplotlib.pyplot as plt
import time
import zipfile
# https://cloud.delkappa.com/s/oNWLmM6jrpjK8Ff
# HUE data format: layout of the consumption CSV rows.
# Header size, i.e., the number of leading rows skipped before parsing
hdr = 1
# Column index of the date
dt = 0
# Column index of the hour
hr = 1
# Column index of the energy consumption in kWh
kwh = 2
# Timestamp format used to parse the string '<date> <hour>'
tim_fmt = "%Y-%m-%d %H"
def _load_consumption(args):
    '''
      Read the valid consumption rows from the data archive.

      Parameters:
        args - Parsed arguments; `arc` is the path of the zip archive and
               `dat` the name of the CSV member inside it.
      Returns:
        A list of [timestamp, kwh] pairs. The list is empty (or partial)
        when the archive or the CSV member could not be read; the error
        is printed instead of raised.
    '''
    # Consumption [timestamp, kwh]
    cons = []
    try:
        print('Extracting %s... ' %(os.path.abspath(args.arc)), end='', flush=True)
        with zipfile.ZipFile(args.arc, 'r') as arc:
            print('[OK]')
            with io.TextIOWrapper(arc.open(args.dat), newline='\n') as dat:
                try:
                    print('Finding consumption... ', end='', flush=True)
                    # Get the consumption by skipping the header
                    rows = list(csv.reader(dat, delimiter=','))[hdr:]
                    # Check each line
                    for row in rows:
                        # Skip missing values before converting to float
                        if row[kwh] == '':
                            continue
                        value = float(row[kwh])
                        # Add valid (strictly positive) consumption
                        if value > 0:
                            # NOTE: the parsed datetime is naive, so
                            # timestamp() interprets it as local time.
                            stamp = datetime.strptime(row[dt] + ' ' + row[hr], tim_fmt).timestamp()
                            cons.append([stamp, value])
                    print('[OK]')
                except Exception as e:
                    # Best effort: report and keep whatever was parsed
                    print('[Error: %s]' %(e))
    except Exception as e:
        # Best effort: report a missing/corrupt archive or member
        print('[Error: %s]' %(e))
    return cons


def _find_thresholds(cons_data, goal):
    '''
      Find the consumption threshold (theta) for each landmark goal.

      Starting from the maximum consumption, theta is lowered in steps of
      0.01. Rows whose consumption is not below theta count as landmarks.
      A goal is met when the landmark fraction is within 5% (relative) of
      the goal. Theta keeps decreasing across goals, so the goals are
      expected in increasing order.

      Thresholds noted in the original source for these goals:
        0.2: 1.12
        0.4: 0.88
        0.6: 0.68
        0.8: 0.54

      Parameters:
        cons_data - Non-empty 2-D array with the consumption in column 1.
        goal      - Iterable of target landmark fractions.
      Returns:
        {goal: theta} for every goal that was reached.
    '''
    # {goal: theta}
    lmdk = {}
    total = len(cons_data)
    # Find max consumption to start from
    theta = max(cons_data[:, 1])
    cons_data_cur = np.copy(cons_data)
    # Find thetas for each goal
    for g in goal:
        print('Looking for %.1f... ' %(g), end='', flush=True)
        while theta > 0:
            # Reduce threshold gradually
            theta -= .01
            # Find data below the current theta; filtering the already
            # filtered rows is safe because theta only decreases
            cons_data_cur = cons_data_cur[cons_data_cur[:, 1] < theta]
            # Calculate the percentage of landmarks
            diff = (total - len(cons_data_cur))/total
            # Check if it is close enough to what we need
            if abs(diff - g)/g < .05:
                print('%.2f' %(theta))
                lmdk[g] = theta
                # Continue with the next goal
                break
        else:
            # Theta ran out without reaching the goal; terminate the
            # pending progress line instead of leaving it dangling
            print('[Not found]')
    return lmdk


def main(args):
    '''
      Load the consumption data and find the landmark thresholds.

      The data come from a previous parsing when available; otherwise they
      are parsed from the data archive, and the first 1000 valid rows are
      saved to the results.

      Parameters:
        args - Parsed arguments (see parse_args).
      Returns:
        {goal: theta} for the goals that were reached; empty when there
        are no consumption data to work with.
    '''
    # Get consumption data [timestamp, consumption] from previous parsing
    cons_data = lmdk_lib.load_data(args, 'cons')
    if cons_data.size == 0:
        cons = _load_consumption(args)
        # Save the first 1000 rows to the results
        cons_data = np.array(cons, np.float32)[:1000]
        lmdk_lib.save_data(args, cons_data, 'cons')
    # Nothing could be loaded or parsed: indexing column 1 below would
    # raise an IndexError on the empty array, so stop here
    if cons_data.size == 0:
        print('No consumption data available.')
        return {}
    # Percentage of landmarks
    goal = [.2, .4, .6, .8]
    return _find_thresholds(cons_data, goal)
2021-09-24 10:18:57 +02:00
def parse_args(argv=None):
    '''
      Parse arguments.

      Parameters:
        argv - Optional list of argument strings to parse. When None,
               argparse reads the process command line (sys.argv[1:]),
               which is the original behavior.
      Optional:
        arc - The data archive file.
        dat - The consumption data file.
        res - The results archive file.
      Returns:
        The namespace holding `arc`, `dat`, and `res`.
    '''
    # Create argument parser.
    parser = argparse.ArgumentParser()

    # Mandatory arguments.

    # Optional arguments.
    parser.add_argument('-a', '--arc', help='The data archive file.', type=str, default='/home/manos/Cloud/Data/HUE/Data.zip')
    parser.add_argument('-d', '--dat', help='The consumption data file.', type=str, default='Residential_1.csv')
    parser.add_argument('-r', '--res', help='The results archive file.', type=str, default='/home/manos/Cloud/Data/HUE/Results.zip')

    # Parse arguments.
    return parser.parse_args(argv)
# Script entry point: run main() on the parsed arguments and report the
# elapsed wall-clock time.
if __name__ == '__main__':
    try:
        start_time = time.time()
        main(parse_args())
        end_time = time.time()
        print('##############################')
        print('Time : %.4fs' % (end_time - start_time))
        print('##############################')
    except KeyboardInterrupt:
        print('Interrupted by user.')
        # sys.exit() instead of exit(): the latter is injected by the site
        # module for interactive use and is not guaranteed to exist.
        sys.exit()