Skip to main content

Time series dataset for torch.

Project description

Time Series Dataset

PyPI version travis codecov GitHub license

Description

Time series dataset for Time series predictor

Installation

pip install time-series-dataset

Usage example

"""
FlightsDataset
"""

import calendar
import math
import numpy as np
import pandas as pd
import seaborn as sns
from datetime import datetime

from time_series_dataset import TimeSeriesDataset


def _raw_make_predictor(features, *reshape_args):
    #pylint: disable=too-many-function-args
    return np.concatenate(features, axis=-1).reshape(
        list(reshape_args) + [len(features)]).astype(np.float32)


def _make_predictor(features, number_of_training_examples):
    #pylint: disable=too-many-function-args
    return _raw_make_predictor(features, number_of_training_examples, -1)


def _get_labels(input_features, output_features):
    def _features_to_label_list(features):
        return [list(feature)[0] for feature in features]
    labels = {}
    labels['x'] = _features_to_label_list(input_features)
    labels['y'] = _features_to_label_list(output_features)
    return labels


class FlightsDataset(TimeSeriesDataset):
    """
    FlightsDataset class

    :param except_last_n: initialize the FlightsDataset without n last months
    """

    # pylint: disable=too-many-locals
    def __init__(self, except_last_n=0):
        flights_dataset = sns.load_dataset("flights")
        chopped_flights_dataset = flights_dataset[:len(
            flights_dataset)-except_last_n]
        passengers = chopped_flights_dataset['passengers']
        month = chopped_flights_dataset['month']
        year = chopped_flights_dataset['year']

        months_3l = [month_name[0:3]
                     for month_name in list(calendar.month_name)]
        month_number = [months_3l.index(_month)
                        for _month in month]

        passengers_df = pd.DataFrame(passengers)
        month_number_df = pd.DataFrame(data={'month_number': month_number})
        year_df = pd.DataFrame(year)

        number_of_training_examples = 1
        # Store month_number and year as _x
        input_features = [month_number_df, year_df]
        _x = _make_predictor(input_features, number_of_training_examples)

        # Store passengers as _y
        output_features = [passengers_df]
        _y = _make_predictor(output_features, number_of_training_examples)

        super().__init__(_x, _y, _get_labels(input_features, output_features))
        self.month_number_df = month_number_df
        self.year_df = year_df

    # pylint: disable=arguments-differ
    def make_future_dataframe(self, number_of_months, include_history=True):
        """
        make_future_dataframe

        :param number_of_months: number of months to predict ahead
        :param include_history: optional, selects if training history is to be included or not
        :returns: future dataframe with the selected amount of months
        """
        def create_dataframe(name, data):
            return pd.DataFrame(data={name: data})

        def create_month_dataframe(data):
            return create_dataframe('month_number', data)

        def create_year_dataframe(data):
            return create_dataframe('year', data)

        month_number_df = self.month_number_df
        year_df = self.year_df
        last_month = month_number_df.values[-1][0]
        last_year = year_df.values[-1][0]
        if not include_history:
            month_number_df = create_month_dataframe([])
            year_df = create_year_dataframe([])
        for i in range(number_of_months):
            month_index = last_month+i
            new_months = [math.fmod(month_index, 12)+1]
            new_years = [last_year + math.floor(month_index / 12)]
            month_number_df = month_number_df.append(
                create_month_dataframe(new_months), ignore_index=True)
            year_df = year_df.append(
                create_year_dataframe(new_years), ignore_index=True)
        input_features = [month_number_df, year_df]
        return _make_predictor(input_features, 1)


def convert_year_month_array_to_datetime(year_month_array):
    def convert_singe_year_month_array_to_datetime(single_year_month_array):
        return datetime(year=single_year_month_array[1], month=single_year_month_array[0], day=15)
    year_month_array_size = year_month_array.size
    if year_month_array_size == 2:
        return convert_singe_year_month_array_to_datetime(year_month_array)
    return [convert_singe_year_month_array_to_datetime(year_month_array[idx, ...]) for idx in range(len(year_month_array))]

Oze dataset history

Project details


Download files

Download the file for your platform. If you're not sure which to choose, learn more about installing packages.

Source Distribution

time_series_dataset-0.0.7.tar.gz (3.6 kB view details)

Uploaded Source

Built Distribution

If you're not sure about the file name format, learn more about wheel file names.

time_series_dataset-0.0.7-py3-none-any.whl (4.7 kB view details)

Uploaded Python 3

File details

Details for the file time_series_dataset-0.0.7.tar.gz.

File metadata

  • Download URL: time_series_dataset-0.0.7.tar.gz
  • Upload date:
  • Size: 3.6 kB
  • Tags: Source
  • Uploaded using Trusted Publishing? No
  • Uploaded via: twine/3.3.0 pkginfo/1.6.1 requests/2.25.1 setuptools/47.1.0 requests-toolbelt/0.9.1 tqdm/4.55.1 CPython/3.7.9

File hashes

Hashes for time_series_dataset-0.0.7.tar.gz
Algorithm Hash digest
SHA256 425cce37b90958ebee17146014c820778fdce2345685907a7d07460093a4b4e7
MD5 4221b2d855e92a714f0545875ab39e9b
BLAKE2b-256 b3d39ec111663bdacf4cebd1cf298ab0e7055cfb172de6c97ad704f9402064ed

See more details on using hashes here.

File details

Details for the file time_series_dataset-0.0.7-py3-none-any.whl.

File metadata

  • Download URL: time_series_dataset-0.0.7-py3-none-any.whl
  • Upload date:
  • Size: 4.7 kB
  • Tags: Python 3
  • Uploaded using Trusted Publishing? No
  • Uploaded via: twine/3.3.0 pkginfo/1.6.1 requests/2.25.1 setuptools/47.1.0 requests-toolbelt/0.9.1 tqdm/4.55.1 CPython/3.7.9

File hashes

Hashes for time_series_dataset-0.0.7-py3-none-any.whl
Algorithm Hash digest
SHA256 a9c8bdb5d25be8dd735445ceaefa040b1b0e29a018e8a641bd935744461f8f60
MD5 65ba159ec4be3816c5abb8d86bd06a1d
BLAKE2b-256 1cfbcf4873adaeee623130d7977f7babe4c99066577b7a3fe31999b53a5e3d77

See more details on using hashes here.

Supported by

AWS Cloud computing and Security Sponsor Datadog Monitoring Depot Continuous Integration Fastly CDN Google Download Analytics Pingdom Monitoring Sentry Error logging StatusPage Status page