#!/usr/bin/env python
#
# file: $NEDC_EXP/quantum/exp_011/qsvm/periodic_data_generator.py
#
# revision history:
# 20250131 (SP): create a method to generate periodic data
#

#
# This file contains a method that generates two-class periodic data and
# saves it to a csv file.
#

# import required system modules
#
import os
import numpy as np
import pandas as pd

# import required NEDC modules
#
import nedc_debug_tools as ndt

# define the column names of the data frame that holds the generated data:
# the two feature values of a sample and its class label
#
COL_NAME_FEATURE_1 = "x1"
COL_NAME_FEATURE_2 = "x2"
COL_NAME_LABEL = "label"

#------------------------------------------------------------------------------
def generate_periodic_data(save_path: str, n_samples: int=200,
                           noise: float=0.1) -> None:
    """
    method:
     generate_periodic_data

    arguments:
     save_path: str, the path of the csv file to write
     n_samples: int, the total number of samples (both classes combined)
     noise: float, the scale (standard deviation) of the Gaussian noise
            added to each feature value

    return:
     None

    description:
     generate a two-class, two-feature periodic data set and save it to a
     csv file. each sample is driven by an angle t drawn uniformly from
     [0, 2*pi):

      class 0: x1 = cos(t),   x2 = sin(2*t)
      class 1: x1 = cos(2*t), x2 = sin(t)

     the first n_samples // 2 angles are used for class 0 and the remaining
     angles for class 1, so the two classes never share an angle. the file
     is written without a header row and without an index column; each row
     is "label, x1, x2" and all class 0 rows precede the class 1 rows. the
     parent directory of save_path is created if it does not exist.

     a ValueError is raised if n_samples is negative.
    """

    # reject a negative sample count up front with a clear message
    #
    if n_samples < 0:
        raise ValueError(
            f"n_samples must be non-negative (got {n_samples})")

    # split the samples between the two classes: class 1 receives the
    # extra sample when n_samples is odd
    #
    n_class_0 = n_samples // 2
    n_class_1 = n_samples - n_class_0

    # generate one angle per sample from a uniform distribution:
    # the lower and upper bounds are 0 and 2*pi, respectively
    #
    t = np.random.uniform(0, 2*np.pi, n_samples)

    # give each class its own, non-overlapping slice of the angles so that
    # every generated angle is used exactly once and the classes are not
    # paired sample by sample
    #
    t_0 = t[:n_class_0]
    t_1 = t[n_class_0:]

    # apply cosine/sine functions to get the feature values for class 0
    #
    x1_0 = np.cos(t_0) + noise * np.random.randn(n_class_0)
    x2_0 = np.sin(2*t_0) + noise * np.random.randn(n_class_0)

    # apply cosine/sine functions to get the feature values for class 1:
    # the frequency doubling is swapped between the features relative
    # to class 0
    #
    x1_1 = np.cos(2*t_1) + noise * np.random.randn(n_class_1)
    x2_1 = np.sin(t_1) + noise * np.random.randn(n_class_1)

    # stack the data together to form a 2D array X with one row per
    # sample: class 0 rows first, then class 1 rows
    #
    X = np.vstack([
        np.column_stack([x1_0, x2_0]),
        np.column_stack([x1_1, x2_1])
    ])

    # create the label array y in the same row order as X
    #
    y = np.array([0] * n_class_0 + [1] * n_class_1)
    print(X.shape)

    # collect the label and the features in a data frame: the column
    # order here is the column order of the csv file
    #
    df = pd.DataFrame({
        COL_NAME_LABEL: y,
        COL_NAME_FEATURE_1: X[:, 0],
        COL_NAME_FEATURE_2: X[:, 1],
    })

    # make sure the output directory exists: a bare file name has no
    # directory part and is written to the current directory
    #
    out_dir = os.path.dirname(save_path)
    if out_dir:
        os.makedirs(out_dir, exist_ok=True)

    # save the data to csv file (no index column, no header row)
    #
    df.to_csv(save_path, index=False, header=False)

    # print the save path
    #
    print(f"Data saved to: {save_path}")

    # exit gracefully
    #
    return None

# end of method

# when run as a script, write a 10000-sample data set to train.csv in the
# current working directory
#
if __name__ == "__main__":
    generate_periodic_data(
        n_samples=10000,
        save_path="train.csv",
    )