#!/usr/bin/env python
# coding: utf-8

import os
import ilwis
from osgeo import gdal, osr
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from datetime import datetime, timedelta
import cdsapi
import csv
import warnings
import glob
import requests
warnings.filterwarnings('ignore')

# ### Retrieve data from the Copernicus Climate Data Store
# 
# Install the cdsapi: the CDS API client is a Python based library.
# 
# Log in to the Climate Data Store (register first if you have no account); clicking on your name shows the details of your user profile. Copy and paste the API key into a text file named .cdsapirc in your home directory, following the two-line example below:
# 
#         url: https://cds.climate.copernicus.eu/api
#         key: 'Your_API-KEY'
# 
# 

# You can install the CDS API client via the package management system pip; type the following command in a prompt window in the Python folder:
# pip install cdsapi
# 
# Now you are ready to start retrieving data from the CDS.
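# As a quick check that the credentials are found you can create the client yourself; a minimal sketch (by default the client reads the url and key from the .cdsapirc file, the explicit arguments shown are placeholders):
# 
#         import cdsapi
#         client = cdsapi.Client()  # reads url/key from ~/.cdsapirc
#         # or pass the credentials explicitly:
#         # client = cdsapi.Client(url='https://cds.climate.copernicus.eu/api', key='Your_API-KEY')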

# The gridded river discharge data used below is downloaded for a selected area, for the current year. The data represents the discharge in m3/sec (per pixel) generated by the GloFAS system (see: https://www.globalfloods.eu/). The data is retrieved in grib2 format and downloaded from: https://cds.climate.copernicus.eu/cdsapp#!/dataset/cems-glofas-historical?tab=form. In the cells below the data is assigned to an ILWISPy raster coverage, the dimensions are checked and a plot of the merged dataset is created.

# Link to the data portal:
# https://cds.climate.copernicus.eu/cdsapp#!/dataset/cems-glofas-historical?tab=overview
# 
# A limitation of the data download is that at most 500 layers (days) can be retrieved per download request, so here the data for the area of interest is downloaded one year at a time; a quick sanity check of the request size is given below.
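# Even a leap year of daily layers stays below the 500-layer cap, so one request per year is always valid:

layers_per_year = 366          # daily data, worst case (leap year)
assert layers_per_year <= 500  # a yearly request fits within a single download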

locations = [{'name': 'Atbara', 'description': 'North of Atbara (Sudan)', 'area': [17.90, 33.90, 17.70, 34.05], 'pixel': {'l': 0, 'c': 1}}, # North, West, South, East
             {'name': 'Cairo', 'description': 'South of Cairo (Egypt)', 'area': [30.0, 31.20, 29.80, 31.35], 'pixel': {'l': 0, 'c': 0}},
             {'name': 'Juba', 'description': 'North of Juba (South Sudan)', 'area': [4.95, 31.60, 4.80, 31.75], 'pixel': {'l': 0, 'c': 1}},
             {'name': 'Karuma', 'description': 'North of Karuma (Uganda)', 'area': [2.30, 32.20, 2.20, 32.30], 'pixel': {'l': 1, 'c': 0}},
             {'name': 'Khartoum', 'description': 'North of Khartoum (Sudan)', 'area': [16.00, 32.40, 15.70, 32.70], 'pixel': {'l': 0, 'c': 3}}
            ]
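
# Each entry holds the bounding box of a small download window ('area', given as [North, West, South, East] in degrees) and the line/column ('pixel') of the grid cell that lies on the river within that window.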

# Create the grib data, csv output and graphics output folders when not yet present

def ensure_dir(path):
    """Normalize the given path and create the folder when it does not yet exist."""
    path = os.path.normpath(path)
    if os.path.isdir(path):
        print('Folder exists:', path)
    else:
        print("Folder doesn't exist, creating:", path)
        os.makedirs(path)
    return path

print('current dir is: %s' % (os.getcwd()))

# grib data folder
work_dir = ensure_dir(os.getcwd() + '/../data/GLoFAS_Data')

# csv output folder
out_dir = ensure_dir(os.getcwd() + '/../NBI_Results_Worksheets')

# output folder graphics
gout_dir = ensure_dir(os.getcwd() + '/../NBI_Results_Graphics')

# Import of the version 4 GloFAS river discharge, Consolidated, for the current year

# year = '2024'
cds = cdsapi.Client()

for ix, location in enumerate(locations):
    dt_now = datetime.now()
    print('Processing (' + str(ix+1) + '/' + str(len(locations)) + '):', location['name'])

    # Before April we retrieve the entire previous year as well (the delay of the consolidated data is approximately 3 months)
    # See also: https://cds.climate.copernicus.eu/cdsapp#!/dataset/cems-glofas-historical

    dataset = 'cems-glofas-historical'
    if dt_now < datetime(dt_now.year, 4, 1):
        request = {
            'system_version': ['version_4_0'],
            'hydrological_model': ['lisflood'],
            'product_type': ['consolidated'],
            'variable': ['river_discharge_in_the_last_24_hours'],
            'hyear': [ str(dt_now.year - 1) ],
            'hmonth': [
                '01','02','03','04','05','06','07','08','09','10','11','12'
            ],
            'hday': [
                '01','02','03','04','05','06','07','08','09','10',
                '11','12','13','14','15','16','17','18','19','20',
                '21','22','23','24','25','26','27','28','29','30','31'
            ],
            'data_format': 'grib2',
            'download_format': 'unarchived',
            'area': location['area'] # North, West, South, East
        }
        cds.retrieve(dataset, request, work_dir +'/' +'historical_'+str(dt_now.year - 1)+'_' + location['name'] + '.grib')
    
    # retrieve the current year (from January until March this may result in an "empty" grib file)
    request = {
        'system_version': ['version_4_0'],
        'hydrological_model': ['lisflood'],
        'product_type': ['consolidated'],
        'variable': ['river_discharge_in_the_last_24_hours'],
        'hyear': [ str(dt_now.year) ],
        'hmonth': [
            '01','02','03','04','05','06','07','08','09','10','11','12'
        ],
        'hday': [
            '01','02','03','04','05','06','07','08','09','10',
            '11','12','13','14','15','16','17','18','19','20',
            '21','22','23','24','25','26','27','28','29','30','31'
        ],
        'data_format': 'grib2',
        'download_format': 'unarchived',
        'area': location['area'] # North, West, South, East
    }
    try:
        cds.retrieve(dataset, request, work_dir + '/' + 'historical_' + str(dt_now.year) + '_' + location['name'] + '.grib')
    except requests.exceptions.HTTPError:
        print('No historical data yet available in ' + str(dt_now.year))

    # Delete old intermediate grib files
    
    filelist = glob.glob(work_dir +'/' +'intermediate_*_' + location['name'] + '.grib')
    for f in filelist:
        os.remove(f)
    
    # #### Download Intermediate GloFAS data
    # Import of the version 4 GloFAS river discharge, Intermediate, for the past 5 months; account for the fact that some of the months may fall in the previous year
    
    dt_start = dt_now - timedelta(days=150)

    intermediate_list = {}

    if dt_start.month > dt_now.month: # the 150-day window straddles a year boundary: up to three downloads (fewer in January)
        year = dt_start.strftime('%Y')
        intermediate_list[year] = {'months': ['{:02d}'.format(i) for i in range(dt_start.month, 13)], 'days': ['{:02d}'.format(i) for i in range(1, 32)]}
        year = dt_now.strftime('%Y')
        if dt_now.month > 1:
            intermediate_list[year] = {'months': ['{:02d}'.format(i) for i in range(1, dt_now.month)], 'days': ['{:02d}'.format(i) for i in range(1, 32)]}
        if dt_now.day > 1:
            intermediate_list[year + '-{:02d}'.format(dt_now.month)] = {'months': ['{:02d}'.format(dt_now.month)], 'days': ['{:02d}'.format(i) for i in range(1, dt_now.day)]}
    else:
        year = dt_now.strftime('%Y')
        intermediate_list[year] = {'months': ['{:02d}'.format(i) for i in range(dt_start.month, dt_now.month)], 'days': ['{:02d}'.format(i) for i in range(1, 32)]}
        if dt_now.day > 1:
            intermediate_list[year + '-{:02d}'.format(dt_now.month)] = {'months': ['{:02d}'.format(dt_now.month)], 'days': ['{:02d}'.format(i) for i in range(1, dt_now.day)]}
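
    # Worked illustration (assuming dt_now = 2025-02-10, so dt_start = 2024-09-13):
    # intermediate_list == {
    #     '2024':    {'months': ['09', '10', '11', '12'], 'days': ['01', ..., '31']},
    #     '2025':    {'months': ['01'],                   'days': ['01', ..., '31']},
    #     '2025-02': {'months': ['02'],                   'days': ['01', ..., '09']},
    # }
    # i.e. one request per key: the tail of the previous year, the complete months of the
    # current year, and the current (partial) month up to yesterday.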

    for year in sorted(intermediate_list):
        dataset = 'cems-glofas-historical'
        request = {
            'system_version': ['version_4_0'],
            'hydrological_model': ['lisflood'],
            'product_type': ['intermediate'],
            'variable': ['river_discharge_in_the_last_24_hours'],
            'hyear': [ year.split('-')[0] ],
            'hmonth': intermediate_list[year]['months'],
            'hday': intermediate_list[year]['days'],
            'data_format': 'grib2',
            'download_format': 'unarchived',
            'area': location['area'] # North, West, South, East
        }
        cds.retrieve(dataset, request, work_dir + '/' + 'intermediate_' + year + '_' + location['name'] + '.grib')

    # #### Download Control Forecast GloFAS data
    # Import of the version 4 GloFAS river discharge, Control Forecast, issued today (falling back to earlier days when today's forecast is not yet available)
    
    success = False
    retries = 5
    while not success and retries > 0:
        try:
            year = dt_now.strftime('%Y')
            month = dt_now.strftime('%m')
            day = dt_now.strftime('%d')

            dataset = 'cems-glofas-forecast'
            request = {
                'system_version': ['operational'],
                'hydrological_model': ['lisflood'],
                'product_type': ['control_forecast'],
                'variable': ['river_discharge_in_the_last_24_hours'],
                'year': [year],
                'month': [month],
                'day': [day],
                'leadtime_hour': [
                    '24','48','72','96','120','144','168','192','216','240',
                    '264','288','312','336','360','384','408','432','456','480',
                    '504','528','552','576','600','624','648','672','696','720'
                ],
                'data_format': 'grib2',
                'download_format': 'unarchived',
                'area': location['area'] # North, West, South, East
            }

            cds.retrieve(dataset, request, work_dir + '/' + 'forecast_' + dt_now.strftime('%Y%m%d') + '_' + location['name'] + '.grib')
            success = True
        except Exception:
            print('Forecast of ' + year + month + day + ' is not yet available; trying the previous day')
            dt_now -= timedelta(days = 1)
            retries -= 1
    if not success and retries <= 0:
        print('No forecast available; check that the CDS service is up and running.')

    # Processing the GloFAS discharge data; note that GloFAS version 4 is used here, resolution is 0.05 degree, see 'system_version' above

    # set the working directory for ILWISPy (let ilwis scan the folder, after the download)
    ilwis.setWorkingCatalog(work_dir)
    print(work_dir)
    print(ilwis.version())

    filelist = glob.glob(work_dir +'/' +'historical_????_' + location['name'] + '.grib')
    filelist.sort()

    # Load the retrieved data - ignore grib Warning
    glofas = []
    date_historical = []
    for file in filelist:
        glofas.append(ilwis.RasterCoverage(file))
        print(glofas[-1].size())
        # read the acquisition dates from the GRIB metadata reported by gdal.Info;
        # the band identification lines contain a ' REF_TIME=<ISO timestamp>' entry
        dataset = gdal.Open(file)
        grib_info = gdal.Info(dataset).splitlines()
        date_historical += [datetime.strptime(line.split(' REF_TIME=')[1].split()[0], '%Y-%m-%dT%H:%M:%SZ') for line in grib_info if ' REF_TIME=' in line]

    # Transform each stack (in ilwis format) to a 1D numpy array (using np.fromiter(iter())), combine the per-year 1D arrays (using np.concatenate) and transform the result back to a 3D array (using reshape)

    # create the 1D arrays, one per downloaded year
    glofas_2np = []
    for rc in glofas:
        glofas_2np.append(np.fromiter(iter(rc), np.float64, rc.size().linearSize()))

    # combine the per-year arrays into a single 1D array
    array_all = np.concatenate(glofas_2np)

    # create a 3D stack (time, line, column) of all years combined
    glofas_2np = array_all.reshape((sum([rc.size().zsize for rc in glofas]), glofas[0].size().ysize, glofas[0].size().xsize))

    # check the dimensions obtained: (number of days, ysize, xsize)
    print(glofas_2np.shape)

    # In the cell below the discharge time series at the Nile river cell is extracted, using the line/column location given by the 'pixel' entry of the current location.

    l = location['pixel']['l']
    c = location['pixel']['c']

    Q_all = []

    for rc in glofas:
        z = rc.size().zsize
        print(z)
        for n in range(0,z):
            point_value = rc.pix2value(ilwis.Pixel(c,l,n))
            Q_all.append(point_value)

    # initializing date range for X-Axis
    data = sorted(zip(date_historical, Q_all))
    df = pd.DataFrame(data, columns=['date','value'])
    date_historical = np.array(df.date)
    Q_all = np.array(df.value)

    #save file - including dates
    with open(out_dir + '/' + location['name'] + '_historical.csv', 'w', newline='') as csvfile:
        writer=csv.writer(csvfile, delimiter=',')
        writer.writerows(zip(date_historical,Q_all))
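
    # The resulting <name>_historical.csv holds one 'date,value' row per day, e.g.
    # (illustrative values): 2024-01-01 00:00:00,1234.5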

    #save file excluding dates - for multi Q plot
    df = pd.DataFrame(Q_all)
    df.to_csv(out_dir + '/' + location['name'] + '_historical_nd.csv')

    # Load the retrieved intermediate data - ignore grib Warning
    glofas_interm = [ilwis.RasterCoverage('intermediate_' + year + '_' + location['name'] + '.grib') for year in sorted(intermediate_list)]
    
    date_intermediate = []
    for year in sorted(intermediate_list):
        grib_info = gdal.Info(gdal.Open(work_dir +'/intermediate_' + year + '_' + location['name'] + '.grib')).splitlines()
        date_intermediate += [datetime.strptime(line.split(' REF_TIME=')[1].split()[0], '%Y-%m-%dT%H:%M:%SZ') for line in grib_info if ' REF_TIME=' in line]
    
    Q_interm = []
    for glofas_interm_year in glofas_interm:
        z = glofas_interm_year.size().zsize
        print(z)
        for n in range(0,z):
            point_value = glofas_interm_year.pix2value(ilwis.Pixel(c,l,n))
            Q_interm.append(point_value)

    print('Values extracted for selected location:', Q_interm)
    
    # initializing date range for X-Axis

    data = sorted(zip(date_intermediate, Q_interm))
    df = pd.DataFrame(data, columns=['date','value'])
    date_intermediate = np.array(df.date)
    Q_interm = np.array(df.value)

    #save file - including dates
    with open(out_dir+'/' + location['name'] + '_intermediate.csv', 'w', newline='') as csvfile:
        writer=csv.writer(csvfile, delimiter=',')
        writer.writerows(zip(date_intermediate,Q_interm))

    #save file excluding dates - for multi Q plot
    df = pd.DataFrame(Q_interm)
    df.to_csv(out_dir+'/' + location['name'] + '_intermediate_nd.csv')
    
    # Load the retrieved control forecast data - ignore grib Warning
    glofas_forecast = ilwis.RasterCoverage('forecast_' + dt_now.strftime('%Y%m%d') + '_' + location['name'] + '.grib')
    z = glofas_forecast.size().zsize
    print(z)
    Q_forecast = []
    for n in range(0, z):
        point_value = glofas_forecast.pix2value(ilwis.Pixel(c, l, n))
        Q_forecast.append(point_value)
    print('Values extracted for selected location:', Q_forecast)
    plot_date = datetime(dt_now.year, dt_now.month, dt_now.day) # truncate to midnight, discarding the hours/minutes/seconds
    date_forecast = pd.date_range(plot_date, periods=z)
    print(date_forecast.strftime("%d-%m-%Y"))

    #save file - including dates
    with open(out_dir+'/' + location['name'] + '_forecast.csv', 'w', newline='') as csvfile:
        writer=csv.writer(csvfile, delimiter=',')
        writer.writerows(zip(date_forecast,Q_forecast))

    #save file
    df = pd.DataFrame(Q_forecast)
    df.to_csv(out_dir+'/' + location['name'] + '_forecast_nd.csv')

    # ####  Combine all discharge data in a single graph

    fig = plt.figure(figsize =(12, 4))
    plt.plot(date_historical,Q_all, label = 'consolidated', zorder=5)
    plt.plot(date_intermediate,Q_interm, label = 'intermediate')
    plt.plot(date_forecast,Q_forecast, label = 'forecast')
    plt.xlabel('Time Series period')
    plt.ylabel('Discharge (m3/sec)')
    plt.title('GloFAS derived discharge time series ' + location['description'])
    plt.legend(loc="upper left", title = 'Time series type:')
    plt.savefig(gout_dir+'/Q_' + location['name'] + '_all.png')
    
    # Delete old forecast grib files, keeping only the most recent one

    filelist = glob.glob(work_dir + '/' + 'forecast_????????_' + location['name'] + '.grib')
    filelist.sort()
    for f in filelist[0:-1]: # all but the latest; the sorted filenames order by date
        os.remove(f)
