/*
 * Historical Socioeconomic Data 
 * The data is kept here in the original countries - the conversion to larger regions is done using separate curvesets in socio.java
 * This national data is never changed after startup, so there are no interactions and the curvesets are static
 * Their contents can be viewed in expert complexity level
 *
 */
package jcm.mod.socio;

import java.util.Set;
import jcm.core.itf.dataholder;
import jcm.core.ob.module;
import jcm.core.cur.*;
import jcm.core.reg.*;
import jcm.core.par.param;
import static jcm.core.data.loadtable.*;
import static jcm.core.report.*;
import static jcm.core.complexity.*;



/**
 * Holder for historical national socioeconomic data (population, GDP and related
 * WEO / PWT indicators). The curvesets are static and are filled by
 * {@link #fillhistdata()}; {@link #realgdp()} then derives the constant-price
 * GDP series {@link #GDP_PPP} and {@link #GDP_MER} from them.
 */
public class histsocdata extends module implements dataholder {

    // GDP deflator to convert current US$ to 2009 US$, from WEOApr2016, field USA-NGDP_D, 1980-2021.
    // Not referenced anywhere inside this class (only by the commented-out blend() kept below the class);
    // left in place in case other classes in the package still read it.
    static double[] deflator = {
        44.377, 48.52, 51.531, 53.565, 55.466, 57.241, 58.395, 59.885, 61.982, 64.392,
        66.773, 68.996, 70.569, 72.248, 73.785, 75.32, 76.699, 78.012, 78.859, 80.066,
        81.887, 83.753, 85.039, 86.735, 89.12, 91.987, 94.814, 97.337, 99.246, 100,
        101.222, 103.311, 105.214, 106.929, 108.686, 109.775, 110.851, 112.381, 114.67, 117.129,
        119.565, 122.024
    };

    /** Last year covered by the WEO files and by the PWT files respectively. */
    public static final int  weo_ey=2021, pwt_ey=2011;

    // Year whose WEO level anchors the real GDP series, and first years of the WEO / PWT data.
    private static final int baseyear=2005, weo_sy=1980, pwt_sy=1950;
    // Assumed growth (% per year) when extrapolating backwards over missing data.
    private static final float nodatagrow=3.5f;
    // Assumed growth (% per year) when WEO growth is missing after the base year.
    private static final float nodatagrow_fwd=2.0f;

    public static curveset //
             histpop = new curveset("histpop", "kilo&person", 1700, pwt_ey, expert),
            // histpopHYDE=new qtset("pop", "kilo&person", 1700, 1995, expert),
             GDP_PPP = new curveset("hist_GDP_PPP", "mega&dollar&(2005)&-PPP", 1950, weo_ey, 1, expert), //mixed sources in 2005 dollars
             GDP_MER = new curveset("hist_GDP_MER", "mega&dollar&(2005)&-MER", 1950, weo_ey, 1, expert), //ditto

             WEOGDP_PPP = new curveset("WEO_GDP_PPP", "mega&dollar (current)&-PPP", 1980, weo_ey, 1, expert), // note - in "current international dollars", not 2000
             WEOGDP_PPP_orig = new curveset("WEO_GDP_PPP_orig", "mega&dollar (current)&-PPP", 1980, weo_ey, 1, expert),
             WEOGDP_MER = new curveset("WEO_GDP_MER", "mega&dollar (current)&-MER", 1980, weo_ey, 1, expert), // note - in "current US dollars" - not constant
             WEOrealgrowth=new curveset("WEO real growth rate", "%", 1980, weo_ey, 1, expert),
             WEOinvest = new curveset("WEO investment", "%", 1980, weo_ey, 1, expert),
             WEOunemploy = new curveset("WEO unemployment", "%", 1980, weo_ey, 1, expert);

    // NOTE(review): PWT_RGDPO_orig and PWT_pop_orig reuse the display names of PWT_RGDPO and PWT_pop
    // (unlike WEOGDP_PPP_orig, which has its own name) - confirm that curveset names need not be unique.
    public static curveset //
             PWT_RGDPE = new curveset("PWT Real GDP Expenditure", "mega&dollar&2005&-PPP", 1950, pwt_ey, 1, expert),
             PWT_RGDPO = new curveset("PWT Real GDP Output", "mega&dollar&2005&-PPP", 1950, pwt_ey, 1, expert),
             PWT_RGDPO_orig = new curveset("PWT Real GDP Output", "mega&dollar&2005&-PPP", 1950, pwt_ey, 1, experimental),
             PWT_RKNA = new curveset("PWT Capital", "mega&dollar&2005", 1950, pwt_ey, 1, expert),  //
             PWT_RTFPNA = new curveset("PWT TFP", "factor", 1950, pwt_ey, 1, expert),
             PWT_hc = new curveset("Human Capital Index", "factor", 1950, pwt_ey, 1, expert),
             PWT_pop = new curveset("PWT population", "kilo&person", 1950, pwt_ey, 1, expert),
             PWT_pop_orig = new curveset("PWT population", "kilo&person", 1950, pwt_ey, 1, experimental),
             PWT_inv = new curveset("PWT investment", "ratio", 1950, pwt_ey, 1, expert),
             PWT_labsh = new curveset("PWT labour share", "ratio", 1950, pwt_ey, 1, expert);
             //PWTppprate = new curveset("PWTppp_rate", "localcurrency&per&ppp_dollar", 1950, 2004, 1, expert),  //
             //PWTmerate = new curveset("PWTmex_rate", "localcurrency&per&dollar", 1950, 2004, 1, expert); //

    /** Year from which the WEO-based GDP series is used instead of the rescaled PWT series. */
    public static param joinyear=new param("year start use weo (not pwt)", "year", 1980, 1980, 2005, expert);

    /**
     * Loads every historical table into its curveset, then patches population
     * ({@link #fixpop()}) and builds the real GDP series ({@link #realgdp()}).
     * Called from histemitdata to ensure that GDP data is complete before
     * extrapolating emissions.
     */
    public static void fillhistdata() {
        loaddata(histpop, "data/hist/HYDE_pop_kkg.csv", "\t", true, true, 1f, 1700, 1995, 1700, 1);

        loaddata(WEOGDP_MER, "data/hist/WEOgdp_mer.csv", "\t", false, true, 1000f, 1980, weo_ey, 1980, 1);
        loaddata(WEOGDP_PPP, "data/hist/WEOgdp_ppp.csv", "\t", false, true, 1000f, 1980, weo_ey, 1980, 1);
        // the second boolean (false) means that names will not be converted to regions
        loaddata(WEOGDP_PPP_orig, "data/hist/WEOgdp_ppp.csv", "\t", false, false, 1000f, 1980, weo_ey, 1980, 1);
        // the series below are percentages, so small regions are added using the weight of PPP
        loaddata(WEOrealgrowth, WEOGDP_PPP_orig, "data/hist/WEOgdp_real_growth.csv", "\t", false, true, 1f, 1980, weo_ey, 1980, 1);
        loaddata(WEOinvest, WEOGDP_PPP_orig, "data/hist/WEOinvestment.csv", "\t", false, true, 1f, 1980, weo_ey, 1980, 1);
        loaddata(WEOunemploy, WEOGDP_PPP_orig, "data/hist/WEOunemployment.csv", "\t", false, true, 1f, 1980, weo_ey, 1980, 1);

        loaddata(PWT_RGDPE, "data/hist/PWT_rgdpe.tab", "\t", false, true, 1f, 1950, pwt_ey, 1950, 1);
        loaddata(PWT_RGDPO, "data/hist/PWT_rgdpo.tab", "\t", false, true, 1f, 1950, pwt_ey, 1950, 1);
        loaddata(PWT_RKNA, "data/hist/PWT_rkna.tab", "\t", false, true, 1f, 1950, pwt_ey, 1950, 1);
        loaddata(PWT_pop, "data/hist/PWT_pop.tab", "\t", false, true, 1f, 1950, pwt_ey, 1950, 1);
        // the second false below means that names will not be converted to regions
        loaddata(PWT_RGDPO_orig, "data/hist/PWT_rgdpo.tab", "\t", false, false, 1f, 1950, pwt_ey, 1950, 1);
        loaddata(PWT_pop_orig, "data/hist/PWT_pop.tab", "\t", false, false, 1f, 1950, pwt_ey, 1950, 1);
        // use the orig small regions for weighting for the ratios below
        loaddata(PWT_RTFPNA, PWT_RGDPO_orig, "data/hist/PWT_rtfpna.tab", "\t", false, true, 1f, 1950, pwt_ey, 1950, 1);
        loaddata(PWT_inv, PWT_RGDPO_orig, "data/hist/PWT_csh_i.tab", "\t", false, true, 1f, 1950, pwt_ey, 1950, 1);
        loaddata(PWT_hc, PWT_pop_orig, "data/hist/PWT_hc.tab", "\t", false, true, 1f, 1950, pwt_ey, 1950, 1);
        loaddata(PWT_labsh, PWT_pop_orig, "data/hist/PWT_labsh.tab", "\t", false, true, 1f, 1950, pwt_ey, 1950, 1);

        //loaddata(PWTppprate, "data/hist/PWTppp.csv", "\t", false, false, 1f, 1950, 2004, 1950, 1);    // local currency / $ppp
        //loaddata(PWTmerate, "data/hist/PWTxrat.csv", "\t", false, false, 1f, 1950, 2004, 1950, 1);    // local currency / $mex
        fixpop();
        realgdp();
        //combinePWT();
        //blend(WEOGDP_MER, GDP_MER);
        //blend(WEOGDP_PPP, GDP_PPP);
        //depressionfix();
    }


    // Set once precalc() has run at least once.
    static boolean done=false;

    /**
     * Rebuilds the real GDP series. Note this will be called if the params above
     * are adjusted - but not by other modules (due dataholder).
     */
    public void precalc() {
        realgdp();
        done=true;
    }


    /**
     * Builds {@link #GDP_PPP} and {@link #GDP_MER} for every nation:
     * the WEO level in the base year (2005) is chained backwards to 1979 and
     * forwards to {@link #weo_ey} with the WEO real growth rates, and GDP_PPP
     * before the join year is replaced by PWT_RGDPE rescaled to match at the
     * join year. Regions with no positive WEO PPP value in the base year are
     * listed in a debug message.
     */
    static void realgdp() {
        StringBuilder lackdata=new StringBuilder();
        Set<region> fsu=regman.allreg.findreg("FSU").subreg(regman.nations);
        Set<region> eue=regman.allreg.findreg("EUE").subreg(regman.nations);
        //deb("\n\nFSU="); for (region r : fsu) deb(r.name);
        int jy=(int)joinyear.getval(); //same for every region, so read it once

        for (region r : regman.nations.reg)  {
            float baseppp=WEOGDP_PPP.get(r, baseyear), basemer=WEOGDP_MER.get(r, baseyear);
            // written as !(x>0) so that a NaN base value is reported too
            if (!(baseppp>0)) lackdata.append(r.name).append(' ');
            GDP_PPP.set(r, baseyear, baseppp);
            GDP_MER.set(r, baseyear, basemer);

            backcastWEO(r, baseppp, basemer, fsu.contains(r) || eue.contains(r));
            forecastWEO(r, baseppp, basemer);
            splicePWT(r, jy);
        }
        if (lackdata.length()>0) deb(" lack GDP data for: "+lackdata);
    }

    /**
     * Chains GDP backwards from the base year to 1979 using WEO real growth.
     * A growth value that is NaN or exactly 0 is treated as missing.
     *
     * @param transition true for nations inside the FSU or EUE regions, which get
     *                   fixed growth assumptions for 1990-1992 when data is lacking
     */
    private static void backcastWEO(region r, float gdpppp, float gdpmer, boolean transition) {
        for (int y=baseyear; y>=weo_sy; y--) {
            float weogrow=WEOrealgrowth.get(r,y);
            if (Float.isNaN(weogrow) || weogrow==0) {
                if (transition) {
                    //fix lacking data for Former Soviet Union and FYU countries
                    weogrow=(y==1992 ? -5f : y==1991 ? -2f : y==1990 ? 0f : nodatagrow);
                } else {
                    weogrow=nodatagrow; //for a few other small countries missing data
                }
            }
            float grow=100f/(100f+weogrow);
            gdpppp*=grow;
            gdpmer*=grow;
            //-1 because going backwards: growth in year y links y-1 to y
            GDP_PPP.set(r, y-1, gdpppp);
            GDP_MER.set(r, y-1, gdpmer);
        }
    }

    /** Chains GDP forwards from the base year to {@link #weo_ey} using WEO real growth. */
    private static void forecastWEO(region r, float gdpppp, float gdpmer) {
        for (int y=baseyear+1; y<=weo_ey; y++) {
            //fix missing data for Argentina post 2015 (problems data due potential default?)
            float weogrow=(WEOrealgrowth.gotdata(r,y) ? WEOrealgrowth.get(r,y) : nodatagrow_fwd);
            float grow=(100f+weogrow)/100f;
            gdpppp*=grow;
            gdpmer*=grow;
            GDP_PPP.set(r, y, gdpppp);
            GDP_MER.set(r, y, gdpmer);
        }
    }

    /**
     * Overwrites GDP_PPP before the join year with PWT_RGDPE, rescaled so that
     * both series agree in the join year. Years without PWT data are instead
     * extrapolated backwards from the following year at the fixed default rate.
     * Only GDP_PPP is touched; GDP_MER is left as produced by the WEO chaining.
     */
    private static void splicePWT(region r, int jy) {
        float scalefac=GDP_PPP.get(r, jy)/PWT_RGDPE.get(r, jy);
        // PWT missing (0 or NaN) in the join year gives an infinite or NaN factor, which
        // would poison every earlier year that does have PWT data - treat it as no data.
        boolean usable=!Float.isNaN(scalefac) && !Float.isInfinite(scalefac);
        for (int y=jy-1; y>=pwt_sy; y--) {
            float pwtgdp=PWT_RGDPE.get(r,y);
            if (!usable || Float.isNaN(pwtgdp) || pwtgdp==0) {
                GDP_PPP.set(r, y, GDP_PPP.get(r, y+1)*100f/(100f+nodatagrow));
            } else {
                GDP_PPP.set(r, y, pwtgdp*scalefac);
            }
        }
    }


    /**
     * Extends {@link #histpop} beyond the HYDE data: HYDE is used until the end
     * of 1995, PWT population thereafter. Where PWT has no positive value the
     * 1985-1995 HYDE trend is continued linearly.
     */
    static void fixpop() {
        //fix missing data after break of S&M: hold the 2001 value constant
        for (int y=2002; y<=pwt_ey; y++) {
            PWT_pop.set("Serbia and Montenegro", y, PWT_pop.get("Serbia and Montenegro", 2001));
        }
        for (region r : regman.nations.reg) {
            for (int y=1996; y<=pwt_ey; y++) {
                if (PWT_pop.get(r, y)>0) {
                    histpop.set(r, y, PWT_pop.get(r, y));
                } else {
                    //quick fix for small countries missing from PWT (Cuba, Nth Korea etc)
                    float inc=(histpop.get(r,1995)-histpop.get(r,1985))/10f; //mean yearly change 1985-1995
                    histpop.set(r, y, histpop.get(r, 1995)+inc*(y-1995));
                }
            }
        }
    }

} //end class

      
    /*************** GDP *******************
     * The PWT GDP data is per capita, and the rates are per $.
     * These need to be converted to absolute quantities before they can be added, particularly for combining small countries and  islands.
     * check ok : mex/ppp is >1 for all except richest countries  => gdp mer will be less than gdp ppp
     * check ok: all 4 files have same number rows x columns
     */
     
      /*
    static void combinePWT() {
        float pop, gdp_p, gdp_m, ppper, mer;

        for (region r : regman.nations.reg) {
            for (int y = 1950; y <= pwt_ey; y++) {
                //if (r!=regman.nations.findreg("East Timor")) histpop.set(r, y, 0f); //note PWTpop doesn't have East Timor
                GDP_PPP.set(r, y, curve.dud);
                //GDP_MER.set(r, y, curve.dud);
            }    // need to wipe out the HYDE data - except where PWT missing
        }

       
        pwtloop:
        for (Object o : PWT_pop.map.keySet()) {
            region r = regman.allreg.findreg(o.toString());

            if (r == null) {
                deb("PWT data: can't find region " + o + ": ignored ");
                continue pwtloop;
            }

            for (int y = 1950; y <= pwt_ey; y++) {
                pop = PWT_pop.get(o, y);
                histpop.add(r, y, pop);
            } 
                if (PWTCGDP.gotdata(o, y)) {
                    gdp_p = 0.001f * pop * PWTCGDP.get(o, y);
                    GDP_PPP.add(r, y, gdp_p);

                    if (PWTppprate.gotdata(o, y)) {
                        ppper = PWTppprate.get(o, y);
                        mer = PWTmerate.get(o, y);
                        gdp_m = gdp_p * ppper / mer;    // note: the fraction is this way up, because the two exchange rates are local currency / $ppp and local currency / $mex
                        GDP_MER.add(r, y, gdp_m);
                    }
                }
            }
             
        }
        * */
    

    /*
     * WEO PPP data used after 2000, and as base for 2000 to correct PWT before that
     * //P2 try alternative - use WEO from 1980, and 1980 as basis for correction?
     * PWT data retained if WEO missing
     * linear extrapolation (based on 5yr trend) after 2003/4 for a few small countries, if growth is positive, otherwise constant
     * Note: French Guyana, Palestine, Burma and East-Timor have no GDP data in 2000
     */
/*
    static void blend(curveset extrasource, curveset gdp) {
        String lackdata="";
        for (region r : regman.nations.reg) {
            if (!(gdp.get(r, 2000) > 0)) { lackdata+=r.name+" "; continue; }
            float corrfac =  (100f / (float) deflator[2000 - 1980]) * extrasource.get(r, 2000) / gdp.get(r, 2000);

            //corrected to use WEO PPPs as base, PWT corrected
            for (int y = 1950; y < 2000; y++) {
                gdp.set(r, y, ((gdp.get(r, y) > 0) ? (gdp.get(r, y) * corrfac) : 0));
            }
            for (int y = 2000; y <= 2016; y++) {
                gdp.set(r, y, //
                        (extrasource.get(r, y) > 0) ? //
                        (100f / (float) deflator[y - 1980]) * extrasource.get(r, y) //
                        : (gdp.get(r, y) > 0) ? gdp.get(r, y) * corrfac //
                        : (gdp.get(r, y - 1) > gdp.get(r, y - 6)) ? gdp.get(r, y - 1) + 0.2f * (gdp.get(r, y - 1) - gdp.get(r, y - 6)) //
                        : gdp.get(r, y - 1)); //
            }
        }
        if (lackdata.length()>0) deb(gdp.name+" lacks data in 2000 for: "+lackdata); 
    }
    * */
  

    /*
     temporary fix Mar09 awaiting new WEO projections Apr09 - reduce growth rates -1%,-3%,-1%, in 2008,9,10, 11 respectively compared to WEO Oct08
     noting that WEO January09 is much more pessimistic than Oct08, but recent data (Mar09) is even more so.
     this gives a downturn in developed countries' economy and emissions similar to that in early 1980s (much less than 1930s)

  static void depressionfix() {
      for (region r : regman.nations.reg) {
          GDP_PPP.set(r, 2008, 0.99f*GDP_PPP.get(r, 2008));
          GDP_MER.set(r, 2008, 0.99f*GDP_MER.get(r, 2008));
          GDP_PPP.set(r, 2009, 0.96f*GDP_PPP.get(r, 2009));
          GDP_MER.set(r, 2009, 0.96f*GDP_MER.get(r, 2009));
          for (int y = 2010; y < 2015; y++) {
               GDP_PPP.set(r, y, 0.95f*GDP_PPP.get(r, y));
               GDP_MER.set(r, y, 0.95f*GDP_MER.get(r, y));
           }
      }
  }
  
  
    // GDP deflator  to convert current US$ to 2005 US$, from WEOApr2011, field USA-NGDP_D, 1980-2016,
    static double[] deflator = {
        47.750,	52.226,	55.411,	57.603,	59.767,	61.576,	62.937,	64.764,	66.988,	69.518,
        72.200, 74.760,	76.532,	78.223,	79.872,	81.535,	83.088,	84.555,	85.510,	86.768,
        88.647,	90.650, 92.117,	94.101,	96.771,	100.000, 103.257, 106.296, 108.619, 109.615,
        110.662, 111.874,	113.415, 114.871,	116.566,	118.537,	120.672
        };
    
    
    // GDP deflator  to convert current US$ to 2000 US$, from WEOOct2008, field USA-NGDP_D, 1980-2014,
    static double[] deflator_old = {

        54.04, 59.12, 62.73, 65.21, 67.66, 69.71, 71.25, 73.2, 75.69, 78.56,
        81.59, 84.44, 86.39, 88.38, 90.26, 92.11, 93.85, 95.41, 96.47, 97.87,
        100, 102.4, 104.19, 106.4, 109.46, 113.03, 116.68,	119.82,	122.42,
        123.57,	124.06,	124.79,	126.64,	129.09,	131.64
                //old Sept 08 data ended with (2008 to 2013) 122.46,	124.42,	126.28,	128.66,	131.14,	133.67
                // old April 08 data ended with (2005 to 2013) 113, 116.57, 119.68, 122.09, 124.3, 126.6, 129.28, 131.76, 134.09
    };
    
    
     */
    