


############################################################################
####################  Build boxplots #######################################
############################################################################

# Simulate the sampling distribution of digit frequencies under Benford's law
# and compute the digit frequencies observed in `x`.
#
# Arguments:
#   x           numeric vector of observations. Missing values are dropped;
#               values <= 0.1 ("1st") or <= 9.9 ("2nd") are discarded.
#   title.plot  unused; kept so existing calls keep working.
#   B           number of samples simulated from Benford's law.
#   type.dig    "1st" (first significant digit, 1..9) or
#               "2nd" (second significant digit, 0..9).
#
# Returns a list with:
#   df          simulated frequencies, one row per (sample, digit)
#   df.summary  per digit: mean, 2.5% and 97.5% quantiles of the simulations
#   df.x        observed frequencies of each digit in `x`
#   n           number of observations kept from `x`
#
# Relies on signifd() being available in the calling environment.
build.boxplot <- function(x, title.plot = NULL, B = 200, type.dig = "1st") {
  type.dig <- match.arg(type.dig, c("1st", "2nd"))

  if (type.dig == "1st") {
    fact.dig <- 1:9
    min.x <- .1
    offset <- 0
    benford.prob <- log(1 + 1 / 1:9) / log(10)
    extract.digit <- function(v) signifd(v)
  } else {
    fact.dig <- 0:9
    min.x <- 9.9
    # Simulated second digits are shifted to 10..19 so that they carry a
    # leading digit and go through the same extraction as real data.
    offset <- 10
    benford.prob <- vapply(
      0:9,
      function(d) sum(log(1 + 1 / (d + 10 * 1:9)) / log(10)),
      numeric(1)
    )
    extract.digit <- function(v) {
      round((signifd(v, 2) / 10 - signifd(v, 1)) * 10)
    }
  }
  nb.dig <- length(fact.dig)

  # Relative frequency of every possible digit (zero when a digit is absent).
  freq.dig <- function(v) {
    counts <- table(factor(extract.digit(v), levels = fact.dig))
    as.vector(counts) / length(v)
  }

  # NA values have no digit: drop them so they neither enter `n` nor bias
  # the observed frequencies.
  x <- x[!is.na(x) & x > min.x]
  n <- length(x)

  dig.B <- matrix(0, nrow = B, ncol = nb.dig)
  for (b in seq_len(B)) {
    sim <- sample(fact.dig, size = n, replace = TRUE, prob = benford.prob) +
      offset
    dig.B[b, ] <- freq.dig(sim)
  }

  df <- data.frame(freq = c(dig.B), digit = factor(rep(fact.dig, each = B)))

  # One row per digit: columns of dig.B are already ordered like fact.dig.
  res.summary <- t(apply(dig.B, 2, function(fr) {
    c(mean(fr), quantile(fr, .025), quantile(fr, .975))
  }))
  colnames(res.summary) <- c("freq", "lower", "upper")

  df.summary <- data.frame(res.summary, digit = factor(fact.dig))
  df.x <- data.frame(freq = freq.dig(x), digit = factor(fact.dig))

  list(df = df, df.summary = df.summary, df.x = df.x, n = n)
}



###############################################################################
################################# violin boxplots ################################
###############################################################################

# Draw, for every recognised country, violin plots of the digit frequencies
# simulated under Benford's law, overlaid with the observed frequencies, for
# confirmed cases and for deaths.
#
# Arguments:
#   l.data      named list; only its names are read, to decide which of
#               "allchina", "allcanada", "allusa", "allfrance" are plotted.
#   txt         plot title.
#   B           number of simulated samples, forwarded to build.boxplot().
#   type.dig    "1st" or "2nd" digit, forwarded to build.boxplot().
#   cumulative  TRUE: use columns cum_confirm / cum_dead;
#               FALSE: use columns confirm / dead.
#
# NOTE: the counts themselves are read from the global data frames df.hubei,
# df.can, df.usa and df.france, not from `l.data`.
#
# Returns list(p = ggplot object, df.x = observed frequencies,
#              df.n = sample-size labels, one row per panel).
do.violins <- function(l.data, txt, B = 50, type.dig = "1st",
                       cumulative = FALSE) {
  nm.confirm <- if (cumulative) "cum_confirm" else "confirm"
  nm.dead <- if (cumulative) "cum_dead" else "dead"

  # Recognised countries, in display order, with their facet labels.
  ctry.labs <- c(allchina = "China", allcanada = "Canada",
                 allusa = "USA", allfrance = "France")
  tmp.who <- names(ctry.labs)[names(ctry.labs) %in% names(l.data)]
  if (length(tmp.who) == 0) {
    stop("l.data must contain at least one of: ",
         paste(names(ctry.labs), collapse = ", "), call. = FALSE)
  }
  who.labs <- ctry.labs[tmp.who]

  # One build.boxplot() result per (country, series), confirmed then deaths.
  fits <- vector("list", 2 * length(tmp.who))
  k <- 0
  for (e.who in tmp.who) {
    # switch() only evaluates the selected arm, so data frames of countries
    # that are not requested do not need to exist.
    src <- switch(e.who,
                  allchina = df.hubei,
                  allcanada = df.can,
                  allusa = df.usa,
                  allfrance = df.france)
    for (e.what in c("confirm", "dead")) {
      k <- k + 1
      nm.col <- if (e.what == "confirm") nm.confirm else nm.dead
      fits[[k]] <- list(
        who = e.who,
        what = e.what,
        fit = build.boxplot(src[, nm.col], B = B, type.dig = type.dig)
      )
    }
  }

  # Stack one component ("df", "df.summary" or "df.x") of every fit and tag
  # each row with its country and series.
  stack.part <- function(part) {
    pieces <- lapply(fits, function(f) {
      d <- f$fit[[part]]
      d$what <- f$what
      d$who <- f$who
      d
    })
    out <- do.call(rbind, pieces)
    out$what <- factor(out$what)
    out$who <- factor(out$who)
    out
  }
  df <- stack.part("df")
  df.summary <- stack.part("df.summary")
  df.x <- stack.part("df.x")

  # Sample sizes, and the height at which the "(n=...)" labels are drawn:
  # the largest frequency among the country's observed values and all
  # simulated values.
  vecn <- unlist(lapply(fits, function(f) f$fit$n))
  ctry.max <- vapply(tmp.who, function(e.who) {
    obs <- unlist(lapply(fits, function(f) {
      if (f$who == e.who) f$fit$df.x$freq
    }))
    max(obs, df$freq)
  }, numeric(1))
  vecM <- rep(unname(ctry.max), each = 2)

  # Two panels per recognised country (not per name found in l.data).
  nb.box <- length(fits)
  df.n <- data.frame(x = rep(2, nb.box), y = vecM,
                     n = paste0("(n=", vecn, ")"),
                     what = factor(rep(c("confirm", "dead"), nb.box / 2)),
                     who = rep(factor(tmp.who), each = 2))

  what.labs <- c("Confirmed cases", "Deaths")
  names(what.labs) <- c("confirm", "dead")

  # Fix the facet order: countries in display order, confirmed before deaths.
  what.levels <- c("confirm", "dead")
  df$who <- factor(df$who, levels = tmp.who)
  df.summary$who <- factor(df.summary$who, levels = tmp.who)
  df.x$who <- factor(df.x$who, levels = tmp.who)
  df$what <- factor(df$what, levels = what.levels)
  df.summary$what <- factor(df.summary$what, levels = what.levels)
  df.x$what <- factor(df.x$what, levels = what.levels)

  tmp <- .3
  switch(type.dig,
         "1st" = {
           beginsc <- tmp + (1 - tmp) / 10
           x.tmp <- 5.5
           min.dig <- 1
         },
         "2nd" = {
           beginsc <- tmp
           x.tmp <- 5.5
           min.dig <- 0
         })

  df$digit <- as.numeric(as.character(df$digit))
  df.x$digit <- as.numeric(as.character(df.x$digit))
  df.summary$digit <- as.numeric(as.character(df.summary$digit))

  # Annotation and its legend dot live in the first panel, so they use that
  # panel's height (previously always China's, which broke without China).
  y.top <- vecM[1]
  lab.ann <- "Observed frequencies"
  ann <- data.frame(x = 4.8, y = y.top, lab = lab.ann,
                    what = "confirm", who = tmp.who[1])
  df.tmp <- data.frame(x = x.tmp, y = .96 * y.top,
                       what = "confirm", who = tmp.who[1])

  p <- ggplot(data = df, aes(x = digit, y = freq)) +
    geom_violin(aes(y = freq, fill = factor(digit)), alpha = .8, trim = TRUE) +
    scale_fill_viridis(discrete = TRUE, begin = beginsc, end = 1, alpha = .8) +
    theme_bw() +
    theme(legend.position = "bottom") +
    guides(fill = guide_legend("Digit")) +
    geom_point(data = df.summary, shape = 16, size = 2, show.legend = TRUE) +
    geom_line(data = df.summary, alpha = .5, show.legend = TRUE) +
    geom_point(data = df.x, colour = "Darkred", shape = 16, size = 2) +
    geom_line(data = df.x, colour = "Darkred", linetype = 1, alpha = .8) +
    scale_x_continuous(breaks = seq(min.dig, 9, by = 1)) +
    xlab("") + ylab("") +
    facet_grid(who ~ what,
               labeller = labeller(what = what.labs, who = who.labs)) +
    geom_label(data = df.n, aes(x = x, y = y, label = n),
               inherit.aes = FALSE, size = 3) +
    geom_text(data = ann, mapping = aes(x = Inf, y = y, label = lab),
              hjust = 1.05, vjust = 1.05) +
    geom_point(data = df.tmp, aes(x = x, y = y), colour = "Darkred") +
    ggtitle(txt)

  list(p = p, df.x = df.x, df.n = df.n)
}


############################################################################
###########  Confidence bands ##############################################
############################################################################

# Confidence limits for the observed digit frequencies of one
# (series, country, digit type) combination.
#
# Arguments:
#   dfx      observed frequencies; columns freq, digit, what, who, type.digit.
#   dfn      sample-size labels; column n holds strings like "(n=123)",
#            plus what, who, type.digit.
#   e.what   series to select ("confirm" / "dead").
#   e.who    country to select.
#   e.digit  digit type to select, e.g. "1stDaily".
#   B        number of bootstrap resamples (used only when boot = TRUE).
#   m        number of simultaneous intervals; the multinomial intervals are
#            computed at level 0.05 / m (Bonferroni).
#   boot     FALSE (default): simultaneous multinomial intervals from
#            MultinomialCI::multinomialCI().
#            TRUE: basic bootstrap intervals at the unadjusted 95% level.
#
# Returns the selected rows of `dfx` with two extra columns, lower and upper.
ci.boot <- function(dfx, dfn, e.what, e.who, e.digit, B = 50, m = 32,
                    boot = FALSE) {
  df2x <- filter(dfx, ((what == e.what) & (who == e.who) &
                         (type.digit == e.digit)))
  df2n <- filter(dfn, ((what == e.what) & (who == e.who) &
                         (type.digit == e.digit)))

  # Recover the sample size from its "(n=...)" label.
  n <- as.numeric(sub("^\\(n=(.*)\\)$", "\\1", as.character(df2n$n)[1]))

  # freq * n is a count up to floating-point error; round it back to the
  # integer it represents.
  N <- round(df2x$freq * n)

  if (boot) {
    nb.dig <- nrow(df2x)
    freq.B <- matrix(0, nrow = nb.dig, ncol = B)
    for (b in seq_len(B)) {
      # Resample digit positions with the observed frequencies as weights.
      draw <- sample.int(nb.dig, size = n, replace = TRUE, prob = df2x$freq)
      freq.B[, b] <- tabulate(draw, nbins = nb.dig) / n
    }
    q <- apply(freq.B, 1, quantile, probs = c(.025, .975))
    # Basic bootstrap interval: reflect the quantiles around the estimate.
    lower <- 2 * df2x$freq - q[2, ]
    upper <- 2 * df2x$freq - q[1, ]
  } else {
    if (!requireNamespace("MultinomialCI", quietly = TRUE)) {
      stop("Package 'MultinomialCI' is required to compute the intervals.",
           call. = FALSE)
    }
    tmp <- MultinomialCI::multinomialCI(N, alpha = .05 / m)
    lower <- tmp[, 1]
    upper <- tmp[, 2]
  }

  cbind(df2x, lower = unname(lower), upper = unname(upper))
}

# Plot the observed digit frequencies of every country with simultaneous
# confidence intervals, against the Benford reference frequencies.
#
# Arguments:
#   l1.day, l2.day  results of do.violins() for the 1st / 2nd digit of the
#                   daily data (components df.x and df.n are used).
#   l1.cum, l2.cum  same for the cumulative data.
#   B               forwarded to ci.boot().
#
# Returns the ggplot object (rows: digit type, columns: confirmed / deaths).
plotCI <- function(l1.day, l2.day, l1.cum, l2.cum, B = 50) {
  type.levels <- c("1stDaily", "2ndDaily", "1stCum", "2ndCum")
  parts <- list(l1.day, l2.day, l1.cum, l2.cum)

  # Stack one component of the four inputs and tag rows with the digit type.
  stack.tagged <- function(slot) {
    pieces <- lapply(parts, function(l) l[[slot]])
    out <- do.call(rbind, pieces)
    nb.rows <- vapply(pieces, nrow, integer(1))
    cbind(out, type.digit = rep(type.levels, times = nb.rows))
  }
  dfx <- stack.tagged("df.x")
  dfn <- stack.tagged("df.n")

  v.what <- as.character(unique(dfx$what))
  v.who <- as.character(unique(dfx$who))
  v.type.digit <- as.character(unique(dfx$type.digit))

  # Bonferroni correction over the intervals actually drawn (32 for the
  # usual 2 series x 4 countries x 4 digit types).
  m <- length(v.what) * length(v.who) * length(v.type.digit)
  pieces <- vector("list", m)
  k <- 0
  for (e.what in v.what) {
    for (e.who in v.who) {
      for (e.type.digit in v.type.digit) {
        k <- k + 1
        pieces[[k]] <- ci.boot(dfx, dfn, e.what, e.who, e.type.digit,
                               B = B, m = m)
      }
    }
  }
  df <- do.call(rbind, pieces)

  # Benford reference: the same curve in every (digit type, series) panel.
  p01 <- log(1 + 1 / 1:9) / log(10)
  p02 <- vapply(0:9, function(d) sum(log(1 + 1 / (d + 10 * 1:9)) / log(10)),
                numeric(1))
  d0 <- do.call(rbind, lapply(type.levels, function(td) {
    if (startsWith(td, "1st")) {
      data.frame(digit = as.numeric(1:9), freq = p01, type.digit = td)
    } else {
      data.frame(digit = as.numeric(0:9), freq = p02, type.digit = td)
    }
  }))
  d0$type.digit <- factor(d0$type.digit, levels = type.levels)
  d0 <- rbind(data.frame(d0, what = "confirm"), data.frame(d0, what = "dead"))
  d0 <- data.frame(d0, lower = d0$freq, upper = d0$freq, who = "benford")

  digit.labs <- c("1st digit - Daily data", "2nd digit - Daily data",
                  "1st digit - Cumulative data", "2nd digit - Cumulative data")
  names(digit.labs) <- type.levels

  # Country labels are looked up by name, so they stay correct whatever the
  # order of the factor levels; unknown countries keep their raw name.
  nml <- if (is.factor(df$who)) levels(df$who) else unique(as.character(df$who))
  ctry.labs <- c(allchina = "China", allcanada = "Canada",
                 allusa = "USA", allfrance = "France")
  who.labs <- ifelse(nml %in% names(ctry.labs), ctry.labs[nml], nml)
  names(who.labs) <- nml

  what.labs <- c("Confirmed cases", "Deaths")
  names(what.labs) <- c("confirm", "dead")

  df$who <- factor(df$who, levels = nml)
  df$type.digit <- factor(df$type.digit, levels = type.levels)
  df$what <- factor(df$what, levels = c("confirm", "dead"))
  df$digit <- as.numeric(as.character(df$digit))

  txt <- "Adjusted simultaneous confidence intervals"

  # Hand-made legend entry for the Benford curve, in the top-left panel.
  x.tmp <- 4.5
  M <- max(df$upper[df$type.digit == "1stDaily"])
  ann <- data.frame(what = "confirm", who = nml[1], type.digit = "1stDaily",
                    x = 7.2, y = .95 * M, lab = "Benford distribution")
  df.tmp <- data.frame(x = x.tmp, y = .95 * M,
                       what = "confirm", who = nml[1], type.digit = "1stDaily")

  col.ctry <- c("#EFC000FF", "#868686FF", "#CD534CFF", "#0073C2FF")

  p <- ggplot(data = df, aes(x = digit, y = freq, color = who)) +
    theme_bw() +
    geom_point(size = 1.7, position = position_dodge(0.5),
               show.legend = TRUE) +
    geom_errorbar(aes(ymin = lower, ymax = upper), size = 1, alpha = .85,
                  width = .8, position = position_dodge(0.7)) +
    geom_line(data = d0, aes(x = digit, y = freq), colour = "black",
              alpha = .4, linetype = 1, show.legend = FALSE) +
    geom_point(data = d0, aes(x = digit, y = freq), colour = "black",
               alpha = .4, show.legend = FALSE) +
    scale_x_continuous(breaks = seq(0, 9, by = 1)) +
    theme(legend.position = "bottom") +
    theme(legend.title = element_blank(),
          legend.text = element_text(size = 9)) +
    scale_color_manual(labels = who.labs, values = col.ctry) +
    theme(axis.title.x = element_blank(), axis.title.y = element_blank()) +
    facet_grid(type.digit ~ what,
               labeller = labeller(type.digit = digit.labs,
                                   what = what.labs)) +
    geom_text(data = ann, mapping = aes(x = x, y = y, label = lab),
              colour = "black", size = 4.5) +
    geom_point(data = df.tmp, aes(x = x, y = y), colour = "black") +
    ggtitle(txt)
  p
}

###################################################################
##############  Tables n
####################################################################

# Write a LaTeX table of the sample sizes used for each country, series
# (cases / deaths), digit (1st / 2nd) and data type (daily / cumulative).
#
# Arguments:
#   l1.day, l2.day  results of do.violins() for the 1st / 2nd digit of the
#                   daily data (only component df.n is used).
#   l1.cum, l2.cum  same for the cumulative data.
#   file            output .tex file; also used as the table label.
#   tab.caption     table caption, or NULL.
#
# Each country is expected to contribute four rows per data type, in the
# order cases/1st, deaths/1st, cases/2nd, deaths/2nd.
makeTable <- function(l1.day, l2.day, l1.cum, l2.cum, file = "tablen.tex",
                      tab.caption = NULL) {
  # Recover the sample size from labels of the form "(n=123)".
  parse.n <- function(lab) {
    as.numeric(sub("^\\(n=(.*)\\)$", "\\1", as.character(lab)))
  }

  df.day <- rbind(l1.day$df.n, l2.day$df.n)
  df.cum <- rbind(l1.cum$df.n, l2.cum$df.n)

  # Countries actually present, in display order, with their row labels.
  ctry.labs <- c(allchina = "China", allcanada = "Canada",
                 allusa = "USA", allfrance = "France")
  present <- names(ctry.labs)[names(ctry.labs) %in% as.character(df.day$who)]

  # One row per country: C1, D1, C2, D2.
  count.matrix <- function(df.n, suffix) {
    counts <- parse.n(df.n$n)
    by.ctry <- unlist(lapply(present, function(key) {
      counts[which(df.n$who == key)]
    }))
    stopifnot(length(by.ctry) == 4 * length(present))
    out <- matrix(by.ctry, nrow = length(present), ncol = 4, byrow = TRUE)
    rownames(out) <- unname(ctry.labs[present])
    colnames(out) <- paste0(c("C1", "D1", "C2", "D2"), suffix)
    out
  }
  N <- cbind(count.matrix(df.day, "day"), count.matrix(df.cum, "cum"))

  # Three header rows replace the default column names.
  addtorow <- list()
  addtorow$pos <- list(0, 0, 0)
  addtorow$command <- c("&  \\multicolumn{4}{c}{Daily data} &\\multicolumn{4}{c}{Cumulative data} \\\\\n",
                        "& \\multicolumn{2}{c}{1st digit} &\\multicolumn{2}{c}{2nd digit} 
                          & \\multicolumn{2}{c}{1st digit} &\\multicolumn{2}{c}{2nd digit} 
                        \\\\\n","& Cases & Deaths &Cases & Deaths  & Cases & Deaths&Cases & Deaths    \\\\\n")
  print(xtable(N, digits = 0, align = "rcccccccc", label = file,
               caption = tab.caption),
        add.to.row = addtorow,
        include.colnames = FALSE,
        file = file)
}

############################################################################
############################ Extract pvalues ###############################
############################################################################


# Compute Monte Carlo chi-square p-values:
#   * goodness of fit of each country's digit counts to Benford's law;
#   * homogeneity of the digit distribution across all countries ("all");
#   * homogeneity between China and the other countries pooled ("china").
# Raw, Benjamini-Hochberg and Benjamini-Yekutieli adjusted values are
# returned; the adjustment is applied to all p-values jointly.
#
# Arguments:
#   l1.day, l2.day  results of do.violins() for the 1st / 2nd digit of the
#                   daily data (components df.x and df.n are used).
#   l1.cum, l2.cum  same for the cumulative data.
#   B               number of Monte Carlo replicates for chisq.test().
#
# Returns list(dfBenford, dfGroup), both in long format with columns
# type.adj ("none", "BH", "BY") and pval.
extractPvals <- function(l1.day, l2.day, l1.cum, l2.cum, B = 50) {
  type.levels <- c("1stDaily", "2ndDaily", "1stCum", "2ndCum")
  parts <- list(l1.day, l2.day, l1.cum, l2.cum)

  # Stack one component of the four inputs and tag rows with the digit type.
  stack.tagged <- function(slot) {
    pieces <- lapply(parts, function(l) l[[slot]])
    out <- do.call(rbind, pieces)
    nb.rows <- vapply(pieces, nrow, integer(1))
    cbind(out, type.digit = rep(type.levels, times = nb.rows))
  }
  dfx <- stack.tagged("df.x")
  dfn <- stack.tagged("df.n")

  p01 <- log(1 + 1 / 1:9) / log(10)
  p02 <- vapply(0:9, function(d) sum(log(1 + 1 / (d + 10 * 1:9)) / log(10)),
                numeric(1))

  v.what <- as.character(unique(dfx$what))
  v.who <- as.character(unique(dfx$who))
  v.type.digit <- as.character(unique(dfx$type.digit))

  # Locate China by name rather than by position in v.who.
  china.row <- match("allchina", v.who)

  benford.rows <- list()
  group.rows <- list()
  for (e.what in v.what) {
    for (e.digit in v.type.digit) {
      first.digit <- e.digit %in% c("1stDaily", "1stCum")
      p0 <- if (first.digit) p01 else p02
      # Second digits start at 0, so they are stored at position digit + 1.
      plus <- if (first.digit) 0 else 1

      # Digit counts, one row per country.
      N <- matrix(0, nrow = length(v.who), ncol = length(p0))
      for (i in seq_along(v.who)) {
        e.who <- v.who[i]
        df2x <- filter(dfx, ((what == e.what) & (who == e.who) &
                               (type.digit == e.digit)))
        df2n <- filter(dfn, ((what == e.what) & (who == e.who) &
                               (type.digit == e.digit)))
        n <- as.numeric(sub("^\\(n=(.*)\\)$", "\\1",
                            as.character(df2n$n)[1]))

        # freq * n is a count up to floating-point error; round instead of
        # letting later steps truncate e.g. 2.9999999 to 2.
        count <- rep(0, length(p0))
        tmp.nm <- as.numeric(as.character(df2x$digit))
        count[tmp.nm + plus] <- round(df2x$freq * n)
        N[i, ] <- count

        pval <- chisq.test(count, p = p0, simulate.p.value = TRUE,
                           B = B)$p.value
        benford.rows[[length(benford.rows) + 1]] <-
          data.frame(who = e.who, what = e.what, type.digit = e.digit,
                     pval = pval)
      }

      pvalAll <- chisq.test(N, simulate.p.value = TRUE, B = B)$p.value
      if (is.na(china.row) || length(v.who) < 2) {
        pvalChina <- NA_real_
      } else {
        N.china <- rbind(N[china.row, ],
                         colSums(N[-china.row, , drop = FALSE]))
        pvalChina <- chisq.test(N.china, simulate.p.value = TRUE,
                                B = B)$p.value
      }
      group.rows[[length(group.rows) + 1]] <-
        data.frame(what = e.what, type.digit = e.digit,
                   pvalAll = pvalAll, pvalChina = pvalChina)
    }
  }
  dfBenford <- do.call(rbind, benford.rows)
  dfGroup <- do.call(rbind, group.rows)

  # Multiplicity adjustment over every p-value computed above.
  all.p <- c(dfBenford$pval, dfGroup$pvalAll, dfGroup$pvalChina)
  tmpBH <- p.adjust(all.p, method = "fdr")
  tmpBY <- p.adjust(all.p, method = "BY")
  nB <- nrow(dfBenford)
  nG <- nrow(dfGroup)
  ind.all <- (1 + nB):(nB + nG)
  ind.china <- (1 + nB + nG):(nB + 2 * nG)

  dfBenford$BH <- tmpBH[1:nB]
  dfBenford$BY <- tmpBY[1:nB]

  dfGroup$AllBH <- tmpBH[ind.all]
  dfGroup$ChinaBH <- tmpBH[ind.china]
  dfGroup$AllBY <- tmpBY[ind.all]
  dfGroup$ChinaBY <- tmpBY[ind.china]

  tmp1 <- data.frame(test = rep("all", nG),
                     dfGroup[, c("what", "type.digit", "pvalAll",
                                 "AllBH", "AllBY")])
  # The China block must carry China's own BH and BY adjusted values.
  tmp2 <- data.frame(test = rep("china", nG),
                     dfGroup[, c("what", "type.digit", "pvalChina",
                                 "ChinaBH", "ChinaBY")])
  names(tmp1)[4:6] <- c("none", "BH", "BY")
  names(tmp2)[4:6] <- c("none", "BH", "BY")
  dfGroup <- rbind(tmp1, tmp2)
  names(dfBenford)[4] <- "none"

  dfBenford <- melt(dfBenford, id = 1:3, variable.name = "type.adj",
                    value.name = "pval")
  dfGroup <- melt(dfGroup, id = 1:3, variable.name = "type.adj",
                  value.name = "pval")

  list(dfBenford = dfBenford, dfGroup = dfGroup)
}
  
######################################################################################
######################### Plots pvals
######################################################################################

# Plot the p-values (in %) of the Benford goodness-of-fit tests and of the
# between-country tests, and export them.
#
# Arguments:
#   dfBenford  long-format p-values per country (columns who, what,
#              type.digit, type.adj, pval), as returned by extractPvals().
#   dfGroup    long-format p-values of the group tests (columns test, what,
#              type.digit, type.adj, pval), as returned by extractPvals().
#   path       prefix pasted in front of the output file names (NULL: none).
#
# Side effects: writes minpval.tex, minpvalsGroup.tex, pvalBenford.csv and
# pvalGroup.csv under `path`.
#
# Returns list(pBenford, pGroup), two ggplot objects.
plotPvals <- function(dfBenford, dfGroup, path = NULL) {
  digit.labs <- c(expression("1st digit \n Daily data"),
                  expression("2nd digit \n Daily data"),
                  expression("1st digit \n Cumulative data"),
                  expression("2nd digit \n Cumulative data"))
  names(digit.labs) <- c("1stDaily", "2ndDaily", "1stCum", "2ndCum")

  # Countries are taken from the argument (not from a global object), and
  # `who` may be either a factor or a character column.
  nml <- if (is.factor(dfBenford$who)) {
    levels(dfBenford$who)
  } else {
    unique(as.character(dfBenford$who))
  }
  ctry.labs <- c(allchina = "China", allcanada = "Canada",
                 allusa = "USA", allfrance = "France")
  tmp.who <- names(ctry.labs)[names(ctry.labs) %in% nml]
  who.labs <- ctry.labs[tmp.who]

  adj.levels <- c("none", "BH", "BY")
  adj.labs <- c("Raw values", "Benjamini-Hochberg", "Benjamini-Yekutieli")
  names(adj.labs) <- adj.levels

  what.levels <- c("confirm", "dead")
  what.labs <- c("Confirmed cases", "Deaths")
  names(what.labs) <- what.levels

  ######################## Plot pvalsBenford

  dfBenford <- transform(dfBenford,
                         type.adj = factor(type.adj, levels = adj.levels))
  dfBenford <- transform(dfBenford, who = factor(who, levels = tmp.who))
  dfBenford <- transform(dfBenford, what = factor(what, levels = what.levels))

  txt <- TeX("P-values (in %) for $\\chi^2$ goodness-of-fit tests")
  dfBenford$pval <- round(100 * dfBenford$pval, 1)
  write(min(dfBenford$pval), file = paste0(path, "minpval.tex"))
  col.ctry <- c("#EFC000FF", "#868686FF", "#CD534CFF", "#0073C2FF")

  pBenford <- ggplot(data = dfBenford,
                     aes(x = type.digit, y = pval, col = who, shape = what)) +
    theme_bw() +
    geom_point(size = 3, position = position_jitter(width = 0.25, height = 1),
               alpha = .7) +
    theme(legend.position = "bottom", legend.direction = "horizontal",
          legend.box = "vertical") +
    theme(axis.text.x = element_text(color = "black",
                                     size = 10, angle = 30,
                                     vjust = .8, hjust = 0.8)) +
    scale_color_manual(values = col.ctry, labels = who.labs) +
    scale_shape_discrete(labels = what.labs) +
    scale_x_discrete(labels = digit.labs) +
    labs(col = "Countries", shape = "Type of data") +
    geom_hline(yintercept = 5, linetype = 2, alpha = .5, col = "Darkred") +
    facet_grid(~type.adj, labeller = labeller(type.adj = adj.labs)) +
    xlab("") + ylab("") + ggtitle(txt)

  ######################## Plot pvalsGroup

  test.levels <- c("all", "china")
  test.labs <- c("China vs Canada vs US vs France", "China vs Others")
  names(test.labs) <- test.levels

  dfGroup <- transform(dfGroup,
                       type.adj = factor(type.adj, levels = adj.levels))
  dfGroup <- transform(dfGroup, what = factor(what, levels = what.levels))
  dfGroup <- transform(dfGroup, test = factor(test, levels = test.levels))

  txt <- TeX("P-values (in %) for $\\chi^2$ independence tests")
  dfGroup$pval <- round(100 * dfGroup$pval, 1)
  col.test <- c("#EFC000FF", "#0073C2FF")

  pGroup <- ggplot(dfGroup,
                   aes(x = type.digit, y = pval, shape = what,
                       colour = test)) +
    theme_bw() +
    geom_point(size = 3, position = position_jitter(width = 0.25, height = 1),
               alpha = .7) +
    theme(legend.position = "bottom", legend.direction = "vertical",
          legend.box = "horizontal") +
    theme(axis.text.x = element_text(color = "black",
                                     size = 10, angle = 30,
                                     vjust = .8, hjust = 0.8)) +
    scale_color_manual(values = col.test, labels = test.labs) +
    scale_shape_discrete(labels = what.labs) +
    scale_x_discrete(labels = digit.labs) +
    labs(col = "Categories of countries", shape = "Type of data") +
    facet_grid(~type.adj, labeller = labeller(type.adj = adj.labs)) +
    geom_hline(yintercept = 5, linetype = 2, alpha = .5, col = "Darkred") +
    xlab("") + ylab("") + ggtitle(txt)

  write(min(dfGroup$pval), file = paste0(path, "minpvalsGroup.tex"))
  write.csv(dfBenford, file = paste0(path, "pvalBenford.csv"))
  write.csv(dfGroup, file = paste0(path, "pvalGroup.csv"))

  list(pBenford = pBenford, pGroup = pGroup)
}

