#! /bin/csh
#++++++++++++++++
# Copyright:    (C) 2008-2017 UDS/CNRS
# License:       GNU General Public License
#.IDENTIFICATION Detex
#.LANGUAGE       C-shell
#.AUTHOR         Francois Ochsenbein [CDS]
#.ENVIRONMENT    CDS Catalogues
#.KEYWORDS
#.VERSION  1.0   15-Feb-1992
#.VERSION  1.1   21-Jun-1998: Allow HTML
#.VERSION  1.2   06-May-1999: Allow tsv : csv -d
#.VERSION  1.3   28-Jun-2001: Verify better the Numeric fields
#.VERSION  1.4   16-Apr-2005: Problem with <(followed by numeric)
#.VERSION  1.5   16-Feb-2014: Independent from cats
#.PURPOSE        Generate awk commands to convert a Table
#.COMMENTS       The program has 2 names: Detex / Dehtml
#----------------
#
### Called without any argument: behave exactly as if -help had been given
if ($#argv == 0) exec $0 -help

#	Definition of some variables
#
#set verbose
# echo2 is the command used for diagnostics and usage text; it is
# currently a plain alias of echo.
alias echo2 echo
# pgm is the name under which the script was called; it also serves as
# the stem of the intermediate files through the tt variable.
set pgm = `basename $0`
# The usage text must sit inside a single pair of double quotes: outside
# the quotes the vertical bar is read as a pipe and the bracket as a glob,
# and the usage variable then never gets defined in this shell.
set usage = "Usage: $pgm [-html|-vot] file ..."
set tt = "$pgm"
# Locate the CATS root, i.e. the directory whose lib subdirectory holds
# bibdef.tex, unless CATS is already set.  Candidates are tried in order:
#   1. the parent of the directory containing this script
#   2. /home/cats
#   3. the cats directory under HOME
if (! $?CATS) then
    set dir = `dirname $0`
    set rot = `dirname $dir`
    if (-r $rot/lib/bibdef.tex) then
        setenv CATS $rot
    else if (-r /home/cats/lib/bibdef.tex) then
        setenv CATS /home/cats
    else if (-r $HOME/cats/lib/bibdef.tex) then
        # The file was found under HOME/cats, so that directory is the root
        setenv CATS $HOME/cats
    else
        echo2 "#...Missing configuration files in $rot/lib ! "
	exit 1
    endif
endif
# Extra arguments handed to gawk further down; none by default
set o = ()
#
#	Initial state before the options are scanned
#
# Converter settings: LaTeX input is assumed until an option changes it
set filter = cat
set defile = $CATS/lib/bibdef.tex
set cvprog = "tex2a $tt"
# Flags and counters, all cleared:
#   verbop  verbose mode	dontop  -n option	remove  -rm option
#   cont    -cont		force   -force
#   format  incremented by each explicit input format option
set verbop=0 dontop=0 remove=0
set cont=0 force=0 format=0

# When called under a name ending in html, act as if -html came first
if ($0 =~ *html) set argv = (-html $argv:q)

#
#	Main loop over the arguments, from left to right.
#	Options only change the conversion settings; any other argument is
#	taken as an input file and converted at once with the settings
#	accumulated so far.  For an input file X the loop writes
#	X.T (tab-separated values), X.s (proposed conversion commands),
#	X.f (proposed format) and, if confirmed, X.A (ascii table).
#
while ( $#argv >= 1 )
    if ($verbop == 1)	echo2 -n " $1"
    switch ("$1")
    case "-v":		# Verbose mode
    	set verbop = 1
    	echo2 -n "#...${pgm}: examine argument "
    	breaksw
    case "-h":
    case "-help":
    	echo2 "$usage"
	echo2 "     -html: input is in HTML"
	echo2 "     -vot : input is in VOTable"
	echo2 "  file is a LaTeX, html or VOTable file to convert"
    	exit 0
    case "-ht*":		# HTML input, no definition file prepended
	set defile = ""
	set cvprog = "sgml2 -f $CATS/lib/tsv-def.htm $tt.tmp"
	@ format += 1
	breaksw
    case "-tex":		# LaTeX input, same settings as the default
	set cvprog = "tex2a $tt"	# Default latex
	set defile = $CATS/lib/bibdef.tex
	@ format += 1
	breaksw
    case "-vot*":		# VOTable input, with an extra gawk filter
	set defile = ""
        set cvprog = "sgml2 -s -f  $CATS/lib/votable_tsv.dic $tt.tmp"
	set filter = ( gawk -f $CATS/lib/Detex-vot.awk )
	@ format += 1
	breaksw
    case "-csv":		# ;-sep values
	# cvprog set to tr selects the delimiter translation branch below
	set cvprog = tr
	if (! $?delim) then
	    set delim = ';'
	endif
	set defile = ""
	@ format += 1
	breaksw
    case "-tsv":		# Already tab-separated, just copy
	set cvprog = (cat $tt.tmp)
	set defile = ""
	@ format += 1
	breaksw
    case "-[dtF]?":
	# -d, -t or -F immediately followed by one character: that
	# character, taken from position 3 on, is the field delimiter.
	# The whole option is remembered for a possible re-exec below.
	set delimarg = $argv[1]:q
	set delim = "`echo $argv[1]:q | cut -c3-`"
	breaksw
    case "-*":
    	echo2 "*** ${pgm}: Bad option $1"
    	echo2 "$usage"
    	exit 1
    default:		# Assume LaTeX
	# No explicit format option seen so far: guess the format from
	# the file name and restart the script with the matching option.
	# A name matching none of the patterns is processed as LaTeX.
	if ($format == 0) then
	    if ($verbop) then
	        echo2 " [implied category]"
	        set ov = "-v"
	    else
	        set ov = ""
	    endif
	    if ($1 =~ *.sgm*) exec $0 $ov -html $*
	    if ($1 =~ *.htm*) exec $0 $ov -html $*
	    if ($1 =~ *.tsv*) exec $0 $ov -tsv $*
	    if ($1 =~ *.vot*) exec $0 $ov -vot $*
	    if ($1 =~ *.xml*) exec $0 $ov -vot $*
	    if ($1 =~ *.csv*) then
	        # The verbose flag is carried over here as well, as it is
	        # for every other format above.
	        if ($?delimarg) then
		    exec $0 $ov $delimarg:q -csv $*
		endif
	        exec $0 $ov -csv $*
	    endif
	else
	    if ($verbop) echo2 "  [go]"
	endif
	# Step 1: prepare the converter input.  With a .tex definition
	# file the result goes to the .tex work file, otherwise to the
	# .tmp work file.  fcat and reformat are external helpers.
	if ("$defile" =~ *.tex) then
	    fcat $defile $1 | sed '/^\\%/d' | sed 's/ *$//' >! $tt.tex
	else
	    #echo cat $defile $1 ...
	    #set verbose
	    # A < directly followed by a digit is not a tag: write it &lt;
	    # Change <TD>~</TD> ==> <TD></TD>
	    fcat $defile $1 | reformat -c -w 8000 \
	    | sed 's/<\([0-9]\)/\&lt;\1/g'  \
	    | sed 's%>[~]</%></%g' >! $tt.tmp
	endif
	# Step 2: run the converter, producing the .T file
	echo "#...Executing $cvprog > $1.T"
	if (-f $1.T) then
	    confirm "#---Remove $1.T" && rm -f $1.T
	endif
	if ("$cvprog" == tr) then
	    # csv: turn the delimiter into a tab, octal 11
	    tr $delim:q '\11' < $tt.tmp | acut -d'	' -f1-j > $1.T
	else if ("$filter" == "cat") then
	    rm -f $1.e
	    $cvprog | acut -d -f1-j > $1.T
	else
	    # With a real filter, whatever the pipeline still prints is
	    # shown and kept in the .e file, which is read back below.
	    (($cvprog | $filter) | acut -d -f1-j > $1.T) |& tee $1.e
	endif
	#set verbose
	# Step 3: build the .s file.
	# Reformat the output from anafile -ccg
	# The gawk arguments are cleared for each file, so that the .e
	# file of a previously processed argument is never reused.
	set o = ()
	if (-r $1.e) set o = (-v f=$1.e)
	# The gawk program first loads from the .e file the lines whose
	# first field is 1X.  Lines after the Proposed acut header are
	# copied as they are, all other lines get a leading #.  For lines
	# starting with 0X or 1X and naming Col<n>, the type placeholder of
	# the n-th loaded line is replaced by the second field.  The
	# loaded lines are appended at the end, after a line of #.
	anafile -d -ccg $1.T | gawk $o '\
	 BEGIN{ if (f != "") { while((getline < f)>0) { \
	    if($1 == "1X") { n++; F[n] = $0 }}}}\
	 /^#Proposed acut/   { prog=1 } \
	 /^#Proposed format/ { prog=0 } \
	 /^------/ {  sub(/-/, "#") } \
	 /^[01]X .*Col[1-9]/ { i=index($0,"Col"); i=substr($0,i+3)+0; \
	    sub(/[AaEeIiFfDd][.][.][.]/,  $2, F[i]) } \
	 { if(prog==0) printf "#"; print } \
	 END{print "#################################################"; \
	     for(i=1;i<=n;i++) print "#" F[i] }' > $1.s
	# Former generator of the .s file, kept for reference:
	#gawk -v fil=$1 'BEGIN { FS="\t"; flds = 0 } \
	#  function compress(s){ gsub(/ +/," ",s); \
	#       sub(/^ /,"",s); sub(/ $/,"",s); return(s) }\
	#    { if (NF>flds) { while(flds<NF) L[++flds] = 0}\
	#      for (i=1; i <= NF; i++) { x=compress($i); \
	#	sub(/[+-]*[0-9.Ee+-]*/, "", x); if(x!="") A[i]+=1;\
	#	n = length($i); id = index($i, "."); if (id != 0) id = n-id; \
	#	ii = L[i] - nd[i] + id ; if (n < ii) n = ii ; \
	#	if (n > L[i]) L[i] = n; if (id > nd[i]) nd[i] = id }} \
	#   END { printf("# To Convert file %s\n\nacut -d^\\t^\\\n", fil);\
	#	for (i=1; i <= flds; i++) { \
	#	  if (L[i] == 0) continue;\
	#	  if(A[i]>0) m="-"; else m="";\
	#	  printf("\t-i^ ^ -f%d%%%s%d", i, m,L[i]);\
	#	  if (A[i] > 0) { j = "l"; F[i] = "A" L[i]}\
	#	  else if (nd[i] == 0) {j = "r"; F[i] = "I" L[i]}\
	#	  else { j = "." nd[i]; F[i] = "F" L[i] j;}\
	#	  print j "\\";\
	#	} print " | sed ^s/ *$//^ ";\
	#	print "#... Suggested Format:";\
	#	o = 2; for (i=1; i <= flds; i++) { \
	#	  if (L[i] == 0) continue;\
	#	  printf("#1X-%3d %-6s  ---  Field_%d\n", o, F[i], i);\
	#	  o += L[i] + 1; \
	#	}}'  $1.T | sed "s/\^/'/g" > $1.s
	# Merge .s + .e
	# Step 4: confirm is an external helper; a nonzero status is
	# taken here as a refusal, in which case only the .f file is made.
	confirm "----Transform  TSV (.T) into Ascii ($1.A)"
	if ($status) then
	    if (-r $1.s) echo "#===Transformation TSC>Ascii in: $1.s"
	    # Keep the .s lines from the Proposed format header on, and
	    # drop the leading # of the lines from the separator on.
	    gawk '/^##Proposed format/{ k=1; print; next } \
	      /^#########/ { k=2 } \
	      { if(k<1) next; if (k==2) sub(/^#/, ""); print } '\
	    $1.s > $1.f
	    if (-r $1.f) echo "#===Format in file: $1.f"
	else
	    # Run the commands of the .s file on the non-empty .T lines
	    echo -n "#...sed '"'/^$/d'"' $1.T | (source $1.s) > $1.A "
	    sed '/^$/d' $1.T | (source $1.s) > $1.A
	    echo "(done)"
	    # NOTE(review): the marker searched for below is only seen in
	    # the former generator kept in comments above; the .f file may
	    # come out empty with the current .s layout -- to be confirmed.
	    gawk '/^#... Suggested Format/ { p=1; next}\
	         {if (p == 1) print substr($0,2)}' $1.s > $1.f
	    echo "====$1.A Ready. See suggested format in $1.f"
	    confirm "----Try Anafile -f1w $1.f $1.A"
	    if (! $status) then
	        anafile -f1w $1.f $1.A | less
	    endif
	endif
    	breaksw
    endsw
    shift
end
# Close the verbose trace if one was requested
if ($verbop == 1) then
    echo2 "=== ${pgm} (END)"
endif
# The intermediate files are left in place; cleanup is kept disabled:
#rm -i $tt.*
#
exit 0
