Merger/filtering script

Typically, a Starsim run produces either a single output file or a series of files with names like gstar.1.fz, gstar.2.fz, etc. Regardless of whether we run locally or on the Grid, there is a small chance that the file(s) will be truncated. To guard against feeding incorrect data to the reconstruction stage, and/or to split or merge a few files, a KUMAC script has been developed. Among other things, it discards incomplete events and produces serially numbered files with names like rcf1319_01_100evts.fzd. Such a name contains the name of the dataset, the serial number of the file (distinct from the numbering of the input files), and the number of events contained therein, all of which is helpful in setting up or debugging the production. The script has recently been simplified (although it is still not easily readable) and wrapped into a utility shell script, which does the preparation work as well as the cleanup. The resulting script, named "filter.tcsh", takes a single argument, which is assumed to be the name of the dataset (and which is then used in naming the output files).

#! /usr/local/bin/tcsh -f
#
# filter.tcsh <dataset>
#
# Preparation stage: validates the argument, removes leftovers of a
# previous run, builds the list of input serial numbers and reports
# the disk usage before the KUMAC macro is generated and executed.
#
# The dataset name is mandatory (it becomes the stem of every output
# file); stop before anything is deleted if it is missing.
if ( $#argv < 1 ) then
    echo "usage: filter.tcsh <dataset>"
    exit 1
endif
#
# remove the old list of files and the old macro
# (rm -f is silent when a file does not exist)
rm -f process.list filter.kumac
#
# Extract the serial number N from every gstar.N.fz and sort the
# numbers numerically. The prefix and the suffix are stripped with
# anchored expressions; a bracket expression such as [gstar.|.fz]
# would delete those characters anywhere in the name.
ls gstar.*.fz | sed -e 's/^gstar\.//' -e 's/\.fz$//' | sort -n > process.list
#
# clean the trash bin before the next run, re-create
rm -fr trash
mkdir trash
echo `du --block-size=1000K -s | cut -f1` MB in the current directory
# -P keeps the df record on a single line, so the fourth column is
# always the available space, whatever the length of the device name.
echo `df -P --block-size=1000K . | tail -1 | awk '{print $4}'` MB available on disk
cat<<EOF>>filter.kumac
macro filter name
* ------------------------------------------------------------------
* filter.kumac - generated by filter.tcsh and executed by starsim.
*
* Argument:
*   name - dataset name, used as the stem of every output file.
*
* Reads gstar.N.fz for every serial number N listed in process.list
* and copies the events into temporary name_MM.fzt files. When an
* output file is closed it is renamed to name_MM_NEVevts.fzd, where
* NEV is the number of events counted for that file.
*
* NOTE: this text goes through an unquoted shell here-document, so
* every KUIP system function below must keep its escaped dollar sign,
* and comments must not contain dollar signs or backquotes.
* ------------------------------------------------------------------
input='gstar'
mess Start with filenames [input].*.fz, converting to [name]
ag/version batch
option stat
option date
option nbox
filecase keep
* working directory and number of input files, obtained from the shell
pwd =\$shell('pwd');
nfiles=\$shell('cat process.list | wc -l | sed -e "s/[\ ]*//g"');

message Starting to process [nfiles]
* trace on
* runs(i) holds the serial number of the i-th input file, as listed
* in process.list
ve/cr runs([nfiles]) I
ve/read runs process.list
ve/pri runs

* the dataset name is mandatory: leave the program if it is empty
if (\$Len([name]).eq.0) then
message dataset name is not defined, nothing to do in [pwd]
exit
endif
* namz is the output file stem; when the environment variable OUTDIR
* is set the output goes to OUTDIR/name/name instead of ./name
namz=[name]
out =\$env('OUTDIR')
if ([out].ne.'') then
namz = [out]/[name]/[name]
endif

* lenb is the divisor applied to the size reported by ls -s
lenb = 1000
message reading
* id(1..3) is the breakpoint record kept in the file nn
ve/cr id(3) I
* ve/read id N
message reading complete
nt=[nfiles] | total number of files to process
n1=runs(1) | first input file
n2=runs([nfiles]) | last input file
mm = 0 | number of output files
nn = 0 | number of processed files
cnt = 0 | total number of events in this job
cno = 0 | number of events when output has been opened
nev = 0 | number of events in this output
ii = 0 | input active flag
io = 0 | output active flag
len0= 1200 | minimum output file len
len1= [len0]+200 | average output file len - stop at end-of-file
len2= [len1]+200 | maximum output file len - stop always
ni = [n1] | first input file
no = 0 | skip up to this file
nd = [n1] | file to delete
ntrig = 10
*
* Restart support: if a breakpoint file nn exists, report its content
* and continue the output numbering from the stored value. Only mm is
* used afterwards; na and no are printed but not referenced again.
if (\$fexist(nn).gt.0) then
ve/read id nn
na=id(1); message [na] input files already done
no=id(2); message first input files up to gstar.[no]
mm=id(3); message first output files up to [name].[mm]
mm=[mm]-1;
endif
*
* Histogram file: an existing name.his is set aside as old.his before
* ghist is called, and is read back with HRGET further below.
hist = [name].his
if (\$fexist([hist]).gt.0) then
shell mv [hist] old.his
* call HRGET(0,\$quote([hist]),' ')
endif
ghist [hist]
cdir //pawc
mdir cont
if (\$fexist(old.his).gt.0) then
call HRGET(0,\$quote(old.his),' ')
endif

* open the first input file, then load the shared libraries found
* in ../.lib (control.sl and index.sl)
gfile p gstar.[n1].fz
mode control prin 1 hist 0 | simu 2
gexec ../.lib/control.sl
gexec ../.lib/index.sl

message loaded libs

* graphics output goes to name.ps through unit 66
title=merging runs [n1]-[n2] in [name]
fort/file 66 [name].ps; meta 66 -111
next; dcut cave x .1 10 10 .03 .03
Set DMOD 1; Igset TXFP -60; Igset CHHE .35
ITX 5 19.5 \$quote([title])
ITX .5 .1 \$quote([pwd])
*
* do ni = [ni],[n2]
* frst is the index into runs() where the search for the next input
* file starts
frst=1
ag/version interactive
* Main loop. Each pass makes sure an input and an output file are
* open, runs trig once, and then decides whether the current output
* file has to be closed. The loop is left through the label alldone.
do iev=1,1000000000000
* new input file ?
if ([ii].eq.0) then
* take the first listed file, from index frst on, that exists and
* opens without error (iquest(1) equal to 0 after gfile)
do nfoo=[frst],[nfiles]
ni = runs([nfoo])

file = [input].[ni].fz
filz = [input].[ni].fz.gz
hist = [input].[ni].his
message processing index [nfoo] out of [nfiles]
ve/print runs([nfoo])
*
if (\$fexist([file]).gt.0) then
message loop with [file]
gfile p [file]
if (\$iquest(1).eq.0) then
ii = 1
nn = [nn]+1
* read the histograms that accompany this input file, if any; the
* option A is used when hexist(-1) is not zero
if (\$fexist([hist]).gt.0) then
if (\$hexist(-1).eq.0) then
call HRGET(0,\$quote([hist]),' ')
else
call HRGET(0,\$quote([hist]),'A')
endif
endif
call indmes(\$quote([file]))
goto nextf
* iquest:
endif
* fexist:
endif
enddo
* no usable input file is left: close the current output
goto nexto
endif

nextf:
* new output file ?
if ([io].eq.0) then
mm = [mm]+1
* serial numbers below 10 are written with a leading zero
if ([mm].lt.10) then
output=[namz]_0[mm]
else
output=[namz]_[mm]
endif
io = 1
* remember the job event count at the moment the output is opened
cno = [cnt]
gfile o [output].fzt
iname = [name]_[mm].fzt
call indmes(\$quote([iname]))
endif

* processing next event
call rzcdir('//SLUGRZ',' ')
trig [ntrig]
evt = \$iquest(99)

* a non-zero iquest(1) after trig marks the current input as finished:
* advance to the next list entry and drop the input active flag
if (\$iquest(1).ne.0) then
ni = [ni]+1
frst=[frst]+1
ii = 0
endif
if ([ii].eq.0) goto nexto
* get output file length in MB:
* (first word of ls -s divided by lenb; NOTE(review): this is MB only
* if ls reports 1K blocks - confirm on the target system)
cmd = ls -s [output].fzt
len = \$word(\$shell([cmd]))
len = [len]/[lenb]
* mess wrquest len=[len] ii=[ii] evt=[evt]
* keep writing while the file is shorter than len0, or shorter than
* len1 with an active input, or shorter than len2 with an active
* input and evt equal to 0
if ([len].lt.[len0]) goto nextev
if ([len].lt.[len1] .and. [ii].gt.0) goto nextev
if ([len].lt.[len2] .and. [ii].gt.0 .and. [evt].eq.0) goto nextev
* output file done
nexto:
* nev = events counted since this output file was opened
cnt = \$iquest(100)
if ([cnt]<0) then
cnt = 0
endif
nev = [cnt]-[cno]
io = 0
*
* the output is finalised only when it holds at least one event and
* fewer than 199999 events; otherwise the .fzt file is left as it is
if ([nev].gt.0) then
if ([nev].lt.199999) then
* terminate last event, clear memory
call guout
call gtrigc
gfile o
* rename temp file into the final one:
cmv = mv [output].fzt [output]_[nev]evts.fzd
i = \$shell([cmv])
endif
endif
message files inp = [ni] out = [mm] cnt = [cnt] done
*
if ([ii].eq.0) then
* The input file was finished: ni already points to the NEXT file to
* read and mj is the next output serial number. Store the breakpoint
* (processed files, next input, next output) in the file nn.
mj = [mm] + 1 | this is next to start write after the BP
message writing breakpoint [nn] [ni] [mj]
ve/inp id [nn] [ni] [mj]
ve/write id nn i6
ntrig = 10
************************************
* moving files to TRASH
* every file matching *.nd.* is moved for each serial number nd below
* ni whose gstar.nd.fz still exists
while ([nd].lt.[ni]) do
filed = [input].[nd].fz
alrun = *.[nd].*
if (\$fexist([filed]).gt.0) then
shell mv [alrun] trash/
endif
nd = [nd] + 1
endwhile
************************************
else
* output closed while the input is still active: raise ntrig by one
ntrig = [ntrig] + 1
endif
* stop once the input counter has moved past the last listed file
if ([ni].gt.[n2]) goto alldone
nextev:
enddo

* control histogram
alldone:
* filter.done is created only when every listed file was processed
if ([nn].eq.[nt]) then
shell touch filter.done
endif
cdir //pawc
tit = files [n1] - [n2] in set [name]
title_global \$quote([tit])
next; size 20.5 26; zone 2 4;
hi/pl 11; hi/pl 12; hi/pl 13; hi/pl 14
* NOTE(review): hexist is documented to return 0 or 1, so the test
* .gt.1 looks never true and the ntuple plots are skipped - confirm
* whether .gt.0 was intended before changing it.
if (\$hexist(1).gt.1) then
n/pl 1.ntrack; n/pl 1.Nvertx; n/pl 1.NtpcHit; n/pl 1.Ntr10
endif
swn 111 0 20 0 20; selnt 111
ITX 2.0 0.1 \$quote([pwd])
close 66; meta 0
physi
exit
return
EOF
echo ------------------------------------------------------------------
echo Activating starsim for dataset $1
# Run starsim on the generated macro; the dataset name is handed over
# as the macro argument.
$STAR_BIN/starsim -w 1 -g 40 -b ./filter.kumac $1
# cleanup
# tcsh aborts a command with "No match" when none of its glob patterns
# matches, which would skip the whole cleanup if no *.his or *.ps file
# was produced. nonomatch leaves such patterns untouched, and rm -f
# ignores names that do not exist (for instance filter.done after an
# incomplete run).
set nonomatch
rm -f ZEBRA.O process.list nn index paw.metafile *.his *.ps filter.done filter.kumac
unset nonomatch