#!/bin/bash
#
# Process status.tar.gz files that customers send in.  These files are tar
# archives of the Kerio mail store's status.fld files.  They contain both
# the folder structure of the store and the status.fld files, which hold
# a lot of information about each folder.
#

# Print the ten longest directory paths below the current directory, one per
# line as "<length> <path>", shortest first.
#
# Directories are enumerated with find(1) rather than by parsing `ls -R *`:
# entries whose name starts with '-' are no longer taken for options, and
# plain file names no longer compete with directory paths in the ranking.
# The length is that of the path relative to the current directory.
deepest_folder_paths() {
  find . -type d |
    sed -n 's|^\./||p' |   # drop "." itself and the leading "./"
    awk '{ print length($0), $0 }' |
    sort -n |
    tail -n 10
}

echo
echo "Deepest folder paths:"
echo "(could cause failures to traverse too deep in some libs, clients or filesystems)"
deepest_folder_paths

# Print "<top-level-dir><TAB><count>" for the ten top-level directories that
# contain the most status.fld files, smallest count first.  Directories
# whose name starts with '#' are skipped.
#
# The name and the count are separated by a tab so that the numeric sort key
# is always field 2, even when a directory name contains spaces and
# regardless of how wc(1) pads its output.
folders_per_user() {
  local dir count
  for dir in [!#]*/; do
    [[ -d "$dir" ]] || continue   # glob matched nothing
    # "./" keeps find from treating a name starting with '-' as an option;
    # the arithmetic expansion strips any padding wc adds.
    count=$(( $(find "./$dir" -name status.fld | wc -l) ))
    printf '%s\t%d\n' "$dir" "$count"
  done |
    sort -t $'\t' -k 2,2n |
    tail -n 10
}

echo
echo "Most folders per user:"
echo "(can affect mail program startup time or refresh time)"
folders_per_user

# Print "<item-count> <folder-path>" for the ten folders whose status.fld
# reports the highest number on its M line, smallest first.
#
# grep is run on batches of files (-exec ... +) instead of once per file,
# and only M lines that start with a digit are considered, so every line
# reaching sort has the "<number> <path>" shape.
items_per_folder() {
  find . -name status.fld -exec grep -H '^M[0-9]' {} + |
    # "<dir>/status.fld:M<n>" -> "<n> <dir>"
    sed 's/^\(.*\)\/status\.fld:M\([0-9][0-9]*\)/\2 \1/' |
    sort -n |
    tail -n 10
}

echo
echo "Most items per folder:"
echo "(can affect re-index time, and can lock out users or groups of users)"
items_per_folder

# Print "<size> <folder-path>" for the ten folders whose status.fld reports
# the highest number on its S line, smallest first.  After sorting, sizes are
# abbreviated with decimal K/M/G suffixes (truncated, not rounded); values
# below 1000 are printed unchanged.
#
# grep is run on batches of files (-exec ... +) instead of once per file,
# and only S lines that start with a digit are considered, so no raw
# "file:line" text can leak into the report.
folder_sizes() {
  find . -name status.fld -exec grep -H '^S[0-9]' {} + |
    # "<dir>/status.fld:S<n>" -> "<n> <dir>"
    sed 's/^\(.*\)\/status\.fld:S\([0-9][0-9]*\)/\2 \1/' |
    sort -n |
    tail -n 10 |
    # Largest unit first; once a suffix is attached the later expressions
    # no longer match, so each line is abbreviated at most once.
    sed -e 's/^\([0-9]\{1,3\}\)[0-9]\{9\} /\1G /' \
        -e 's/^\([0-9]\{1,3\}\)[0-9]\{6\} /\1M /' \
        -e 's/^\([0-9]\{1,3\}\)[0-9]\{3\} /\1K /'
}

echo
echo "Largest size per folder:"
echo "(can affect loading time of messages, indexing, synchronization for office 2011 or KOFF, etc)"
folder_sizes

# Print "<item-count> <folder-path>" for the ten calendar folders with the
# highest number on the M line of their status.fld, smallest first.
#
# A folder is treated as a calendar when its status.fld contains "T2".
# NOTE(review): the T2 pattern is unanchored, as it was before; confirm
# whether it should be '^T2'.
#
# grep -q is used for that check so the file name is not written into the
# pipeline, and the M line is matched the same way as in the per-folder item
# report ('^M' followed by a digit, path printed without "/status.fld").
calendar_item_counts() {
  find . -name status.fld \
      -exec grep -q 'T2' {} \; \
      -exec grep -H '^M[0-9]' {} + |
    # "<dir>/status.fld:M<n>" -> "<n> <dir>"
    sed 's/^\(.*\)\/status\.fld:M\([0-9][0-9]*\)/\2 \1/' |
    sort -n |
    tail -n 10
}

echo
echo "Calendars with the most items:"
echo "(large ones could be caused by duplication bugs in clients and can cause user or groups of users to be locked out and phone batteries to die quickly)"
calendar_item_counts

# Print "<size> <folder-path>" for the ten calendar folders with the highest
# number on the S line of their status.fld, smallest first.  After sorting,
# sizes are abbreviated with decimal K/M/G suffixes (truncated, not rounded).
#
# A folder is treated as a calendar when its status.fld contains "T2".
# NOTE(review): the T2 pattern is unanchored, as it was before; confirm
# whether it should be '^T2'.
#
# grep -q (not -l) is used for that check: -l prints the file name, which
# then travelled down the pipeline and showed up as a junk row in the
# report.  For the same reason only lines starting with 'S' and a digit are
# selected, instead of every line that contains an 'S'.
calendar_sizes() {
  find . -name status.fld \
      -exec grep -q 'T2' {} \; \
      -exec grep -H '^S[0-9]' {} + |
    # "<dir>/status.fld:S<n>" -> "<n> <dir>"
    sed 's/^\(.*\)\/status\.fld:S\([0-9][0-9]*\)/\2 \1/' |
    sort -n |
    tail -n 10 |
    # Largest unit first; once a suffix is attached the later expressions
    # no longer match, so each line is abbreviated at most once.
    sed -e 's/^\([0-9]\{1,3\}\)[0-9]\{9\} /\1G /' \
        -e 's/^\([0-9]\{1,3\}\)[0-9]\{6\} /\1M /' \
        -e 's/^\([0-9]\{1,3\}\)[0-9]\{3\} /\1K /'
}

echo
echo "Largest Calendars in bytes"
echo "(very large calendars can be caused by duplication of items within events such as ATTENDEE lines, MIME boundaries, etc.  Of course, can lock out user accounts, cause CPU issues in clients, etc.  Next step would be to find largest calendar events in the large calendar folders)"
calendar_sizes

# Print "<total>G <path>" for the ten second-level directories (*/*) with the
# largest summed S values, smallest first.  The total is the sum of the
# numbers on the S lines of every status.fld below the directory, divided by
# 10^9 and printed with two decimals.
#
# The glob is iterated directly instead of via `set */*`, which replaced the
# script's positional parameters and would treat a name starting with '-' as
# an option.  Plain files matched by the glob are skipped, and grep -h is
# used so that the sum no longer depends on splitting "file:S<n>" on ":S"
# (which broke when a path itself contained ":S").
mailbox_sizes() {
  local box
  for box in */*; do
    [[ -d "$box" ]] || continue   # unmatched glob or a plain file
    find "./$box" -name status.fld -exec grep -h '^S[0-9]' {} + |
      awk '{ sum += substr($0, 2) } END { printf "%10.02fG ", sum / 1000000000 }'
    printf '%s\n' "$box"
  done |
    sort -n |
    tail -n 10
}

echo
echo "Mailbox sizes"
mailbox_sizes
