#!/bin/bash

# Gabriel L. Somlo - 2004,2005

# QueryTracker is free software; you can redistribute it and/or modify it
# under the terms of the GNU General Public License as published by the
# Free Software Foundation; either version 2, or (at your option) any
# later version.
#
# QueryTracker is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
# General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with QueryTracker; see the file COPYING.  If not, write to the Free
# Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
# 02111-1307, USA.  


# Site configuration is kept in a separate file that is sourced into this
# shell.  (Presumably it supplies MUTEX, QFILE, VARDIR, DISFILE, FBKLOG, DRT
# and DQT, which are used below but never assigned in this script -- confirm
# against the config file.)
CFGFILE=/home/QueryTracker/QueryTracker.conf
source "${CFGFILE}"

# Threshold values offered in the rank/query drop-downs: 0.05 through 0.95
# in steps of 0.05, stored as one space-separated string.
THRESHOLDS=""
for HUNDREDTHS in 05 10 15 20 25 30 35 40 45 50 55 60 65 70 75 80 85 90 95; do
  THRESHOLDS="${THRESHOLDS:+${THRESHOLDS} }0.${HUNDREDTHS}"
done
unset HUNDREDTHS

# Page templates.
#
# HEAD is used as a printf format string: it carries two %s placeholders
# (page <TITLE> and <H1> heading), so callers pass the caption twice.
# TAIL is printed verbatim at the end of every page.
#
# Both are single-quoted so that the double quotes inside the markup are
# literal characters; inside a double-quoted string they would terminate the
# shell string and silently vanish from the generated HTML.
HEAD='
<HTML>
 <HEAD>
  <TITLE>QueryTracker: %s</TITLE>
 </HEAD>
 <BODY>
  <H1>QueryTracker: %s</H1>
  <HR>
'
TAIL='
  <HR>
  <a href="index.cgi">Home</a></p>
 </BODY>
</HTML>
'

# acquire mutex
# All requests are serialised through the lock file ${MUTEX}.  Once the lock
# is held, make sure a signal (for instance SIGPIPE/SIGTERM when the client
# disconnects) cannot leave it behind: lockfile retries forever by default,
# so a stale lock would block every later request.  The trap is installed
# only after a successful acquisition, so it can never remove a lock that
# belongs to another request.
if lockfile "${MUTEX}"; then
  trap 'rm -f "${MUTEX}"; exit 1' HUP INT TERM PIPE
fi

echo "Content-type: text/html"
echo

# The form data arrives as a single line on stdin.
# -r keeps backslashes in the request body literal.
read -r INPUT
### log activity:
printf '%s\n' "$(date +%D-%R):${REMOTE_USER}:${REMOTE_ADDR}:${INPUT}" >> activity.log

# ---------------------------------------------------------------------------
# Request handling: parse the form data in ${INPUT} and run the command
# named by its "submit" field (New, View, Feedback, Edit, Commit, Delete).
# View and Edit print a complete page, release the mutex and exit; every
# other path leaves an optional message in ERRMSG and falls through to the
# home page generated further down.
#
# NOTE(review): query IDs are still interpolated into grep patterns as
# regular expressions, so an ID containing "." or "*" can match more than
# one record.  Left unchanged here; worth a follow-up.
# ---------------------------------------------------------------------------

# qt_parse_input RAW
#   Split the raw form body RAW on "&" (and whitespace) into name=value items
#   and store the recognised ones in globals:
#     qsel qid qtx qrt qqt submit resdate -> QSEL QID QTX QRT QQT SUBMIT RESDATE
#     NAME=rel -> " NAME" appended to RDOCS
#     NAME=non -> " NAME" appended to NDOCS
#   Values are stored exactly as received; no URL-decoding is performed.
#   Returns 1 without touching any global if RAW contains a raw
#   < > " / \ or : character.  A form-urlencoded submission percent-encodes
#   all of these, while this script echoes request values into HTML, builds
#   file paths and grep patterns from them and stores them in the
#   colon-delimited query file, so a raw occurrence is rejected.
qt_parse_input() {
  local item
  case $1 in
    *\<*|*\>*|*\"*|*/*|*\\*|*:*) return 1 ;;
  esac
  # noglob: the unquoted expansion below is wanted for word splitting only.
  # Without -f an item such as "qid=*" would be replaced by the names of
  # matching files in the current directory.
  set -f
  for item in ${1//\&/ }; do
    case ${item} in
      qsel=*)    QSEL=${item#*=} ;;
      qid=*)     QID=${item#*=} ;;
      qtx=*)     QTX=${item#*=} ;;
      qrt=*)     QRT=${item#*=} ;;
      qqt=*)     QQT=${item#*=} ;;
      submit=*)  SUBMIT=${item#*=} ;;
      resdate=*) RESDATE=${item#*=} ;;
      *=rel)     RDOCS="${RDOCS} ${item%%=rel}" ;;
      *=non)     NDOCS="${NDOCS} ${item%%=non}" ;;
    esac
  done
  set +f
}

# qt_log_feedback VERDICT DOCS
#   Append one "${RESDATE}/<file> VERDICT" line to the feedback log of the
#   selected query for every document in DOCS.  DOCS is a blank-separated
#   list of form field names; a name may join several file names with "-".
#   A leading "p" marks a file from the preload hitlist: it is stripped and
#   the line goes to the preload query's log instead.
#   Reads globals RESDATE, VARDIR, REMOTE_USER, QSEL, FBKLOG.
qt_log_feedback() {
  local verdict=$1 docs=$2 doc
  set -f    # split only; never glob-expand names taken from the request
  for doc in ${docs//-/ }; do
    if [ "${doc}" = "${doc#p}" ]; then
      # no 'p' prefix in front of filename -- update regular query
      printf '%s\n' "${RESDATE}/${doc} ${verdict}" \
        >> "${VARDIR}/${REMOTE_USER}.${QSEL}/${FBKLOG}"
    else
      # 'p' prefix in front of filename -- strip, and update preload query
      printf '%s\n' "${RESDATE}/${doc#p} ${verdict}" \
        >> "${VARDIR}/preload-${REMOTE_USER}.${QSEL}/${FBKLOG}"
    fi
  done
  set +f
}

RDOCS=""
NDOCS=""
if [ "${INPUT}" != "" ]; then
  # parse input commands
  if ! qt_parse_input "${INPUT}"; then
    ERRMSG="Bad request: form data contains characters that must be URL-encoded."
    SUBMIT=""
  fi
  # execute given command
  case ${SUBMIT} in
    New)
      # Append a new record "<user>.<id>:<terms>:<rank thr>:<query thr>".
      if [[ -z ${QID} || -z ${QTX} ]]; then
        ERRMSG="New: Query ID and Query Terms are both needed!"
      elif grep -q "^${REMOTE_USER}\.${QID}:" "${QFILE}"; then
        ERRMSG="New: Query ID \"${QID}\" already exists!"
      else
        echo "${REMOTE_USER}.${QID}:${QTX}:${QRT}:${QQT}" >> "${QFILE}"
      fi
    ;;
    View)
      # Show up to 20 results of the selected query with Y/N relevance radios.
      HITLIST="${VARDIR}/${REMOTE_USER}.${QSEL}/${DISFILE}"
      if [ ! "${QSEL}" ]; then
        ERRMSG="View: No query has been selected!"
      elif ! grep -q "^${REMOTE_USER}\.${QSEL}:" "${QFILE}"; then
        ERRMSG="View: the requested query \"${QSEL}\" does not exist!"
      elif [ ! -f "${HITLIST}" ]; then
        ERRMSG="View: no results available for query \"${QSEL}\"!"
      else
        QTX=$(grep "^${REMOTE_USER}\.${QSEL}:" "${QFILE}" | cut -d: -f 2)
        # The hitlist is expected to be a symlink: RESDATE is the first path
        # component of its target, RESDSTR the date/time shown by "ls -l".
        RESDATE=$(ls -l "${HITLIST}" | awk -F' -> ' '{print $2}' | cut -d/ -f1)
        RESDSTR=$(ls -l "${HITLIST}" | awk '{print $6 " " $7 ", at " $8}')
        # we need the day-of-month number as a random seed
        RESDAY=$(ls -l "${HITLIST}" | awk '{print $7}')
        # do we have a preload version ?
        X_PRELOAD=$(grep "^preload-${REMOTE_USER}\.${QSEL}:" "${QFILE}")
        P_HITLIST="${VARDIR}/preload-${REMOTE_USER}.${QSEL}/${DISFILE}"

        printf "${HEAD}" "user=${REMOTE_USER}; view=${QSEL}" \
                         "user=${REMOTE_USER}; view=${QSEL}"
        cat << EOT
<font color=grey>
Notes:<ul>
<li>Select <b>Relevant</b>=<i>Yes</i> if you thought the document was relevant and want it added to the query profile. This will be used to help focus future searches on your topic of interest.
<li>Select <b>Relevant</b>=<i>No</i> if you didn't think the document was relevant. This will let QueryTracker know that you <b>did</b> read the document and weren't interested.
<li>Unless you select <i>Yes</i> or <i>No</i> for <b>Relevant</b>, QueryTracker assumes you didn't read the document, and will keep disseminating it to you for as long as it is found by the search engine.
<li>When you are done making your selections, press the <b>Feedback</b> button at the bottom of the page to submit them to QueryTracker. You can do this more than once. If you make multiple conflicting selections on the same document, all but the last selection for that document will be ignored.
<li>If you encounter the same document content under a different URL, please give it the same rating you gave to the original document. This will help future studies on redundancy detection.
</ul>
</font>
<form method=post action=index.cgi>
 <input type=hidden name=qsel value="${QSEL}">
 <input type=hidden name=resdate value="${RESDATE}">
<table border>
 <tr><td colspan=2 align=center>
   <table align=left><tr><td>Terms: ${QTX//\+/ };<br>
                             Generated on ${RESDSTR}</td></tr></table>
 <tr><th>Relevant?</th><th>Link</th></tr>
EOT
        # we want new and relevant changed documents only (i.e., no unc, no chg)
        ## limit 20 randomly selected results !
#bak#   grep -v ' \(Unc\|Chg\)$' ${HITLIST} \
#bak#          | ./unsort.ksh ${RESDAY} | head -20 | while read URL STS DFN; do
        { grep -v ' \(Unc\|Chg\)$' "${HITLIST}" | awk '{print $1 " " $3}'
          if [ "${X_PRELOAD}" ]; then
            # mark filenames from preload hitlist with "p" to recognize them
            grep -v ' \(Unc\|Chg\)$' "${P_HITLIST}" | awk '{print $1 " p" $3}'
          fi
        # merge regular and preload displays by unique URL
        # (uses '-' to concatenate fname and p.fname occurrences)
        } | ./merge_preload.pl \
          | ./unsort.ksh "${RESDAY}" | head -20 | while read -r URL DFN; do
          cat << EOT
 <tr><td><input type=radio name="${DFN}" value=rel>Y
         <input type=radio name="${DFN}" value=non>N</td>
     <td><a href=${URL}>${URL}</a></td></tr>
EOT
        done
        cat << EOT
 <tr><td colspan=2><input type=submit name=submit value=Feedback>
                   (Update profile with selected relevant documents)</td></tr>
</table>
</form><br>
EOT
        echo "${TAIL}"
        rm -f "${MUTEX}"
        exit 0
      fi
    ;;
    Feedback)
      # Record the Y/N selections made on a View page.  The query is
      # validated first, as in View/Edit/Delete, so that nothing is appended
      # under a directory name taken unchecked from the request.
      if [ ! "${QSEL}" ]; then
        ERRMSG="Feedback: No query has been selected!"
      elif ! grep -q "^${REMOTE_USER}\.${QSEL}:" "${QFILE}"; then
        ERRMSG="Feedback: the requested query \"${QSEL}\" does not exist!"
      else
        qt_log_feedback R "${RDOCS}"
        qt_log_feedback N "${NDOCS}"
        ERRMSG="Feedback: Relevant documents for \"${QSEL}\" added to profile."
      fi
    ;;
    Edit)
      # Show a form pre-filled with the selected query; it submits "Commit".
      if [ ! "${QSEL}" ]; then
        ERRMSG="Edit: No query has been selected!"
      elif ! grep -q "^${REMOTE_USER}\.${QSEL}:" "${QFILE}"; then
        ERRMSG="Edit: the requested query \"${QSEL}\" does not exist!"
      else
        Q=$(grep "^${REMOTE_USER}\.${QSEL}:" "${QFILE}")
        # Split the (first) matching record on ":"; Q_KEY and Q_REST merely
        # absorb the leading key and any surplus fields.
        IFS=: read -r Q_KEY QTX QRT QQT Q_REST <<< "${Q}"
        printf "${HEAD}" "user=${REMOTE_USER}; edit=${QSEL}" \
                         "user=${REMOTE_USER}; edit=${QSEL}"
        echo "<form method=\"post\" action=\"index.cgi\">"
        echo "<table>"
        echo "<tr><th colspan=5 align=left>Edit query terms:</th></tr>"
        echo "<tr><th colspan=5>&nbsp;</th></tr>"
        echo "<tr><td>Query ID:</td><td>Query Terms:</td>"
        echo "<td colspan=2>Thresholds</td>"
        echo "<td>Submit</td></tr>"
        echo "<tr><td colspan=2></td>"
        echo "<td>Rank</td><td>Query</td>"
        echo "<td></td></tr>"
        echo "<tr><td><input type=hidden name=qid value=\"${QSEL}\">"
        echo "${QSEL}</td>"
        echo "<td><input type=text size=40 maxlength=80 name=qtx value=\"${QTX//\+/ }\"></td>"
        # One drop-down per threshold; the stored value is listed first and
        # pre-selected, followed by all standard choices.
        for F in qrt qqt; do
          echo "<td><select name=${F}>"
          case ${F} in
            qrt)
              DEFTHR=${QRT}
            ;;
            qqt)
              DEFTHR=${QQT}
            ;;
          esac
          echo "<option value=\"${DEFTHR}\" selected>${DEFTHR}</option>"
          for T in ${THRESHOLDS}; do
            echo "<option value=\"${T}\">${T}</option>"
          done
          echo "</select></td>"
        done
        echo "<td><input type=submit name=submit value=Commit></td></tr>"
        echo "<tr><td colspan=5>"
        echo "<font color=grey>"
        echo "Note 1: Currently, only <i>implicit-And</i> and <i>Not</i> are "
        echo "supported. <i>Not</i>-ed terms must begin with a \"-\" sign. "
        echo "<br>&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;"
        echo "<i>Or</i>&nbsp; and <i>phrases</i> (such as \"word sequence\") "
        echo "are not (yet) supported."
        echo "</font>"
        echo "</td></tr>"
        echo "<tr><td colspan=5>"
        echo "<font color=grey>"
        echo "Note 2: Don't change the default thresholds, unless you "
        echo "have a good reason to do so..."
        echo "</font>"
        echo "</td></tr>"
        echo "</table></form><br>"
        echo "${TAIL}"
        rm -f "${MUTEX}"
        exit 0
      fi
    ;;
    Commit)
      # Replace the record of query QID (and its preload twin, if one
      # existed) with the values submitted from the Edit form.
      if [[ -z ${QID} || -z ${QTX} ]]; then
        ERRMSG="Commit: Query ID and Query Terms are both needed!"
      else
        X_PRELOAD=$(grep "^preload-${REMOTE_USER}\.${QID}:" "${QFILE}")
        grep -v -e "^${REMOTE_USER}\.${QID}:" \
                -e "^preload-${REMOTE_USER}\.${QID}:" "${QFILE}" > "${QFILE}.tmp"
        mv -f "${QFILE}.tmp" "${QFILE}"
        echo "${REMOTE_USER}.${QID}:${QTX}:${QRT}:${QQT}" >> "${QFILE}"
        if [ "${X_PRELOAD}" ]; then
          echo "preload-${REMOTE_USER}.${QID}:${QTX}:${QRT}:${QQT}" >> "${QFILE}"
        fi
      fi
    ;;
    Delete)
      # Drop the selected query and its preload twin from the query file.
      if [ ! "${QSEL}" ]; then
        ERRMSG="Delete: No query has been selected!"
      elif ! grep -q "^${REMOTE_USER}\.${QSEL}:" "${QFILE}"; then
        ERRMSG="Delete: the requested query \"${QSEL}\" does not exist!"
      else
        grep -v -e "^${REMOTE_USER}\.${QSEL}:" \
                -e "^preload-${REMOTE_USER}\.${QSEL}:" "${QFILE}" > "${QFILE}.tmp"
        mv -f "${QFILE}.tmp" "${QFILE}"
      fi
    ;;
  esac
fi


# Reaching this point means the request did not render a page of its own
# (the branches that do so exit early), so the home page starts here.
#
HOME_CAPTION="user=${REMOTE_USER}"
printf "${HEAD}" "${HOME_CAPTION}" "${HOME_CAPTION}"

#echo "<font color=purple>"
#echo "All QueryTracker profiles have been reset on May 31 in order to start "
#echo "collecting data for a new experiment.<br>"
#echo "Some documents you've seen before will be disseminated again -- please "
#echo "treat them as new !<br>"
#echo "Thank you for continuing to supply your feedback !"
#echo "</font><hr>"

# Show the message left behind by the command that was just handled, if any.
if [ -n "${ERRMSG}" ]; then
  printf '%s\n' "<font color=red>${ERRMSG}</font></p>"
fi

# Collect every query record that belongs to ${REMOTE_USER}.
QUERIES=$(grep "^${REMOTE_USER}\." ${QFILE})

# Open the page's form and table, then emit the "enter a new query" row
# up to and including the two text inputs.
printf '%s\n' \
  '<form method="post" action="index.cgi">' \
  '<table>' \
  '<tr><th colspan=5 align=left>Enter a new query:</th></tr>' \
  '<tr><th colspan=5>&nbsp;</th></tr>' \
  '<tr><td>Query ID:</td><td>Query Terms:</td>' \
  '<td colspan=2>Thresholds</td>' \
  '<td>Submit</td></tr>' \
  '<tr><td colspan=2></td>' \
  '<td>Rank</td><td>Query</td>' \
  '<td></td></tr>' \
  '<tr><td><input type=text size=10 maxlength=10 name=qid></td>' \
  '<td><input type=text size=40 maxlength=80 name=qtx></td>'

# One drop-down per threshold field.  Each entry pairs the form field name
# with its default (DRT for qrt, DQT for qqt); the default is listed first
# and pre-selected, followed by all standard choices.
for SELECTOR in "qrt=${DRT}" "qqt=${DQT}"; do
  FIELD=${SELECTOR%%=*}
  PRESET=${SELECTOR#*=}
  printf '%s\n' \
    "<td><select name=${FIELD}>" \
    "<option value=\"${PRESET}\" selected>${PRESET}</option>"
  for CHOICE in ${THRESHOLDS}; do
    printf '%s\n' "<option value=\"${CHOICE}\">${CHOICE}</option>"
  done
  printf '%s\n' '</select></td>'
done

# Submit button, the four explanatory notes, and a spacer row.
printf '%s\n' \
  '<td><input type=submit name=submit value=New></td></tr>' \
  '<tr><td colspan=5>' \
  '<font color=grey>' \
  'Note 1: The Query ID is an identifier by which QueryTracker ' \
  'keeps track of your query.' \
  '</font>' \
  '</td></tr>' \
  '<tr><td colspan=5>' \
  '<font color=grey>' \
  'Note 2: Currently, only <i>implicit-And</i> and <i>Not</i> are ' \
  'supported. <i>Not</i>-ed terms must begin with a "-" sign. ' \
  '<br>&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;' \
  '<i>Or</i>&nbsp; and <i>phrases</i> (such as "word sequence") ' \
  'are not (yet) supported.' \
  '</font>' \
  '</td></tr>' \
  '<tr><td colspan=5>' \
  '<font color=grey>' \
  "Note 3: Don't change the default thresholds, unless you " \
  'have a good reason to do so...' \
  '</font>' \
  '</td></tr>' \
  '<tr><td colspan=5>' \
  '<font color=grey>' \
  'Note 4: Once you submit a new query, it will appear in the list of ' \
  'existing queries displayed below.' \
  '</font>' \
  '</td></tr>' \
  '<tr><td colspan=5>&nbsp;</td></tr>'

# qt_print_query_rows
#   Print one table row (ID, terms, rank threshold, query threshold and a
#   "qsel" radio button) for every record in ${QFILE} whose key starts with
#   "${REMOTE_USER}.".
#   Records are read line by line and split on ":" only, so they are never
#   word-split or glob-expanded.  The query ID is whatever follows the
#   "${REMOTE_USER}." prefix of the key, which keeps it intact when the user
#   name or the ID itself contains a dot.
qt_print_query_rows() {
  local key terms rank_thr query_thr surplus id
  grep "^${REMOTE_USER}\." "${QFILE}" |
    while IFS=: read -r key terms rank_thr query_thr surplus; do
      id=${key#"${REMOTE_USER}."}
      echo "<tr><td>${id}</td><td>${terms//\+/ }</td>"
      echo "<td>${rank_thr}</td><td>${query_thr}</td>"
      echo "<td align=center><input type=radio name=qsel value=${id}></td></tr>"
    done
}

# List the user's existing queries with View/Edit/Delete buttons, or say
# that there are none.
if [ ! "${QUERIES}" ]; then
  echo "<tr><td colspan=5 align=center>No queries are currently active "
  echo "for this account</td></tr>"
else
  echo "<tr><th colspan=5 align=left>Select an existing query from the following list:</th></tr>"
  echo "<tr><th colspan=5>&nbsp;</th></tr>"
  echo "<tr><td>Query ID:</td><td>Query Terms:</td>"
  echo "<td colspan=2>Thresholds</td>"
  echo "<td>Select</td></tr>"
  echo "<tr><td colspan=2></td>"
  echo "<td>Rank</td><td>Query</td>"
  echo "<td></td></tr>"
  qt_print_query_rows
  echo "<tr><td colspan=5 align=center>"
  echo "<input type=submit name=submit value=View> results for, or "
  echo "<input type=submit name=submit value=Edit> / "
  echo "<input type=submit name=submit value=Delete> the selected query"
  echo "</td></tr>"
  echo "<tr><td colspan=5>"
  echo "<font color=grey>"
  echo "Note 1: Select one of the existing queries by clicking on the radio "
  echo "button on the right."
  echo "<br>&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;"
  echo "You will then be able to select one of the actions: "
  echo "View results, Edit, or Delete."
  echo "</font>"
  echo "</td></tr>"
fi

# Spacer row, the link to the example-upload page with its note, the end of
# the table/form, and finally the shared page tail.
printf '%s\n' \
  '<tr><td colspan=5>&nbsp;</td></tr>' \
  '<tr><th colspan=5 align=left><a href="file_upload.cgi">' \
  'Submit your own positive examples here.</a></th></tr>' \
  '<tr><td colspan=5>' \
  '<font color=grey>' \
  'Note: You can increase the potential relevance of your QueryTracker ' \
  'results by submitting' \
  '<br>&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;' \
  'a few examples of documents you would consider relevant for your query.' \
  '</font>' \
  '</td></tr>' \
  '</table></form><br>' \
  "${TAIL}"

# The page is complete: give the lock back so the next request can proceed.
rm -f ${MUTEX}
