An Inverted-Pendulum Balancing Game

If you have the Tcl/Tk Plug-in for Netscape Navigator, you will see an application below.

The application is an interactive program in which you try to keep an inverted pendulum balanced by pushing its base left and right. Many numerical calculations are performed at each step, so it runs rather slowly. It was written by Chuck Anderson. Suggestions for speeding it up are welcome. Anyone care to translate it into Java?

Versions of this simulation in C have been used by many people to research ways in which a computer can learn to solve difficult control problems. Here are some references to related publications with which I have been involved.


Here is the source for the inverted-pendulum program:

# ---- Simulation state and parameters (globals shared by the procs) ----

# State list: index 0 is the cart position, 1 its velocity, 2 the pole
# angle and 3 the pole's angular velocity.
set state {0 0 0 0}
# Current push: the mouse bindings below set it to -1, 0 or 1, and
# calc_derivs multiplies it by force_mag.
set act 0

set cart_mass 1
set pole_mass 0.1
set pole_length 1
set force_mag 10.
# Step size used by update_pole when integrating the derivatives.
set tau 0.005
set fric_cart 0.00005
set fric_pole 0.005

# Bookkeeping values.
set not_first 0
set trial_length 0

# ---- Window ----

# Canvas size in pixels; convx and convy scale by these.
set width 400
set height 400

#so each change in pole is visible
update idletasks

canvas .c -width $width -height $height
pack .c

# Mouse control: left and right buttons push while held, any release
# stops pushing, and the middle button restarts the game.
bind .c <Button-1> {set act -1}
bind .c <Button-3> {set act 1}
bind .c <Any-ButtonRelease> {set act 0}
bind .c <Button-2> {reset; run}

# Advance the simulation by one step, redraw, and schedule the next step.
#
# The pending "after" id is remembered in the global run_after_id and
# cancelled on entry. Without that, calling run while a step is already
# scheduled (the middle-button binding does "reset; run") would start a
# second, parallel chain of steps and the game would speed up with every
# restart. Cancelling an id that has already fired is a harmless no-op.
#
# Returns the id of the newly scheduled "after" handler.
proc run {} {
    global run_after_id

    if {[info exists run_after_id]} {
        after cancel $run_after_id
    }

    update_pole
    show_state

    # Flush pending redraws before the next step so each change is visible.
    set run_after_id [after 0 {update idletasks; run}]
}

# Advance the global state by one integration step.
#
# Reads the globals state, act and tau, asks calc_derivs for the rate of
# change of each of the four state components, and replaces state with
# state + rate * tau, component by component.
proc update_pole {} {
    global state act tau

    set rates [calc_derivs $state $act]

    set advanced {}
    foreach idx {0 1 2 3} {
        set current [lindex $state $idx]
        set rate [lindex $rates $idx]
        lappend advanced [expr {$current + $rate * $tau}]
    }

    set state $advanced
}

# Return -1 when x is negative and 1 otherwise. Zero is treated as
# non-negative, so [sign 0] is 1.
proc sign {x} {
    return [expr {$x < 0 ? -1 : 1}]
}

# Compute the time derivatives of the cart-pole state.
#
# Arguments:
#   state  list {x xd th thd}: cart position, cart velocity, pole angle
#          and pole angular velocity (only xd, th and thd are read)
#   act    push direction; it is multiplied by the global force_mag
#
# Globals read: force_mag cart_mass pole_mass pole_length fric_cart
# fric_pole.
#
# Returns the list {xd xdd thd thdd}, i.e. the derivative of each entry
# of state in the same order.
#
# Every expression is braced so it is byte-compiled and its operands are
# used as numbers instead of being re-parsed from their string form.
proc calc_derivs {state act} {
    global force_mag cart_mass pole_mass pole_length fric_cart fric_pole

    set force [expr {$force_mag * $act}]
    set xd  [lindex $state 1]
    set th  [lindex $state 2]
    set thd [lindex $state 3]

    set sinth [expr {sin($th)}]
    set costh [expr {cos($th)}]
    set total_mass [expr {$cart_mass + $pole_mass}]
    set thdsq [expr {$thd * $thd}]
    set half_pole [expr {0.5 * $pole_length}]
    set pole_mass_length [expr {$half_pole * $pole_mass}]

    # Term shared by both accelerations. Because sign returns 1 for zero,
    # the cart-friction term is non-zero even when xd is exactly 0.
    set common [expr {($force + $pole_mass_length * $thdsq * \
            $sinth - $fric_cart * [sign $xd]) / $total_mass}]

    # Angular acceleration of the pole; 9.8 is the gravity constant.
    set thdd [expr {(9.8 * $sinth - $costh * $common \
            - $fric_pole * $thd / $pole_mass_length) / \
            ($half_pole * (4./3. - $pole_mass * $costh * $costh / \
            $total_mass))}]

    # Linear acceleration of the cart.
    set xdd [expr {$common - $pole_mass_length * $thdd * \
            $costh / $total_mass}]

    return [list $xd $xdd $thd $thdd]
}

# Return the game to its starting configuration: set the global state
# list back to all zeros, clear the not_first flag, remove the canvas
# items tagged "cart" and draw the track.
proc reset {} {
    set ::state {0 0 0 0}
    set ::not_first 0

    .c delete cart
    draw_track
}

# Redraw the scene for the current state, then record the state and
# action that were just drawn in old_state and old_act and set the
# not_first flag.
proc show_state {} {
    draw_state

    set ::old_state $::state
    set ::old_act $::act
    set ::not_first 1
}


# Display settings.
# NOTE(review): pole_length was already set to 1 with the other simulation
# parameters near the top of the file. This assignment replaces that value
# before the simulation is started, and calc_derivs reads the same global,
# so both the physics and the drawing use 1.5 -- confirm this is intended.
set pole_length 1.5
# Arrow length, read by draw_state.
set arrow_length 0.2

# Map a horizontal track coordinate to a canvas x coordinate:
# -2.5 maps to 0 and 2.5 maps to the global width.
proc convx {x} {
    set fraction [expr {($x + 2.5) / 5.0}]
    return [expr {$fraction * $::width}]
}

# Map a vertical track coordinate to a canvas y coordinate. The axis is
# flipped: -2.5 maps to the global height (bottom edge) and 2.5 maps to 0.
proc convy {y} {
    set fraction [expr {($y + 2.5) / 5.0}]
    return [expr {$::height - $fraction * $::height}]
}

# Redraw the cart, the push arrow and the pole for the current state.
#
# Globals read: state (index 0 is the cart position, index 2 the pole
# angle), act (push direction), arrow_length and pole_length.
#
# The previous cart, pole and arrow items are deleted first. An arrow is
# drawn only while act is non-zero; it starts at the cart position and
# extends arrow_length in the direction of the push.
#
# Returns whatever the final "line" call returns.
proc draw_state {} {
    global arrow_length state act pole_length

    set x  [lindex $state 0]
    set th [lindex $state 2]

    .c delete cart
    .c delete pole
    .c delete arrow

    rectangle [expr {$x - 0.2}] -0.2 [expr {$x + 0.2}] 0 cornflowerblue cart

    if {$act != 0} {
        # sign is -1 for a push to the left and 1 for a push to the right.
        set tip [expr {$x + $arrow_length * [sign $act]}]
        .c create line [convx $x] [convy -0.1] [convx $tip] [convy -0.1] \
            -arrow last -fill yellow -tag arrow
    }

    # The pole is hinged on top of the cart at (x, 0).
    line $x 0 \
        [expr {$x + sin($th) * $pole_length}] \
        [expr {$pole_length * cos($th)}] \
        cornflowerblue pole
}

# Draw a filled rectangle with no outline on the canvas. The corners
# (x1, y1) and (x2, y2) are converted with convx/convy; the item is
# filled with color and tagged name. Returns the result of the canvas
# create command.
proc rectangle {x1 y1 x2 y2 color name} {
    set cx1 [convx $x1]
    set cy1 [convy $y1]
    set cx2 [convx $x2]
    set cy2 [convy $y2]

    return [.c create rectangle $cx1 $cy1 $cx2 $cy2 \
        -fill $color -outline "" -tag $name]
}

# Draw a line of width 3 on the canvas from (x1, y1) to (x2, y2). The
# end points are converted with convx/convy; the item is drawn in color
# and tagged name. Returns the result of the canvas create command.
proc line {x1 y1 x2 y2 color name} {
    set cx1 [convx $x1]
    set cy1 [convy $y1]
    set cx2 [convx $x2]
    set cy2 [convy $y2]

    return [.c create line $cx1 $cy1 $cx2 $cy2 \
        -fill $color -tag $name -width 3]
}

# Draw the gray track outline and the help text on the canvas, and clear
# the global not_first flag.
#
# Both items carry the tag "track" and any items with that tag are
# deleted first, so calling this again (reset does so on every restart)
# replaces the old drawing instead of stacking more items on the canvas.
#
# Returns 0, the value stored in not_first.
proc draw_track {} {
    global not_first

    .c delete track

    # Outline of the track as x y pairs in track coordinates.
    set points {
        -2.5 -0.4   2.5 -0.4   2.5 0.   2.3 0.
         2.3 -0.2  -2.3 -0.2  -2.3 0.  -2.5 0.
    }

    set convpoints {}
    foreach {px py} $points {
        lappend convpoints [convx $px] [convy $py]
    }

    # eval flattens the coordinate list into separate arguments.
    eval .c create polygon $convpoints -fill gray -tags track

    .c create text [convx -2.3] [convy -1.8] \
        -text "Buttons:  Left - Push Left, Middle - PANIC, Right - Push Right" \
        -anchor sw -tags track

    set not_first 0
}

# Now run it!
# reset puts the state back to all zeros and draws the track; run then
# performs the first step and keeps rescheduling itself with "after".

reset
run