Section 967: Breaking vertical lists into pages

The vsplit procedure, which implements ’s \vsplit operation, is considerably simpler than line_break because it doesn’t have to worry about hyphenation, and because its mission is to discover a single break instead of an optimum sequence of breakpoints. But before we get into the details of vsplit, we need to consider a few more basic things.

Section 968

A subroutine called prune_page_top takes a pointer to a vlist and returns a pointer to a modified vlist in which all glue, kern, and penalty nodes have been deleted before the first box or rule node. However, the first box or rule is actually preceded by a newly created glue node designed so that the topmost baseline will be at distance split_top_skip from the top, whenever this is possible without backspacing.

In this routine and those that follow, we make use of the fact that a vertical list contains no character nodes, hence the type field exists for each node in the list.

page_break.c
// << Start file |page_break.c|, 1382 >>

// adjust top after page break
pointer prune_page_top(pointer p) {
    pointer prev_p; // lags one step behind |p|
    pointer q;      // temporary variable for list manipulation
    prev_p = TEMP_HEAD;
    link(TEMP_HEAD) = p;
    while (p != null) {
        switch (type(p)) {
        case HLIST_NODE:
        case VLIST_NODE:
        case RULE_NODE:
            // << Insert glue for |split_top_skip| and set |p = null|, 969 >>
            break;
        
        case WHATSIT_NODE:
        case MARK_NODE:
        case INS_NODE:
            prev_p = p;
            p = link(prev_p);
            break;
        
        case GLUE_NODE:
        case KERN_NODE:
        case PENALTY_NODE:
            q = p;
            p = link(q);
            link(q) = null;
            link(prev_p) = p;
            flush_node_list(q);
            break;
        
        default:
            confusion("pruning");
        }
    }
    return link(TEMP_HEAD);
}

Section 969

⟨ Insert glue for split_top_skip and set p = null 969 ⟩≡

q = new_skip_param(SPLIT_TOP_SKIP_CODE);
link(prev_p) = q;
link(q) = p;
// now |temp_ptr = glue_ptr(q)|
if (width(temp_ptr) > height(p)) {
    width(temp_ptr) -= height(p);
}
else {
    width(temp_ptr) = 0;
}
p = null;

Section 970

The next subroutine finds the best place to break a given vertical list so as to obtain a box of height h, with maximum depth d. A pointer to the beginning of the vertical list is given, and a pointer to the optimum breakpoint is returned. The list is effectively followed by a forced break, i.e., a penalty node with the EJECT_PENALTY; if the best break occurs at this artificial node, the value null is returned.

An array of six scaled distances is used to keep track of the height from the beginning of the list to the current place, just as in line_break. In fact, we use one of the same arrays, only changing its name to reflect its new significance.

breaker.h
#define active_height      active_width           // new name for the six distance variables
#define cur_height         active_height[1]       // the natural height
#define set_height_zero(X) active_height[(X)] = 0 // initialize the height to zero
page_break.c
// finds optimum page break
pointer vert_break(pointer p, scaled h, scaled d) {
    pointer prev_p;            // if |p| is a glue node, |type(prev_p)| determines whether |p| is a legal breakpoint
    pointer q, r;              // glue specifications
    int pi = 0;                // penalty value
    int b;                     // badness at a trial breakpoint
    int least_cost;            // the smallest badness plus penalties found so far
    pointer best_place = null; // the most recent break that leads to |least_cost|
    scaled prev_dp;            // depth of previous box in the list
    small_number t;            // |type| of the node following a kern
    
    prev_p = p; // an initial glue node is not a legal breakpoint
    least_cost = AWFUL_BAD;
    do_all_six(set_height_zero);
    prev_dp = 0;
    while(true) {
        // << If node |p| is a legal breakpoint, check if this break is the best known, and |goto done| if |p| is null or if the page-so-far is already too full to accept more stuff, 972 >>
        prev_p = p;
        p = link(prev_p);
    }
done:
    return best_place;
}

Section 971

A global variable best_height_plus_depth will be set to the natural size of the box that corresponds to the optimum breakpoint found by vert_break. (This value is used by the insertion-splitting algorithm of the page builder.)

⟨ Global variables 13 ⟩+≡

scaled best_height_plus_depth; // height of the best box, without stretching or shrinking

Section 972

A subtle point to be noted here is that the maximum depth d might be negative, so cur_height and prev_dp might need to be corrected even after a glue or kern node.

⟨ If node p is a legal breakpoint, check if this break is the best known, and goto done if p is null or if the page-so-far is already too full to accept more stuff 972 ⟩≡

if (p == null) {
    pi = EJECT_PENALTY;
}
else {
    // << Use node |p| to update the current height and depth measurements; if this node is not a legal breakpoint, |goto not_found| or |update_heights|, otherwise set |pi| to the associated penalty at the break, 973 >>
}
// << Check if node |p| is a new champion breakpoint; then |goto done| if |p| is a forced break or if the page-so-far is already too full, 974 >>
if (type(p) < GLUE_NODE || type(p) > KERN_NODE) {
    goto not_found;
}

update_heights:
// << Update the current height and depth measurements with respect to a glue or kern node |p|, 976 >>

not_found:
if (prev_dp > d) {
    cur_height += prev_dp - d;
    prev_dp = d;
}

Section 973

⟨ Use node p to update the current height and depth measurements; if this node is not a legal breakpoint, goto not_found or update_heights, otherwise set pi to the associated penalty at the break 973 ⟩≡

switch (type(p)) {
case HLIST_NODE:
case VLIST_NODE:
case RULE_NODE: 
    cur_height += prev_dp + height(p);
    prev_dp = depth(p);
    goto not_found;

case WHATSIT_NODE:
    // << Process whatsit |p| in |vert_break| loop, |goto not_found|, 1365 >>

case GLUE_NODE:
    if (precedes_break(prev_p)) {
        pi = 0;
    }
    else {
        goto update_heights;
    }
    break;

case KERN_NODE:
    if (link(p) == null) {
        t = PENALTY_NODE;
    }
    else {
        t = type(link(p));
    }
    if (t == GLUE_NODE) {
        pi = 0;
    }
    else {
        goto update_heights;
    }
    break;

case PENALTY_NODE:
    pi = penalty(p);
    break;

case MARK_NODE:
case INS_NODE:
    goto not_found;

default:
    confusion("vertbreak");
}

Section 974

constants.h
#define DEPLORABLE 100000 // more than |INF_BAD|, but less than |AWFUL_BAD|

⟨ Check if node p is a new champion breakpoint; then goto done if p is a forced break or if the page-so-far is already too full 974 ⟩≡

if (pi < INF_PENALTY) {
    // << Compute the badness, |b|, using |AWFUL_BAD| if the box is too full, 975 >>
    if (b < AWFUL_BAD) {
        if (pi <= EJECT_PENALTY) {
            b = pi;
        }
        else if (b < INF_BAD) {
            b += pi;
        }
        else {
            b = DEPLORABLE;
        }
    }
    if (b <= least_cost) {
        best_place = p;
        least_cost = b;
        best_height_plus_depth = cur_height + prev_dp;
    }
    if (b == AWFUL_BAD || pi <= EJECT_PENALTY) {
        goto done;
    }
}

Section 975

⟨ Compute the badness, b, using AWFUL_BAD if the box is too full 975 ⟩≡

if (cur_height < h) {
    if (active_height[3] != 0
      || active_height[4] != 0
      || active_height[5] != 0)
    {
      b = 0;
    }
    else {
      b = badness(h - cur_height, active_height[2]);
    }
}
else if (cur_height - h > active_height[6]) {
    b = AWFUL_BAD;
}
else {
    b = badness(cur_height - h, active_height[6]);
}

Section 976

Vertical lists that are subject to the vert_break procedure should not contain infinite shrinkability, since that would permit any amount of information to “fit” on one page.

⟨ Update the current height and depth measurements with respect to a glue or kern node p 976 ⟩≡

if (type(p) == KERN_NODE) {
    q = p;
}
else {
    q = glue_ptr(p);
    active_height[2 + stretch_order(q)] += stretch(q);
    active_height[6] += shrink(q);
    if (shrink_order(q) != NORMAL && shrink(q) != 0) {
        print_err("Infinite glue shrinkage found in box being split");
        help4("The box you are \\vsplitting contains some infinitely")
            ("shrinkable glue, e.g., `\\vss' or `\\vskip 0pt minus 1fil'.")
            ("Such glue doesn't belong there; but you can safely proceed,")
            ("since the offensive shrinkability has been made finite.");
        error();
        r = new_spec(q);
        shrink_order(r) = NORMAL;
        delete_glue_ref(q);
        glue_ptr(p) = r;
        q = r;
    }
}
cur_height += prev_dp + width(q);
prev_dp = 0;

Section 977

Now we are ready to consider vsplit itself. Most of its work is accomplished by the two subroutines that we have just considered.

Given the number of a vlist box n, and given a desired page height h, the vsplit function finds the best initial segment of the vlist and returns a box for a page of height h. The remainder of the vlist, if any, replaces the original box, after removing glue and penalties and adjusting for split_top_skip. Mark nodes in the split-off box are used to set the values of split_first_mark and split_bot_mark; we use the fact that split_first_mark = null if and only if split_bot_mark = null.

The original box becomes “void” if and only if it has been entirely extracted. The extracted box is “void” if and only if the original box was void (or if it was, erroneously, an hlist box).

page_break.c
// extracts a page of height |h| from box |n|
pointer vsplit(eight_bits n, scaled h) {
    pointer v; // the box to be split
    pointer p; // runs through the vlist
    pointer q; // points to where the break occurs
    v = box(n);
    if (split_first_mark != null) {
        delete_token_ref(split_first_mark);
        split_first_mark = null;
        delete_token_ref(split_bot_mark);
        split_bot_mark = null;
    }
    // << Dispense with trivial cases of void or bad boxes, 978 >>
    q = vert_break(list_ptr(v), h, split_max_depth);
    // << Look at all the marks in nodes before the break, and set the final link to |null| at the break, 979 >>
    q = prune_page_top(q);
    p = list_ptr(v);
    free_node(v, BOX_NODE_SIZE);
    if (q == null) {
        box(n) = null; // the |eq_level| of the box stays the same
    }
    else {
        box(n) = vpack(q, NATURAL);
    }
    return vpackage(p, h, EXACTLY, split_max_depth);
}

Section 978

⟨ Dispense with trivial cases of void or bad boxes 978 ⟩≡

if (v == null) {
    return null;
}
if (type(v) != VLIST_NODE) {
    print_err("");
    print_esc("vsplit");
    print(" needs a ");
    print_esc("vbox");
    help2("The box you are trying to split is an \\hbox.")
        ("I can't split such a box, so I'll leave it alone.");
    error();
    return null;
}

Section 979

It’s possible that the box begins with a penalty node that is the “best” break, so we must be careful to handle this special case correctly.

⟨ Look at all the marks in nodes before the break, and set the final link to null at the break 979 ⟩≡

p = list_ptr(v);
if (p == q) {
    list_ptr(v) = null;
}
else {
    while(true) {
        if (type(p) == MARK_NODE) {
            if (split_first_mark == null) {
                split_first_mark = mark_ptr(p);
                split_bot_mark = split_first_mark;
                token_ref_count(split_first_mark) += 2;
            }
            else {
                delete_token_ref(split_bot_mark);
                split_bot_mark = mark_ptr(p);
                add_token_ref(split_bot_mark);
            }
        }
        if (link(p) == q) {
            link(p) = null;
            break; // goto done
        }
        p = link(p);
    }
}
// done: