src/export/subset.rs


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809

//! OpenType font subsetting.

use std::borrow::Cow;
use std::collections::HashSet;
use std::convert::{TryFrom, TryInto};
use std::iter;
use std::ops::Range;

use ttf_parser::parser::{
    FromData, LazyArray16, LazyArray32, Offset, Offset16, Offset32, Stream, F2DOT14,
};
use ttf_parser::Tag;

/// Subset a font face for PDF embedding.
///
/// The outlines of all glyphs that are not part of `glyphs` are removed.
/// Furthermore, all character mapping and layout tables are dropped because
/// shaping has already happened.
///
/// `data` holds the bytes of the font file and `index` selects the face when
/// the file turns out to be a font collection.
///
/// Returns `None` if the font data is fatally broken (in which case
/// `ttf-parser` would probably already have rejected the font, so this
/// shouldn't happen if the font data has already passed through `ttf-parser`).
pub fn subset(data: &[u8], index: u32, glyphs: &HashSet<u16>) -> Option<Vec<u8>> {
    let subsetter = Subsetter::new(data, index, glyphs)?;
    Some(subsetter.subset())
}

/// The state needed to subset a single font face.
struct Subsetter<'a> {
    /// The bytes of the whole font file. Table records are resolved against
    /// this slice.
    data: &'a [u8],
    /// The magic number of the face; it is written at the start of the output.
    magic: Magic,
    /// The table records read from the face's table directory.
    records: LazyArray16<'a, TableRecord>,
    /// The number of glyphs in the face, read from the `maxp` table.
    num_glyphs: u16,
    /// The ids of the glyphs that were requested to be kept.
    glyphs: &'a HashSet<u16>,
    /// The tables collected for the output font as pairs of tag and data.
    tables: Vec<(Tag, Cow<'a, [u8]>)>,
}

impl<'a> Subsetter<'a> {
    /// Parse the font header and create a new subsetter.
    ///
    /// If the data starts with a collection header, the face at `index` is
    /// looked up and its header is parsed instead. Returns `None` when any
    /// part of the header, the table directory or the `maxp` table cannot be
    /// read.
    fn new(data: &'a [u8], index: u32, glyphs: &'a HashSet<u16>) -> Option<Self> {
        let mut s = Stream::new(data);

        let mut magic = s.read::<Magic>()?;
        if magic == Magic::Collection {
            // Collection header: skip the four bytes after the magic number,
            // then read the face count and the per-face offsets.
            s.skip::<u32>();
            let num_faces = s.read::<u32>()?;
            let face_offsets = s.read_array32::<Offset32>(num_faces)?;
            let face_start = face_offsets.get(index)?.to_usize();

            // Restart parsing at the selected face. A collection nested in a
            // collection is rejected.
            s = Stream::new_at(data, face_start)?;
            magic = s.read::<Magic>()?;
            if magic == Magic::Collection {
                return None;
            }
        }

        // The table count is followed by three 16-bit fields we don't need.
        let count = s.read::<u16>()?;
        for _ in 0 .. 3 {
            s.skip::<u16>();
        }

        let records = s.read_array16::<TableRecord>(count)?;
        let mut subsetter = Self {
            data,
            magic,
            records,
            num_glyphs: 0,
            glyphs,
            tables: vec![],
        };

        // The glyph count is stored at byte offset 4 of the maxp table.
        let maxp = subsetter.table_data(MAXP)?;
        subsetter.num_glyphs = Stream::read_at::<u16>(maxp, 4)?;

        Some(subsetter)
    }

    /// Encode the subsetted font file.
    ///
    /// Runs the table subsetting and then serializes, in this order: the magic
    /// number, the table directory header, one record per table (sorted by
    /// tag), the table data with each table zero-padded to a four-byte
    /// boundary, and finally the checksum adjustment in the `head` table.
    fn subset(mut self) -> Vec<u8> {
        /// Number of zero bytes needed to bring `len` up to a multiple of four.
        fn padding(len: usize) -> usize {
            (4 - len % 4) % 4
        }

        self.subset_tables();

        // Start writing a brand new font.
        let mut w = Vec::new();
        w.write(self.magic);

        // Write table directory. The search range is the largest power of two
        // that does not exceed the table count, multiplied by the record size
        // of 16 bytes; the entry selector is the logarithm of that power.
        let count = self.tables.len() as u16;
        let entry_selector = (count as f32).log2().floor() as u16;
        let search_range = 2u16.pow(u32::from(entry_selector)) * 16;
        // Saturate so that a (degenerate) empty directory cannot underflow.
        let range_shift = (count * 16).saturating_sub(search_range);
        w.write(count);
        w.write(search_range);
        w.write(entry_selector);
        w.write(range_shift);

        // Tables shall be sorted by tag.
        self.tables.sort_by_key(|&(tag, _)| tag);

        // This variable will hold the offset to the checksum adjustment field
        // in the head table, which we'll have to write in the end (after
        // checksumming the whole font).
        let mut checksum_adjustment_offset = None;

        // Write table records.
        let mut offset = 12 + self.tables.len() * TableRecord::SIZE;
        for (tag, data) in &mut self.tables {
            // Only touch the adjustment field if the head table is long
            // enough to contain it; slicing a truncated table would panic.
            if *tag == HEAD && data.len() >= 12 {
                // Zero out checksum field in head table.
                data.to_mut()[8 .. 12].copy_from_slice(&[0; 4]);
                checksum_adjustment_offset = Some(offset + 8);
            }

            let len = data.len();
            w.write(TableRecord {
                tag: *tag,
                checksum: checksum(&data),
                offset: offset as u32,
                length: len as u32,
            });

            // Account for the padding to 4 bytes. This must match the padding
            // written together with the table data below.
            offset += len + padding(len);
        }

        // Write tables.
        for (_, data) in &self.tables {
            // Write data plus padding zeros to align to 4 bytes.
            w.extend(data.as_ref());
            w.extend(iter::repeat(0).take(padding(data.len())));
        }

        // Write checksumAdjustment field in head table.
        if let Some(i) = checksum_adjustment_offset {
            let sum = checksum(&w);
            let val = 0xB1B0AFBA_u32.wrapping_sub(sum);
            w[i .. i + 4].copy_from_slice(&val.to_be_bytes());
        }

        w
    }

    /// Subset, drop and copy tables.
    ///
    /// The `post`, `glyf`/`loca` and `CFF ` tables are subsetted first. All
    /// table records are then visited and each table is either dropped or
    /// copied verbatim into the output.
    fn subset_tables(&mut self) {
        // Strip the name information, then the unused glyph outlines. Each
        // flag records whether the respective subsetting succeeded.
        let post_done = post::subset(self).is_some();
        let glyf_loca_done = glyf::subset(self).is_some();
        let cff1_done = cff::subset_v1(self).is_some();

        for record in self.records {
            // Decide whether the table is copied unchanged.
            #[rustfmt::skip]
            let copy = match &record.tag.to_bytes() {
                // Not copied: Glyphs are already mapped.
                b"cmap" => false,

                // Not copied: Layout is already finished.
                b"GPOS" | b"GSUB" | b"BASE" | b"JSTF" | b"MATH" |
                b"ankr" | b"kern" | b"kerx" | b"mort" | b"morx" |
                b"trak" | b"bsln" | b"just" | b"feat" | b"prop" => false,

                // Not copied: They don't render in PDF viewers anyway.
                // TODO: We probably have to convert fonts with one of these
                // tables into Type 3 fonts where glyphs are described by either
                // PDF graphics operators or XObject images.
                b"CBDT" | b"CBLC" | b"COLR" | b"CPAL" | b"sbix" | b"SVG " => false,

                // These were subsetted above. Only if that failed do we fall
                // back to copying the original table(s).
                b"post" => !post_done,
                b"loca" | b"glyf" => !glyf_loca_done,
                b"CFF " => !cff1_done,

                // Everything else is copied as is.
                _ => true,
            };

            if !copy {
                continue;
            }

            if let Some(data) = self.table_data(record.tag) {
                self.push_table(record.tag, data);
            }
        }
    }

    /// Retrieve the table data for a table.
    ///
    /// Returns `None` if there is no record with the given tag or if the
    /// byte range described by the record does not lie within the font data.
    fn table_data(&mut self, tag: Tag) -> Option<&'a [u8]> {
        let (_, record) = self.records.binary_search_by(|record| record.tag.cmp(&tag))?;
        let start = record.offset as usize;
        // Offset and length come straight from the font file, so their sum
        // may not fit into `usize` on 32-bit targets.
        let end = start.checked_add(record.length as usize)?;
        self.data.get(start .. end)
    }

    /// Add a table with the given tag and data to the output font.
    fn push_table(&mut self, tag: Tag, data: impl Into<Cow<'a, [u8]>>) {
        let data: Cow<'a, [u8]> = data.into();
        self.tables.push((tag, data));
    }
}

// Some common tags.
/// The `head` table. Contains the checksum adjustment field and the format of
/// the `loca` offsets.
const HEAD: Tag = Tag::from_bytes(b"head");
/// The `maxp` table. The number of glyphs is read from it.
const MAXP: Tag = Tag::from_bytes(b"maxp");
/// The `post` table. Its name information is removed during subsetting.
const POST: Tag = Tag::from_bytes(b"post");
/// The `loca` table. Holds the offsets of the glyph data in the `glyf` table.
const LOCA: Tag = Tag::from_bytes(b"loca");
/// The `glyf` table. Holds the glyph data addressed by the `loca` table.
const GLYF: Tag = Tag::from_bytes(b"glyf");
/// The `CFF ` table. Only subsetted when its major version is 1.
const CFF1: Tag = Tag::from_bytes(b"CFF ");

/// Calculate a checksum over the sliced data as the wrapping sum of its
/// big-endian u32 words. If the data length is not a multiple of four, the
/// final word is completed with trailing zero bytes.
fn checksum(data: &[u8]) -> u32 {
    data.chunks(4).fold(0u32, |acc, word| {
        let mut padded = [0u8; 4];
        padded[.. word.len()].copy_from_slice(word);
        acc.wrapping_add(u32::from_be_bytes(padded))
    })
}

/// Overwrite every byte of the slice with zero.
fn memzero(slice: &mut [u8]) {
    slice.iter_mut().for_each(|byte| *byte = 0);
}

/// Convenience trait for writing into a byte buffer.
trait BufExt {
    /// Append the binary representation of `v` to the buffer.
    fn write<T: ToData>(&mut self, v: T);
}

impl BufExt for Vec<u8> {
    fn write<T: ToData>(&mut self, v: T) {
        v.write(self);
    }
}

/// A trait for writing raw binary data.
trait ToData {
    /// Append the binary representation of `self` to `data`.
    fn write(&self, data: &mut Vec<u8>);
}

impl ToData for u8 {
    /// A byte is written as is.
    fn write(&self, data: &mut Vec<u8>) {
        data.extend_from_slice(&[*self]);
    }
}

impl ToData for u16 {
    /// Written as two bytes in big-endian order.
    fn write(&self, data: &mut Vec<u8>) {
        data.extend_from_slice(&self.to_be_bytes());
    }
}

impl ToData for Offset16 {
    fn write(&self, data: &mut Vec<u8>) {
        self.0.write(data);
    }
}

impl ToData for u32 {
    /// Written as four bytes in big-endian order.
    fn write(&self, data: &mut Vec<u8>) {
        data.extend_from_slice(&self.to_be_bytes());
    }
}

impl ToData for Offset32 {
    fn write(&self, data: &mut Vec<u8>) {
        self.0.write(data);
    }
}

impl ToData for Tag {
    /// Written as the tag's `u32` value.
    fn write(&self, data: &mut Vec<u8>) {
        let raw = self.as_u32();
        raw.write(data);
    }
}

/// Font magic number.
#[derive(Debug, Copy, Clone, Eq, PartialEq)]
enum Magic {
    /// Parsed from `0x00010000` or `0x74727565` (ASCII `true`).
    TrueType,
    /// Parsed from `0x4F54544F` (ASCII `OTTO`).
    OpenType,
    /// Parsed from `0x74746366` (ASCII `ttcf`); a font collection.
    Collection,
}

impl FromData for Magic {
    const SIZE: usize = 4;

    /// Interpret a `u32` as a magic number. Unknown values yield `None`.
    fn parse(data: &[u8]) -> Option<Self> {
        let magic = match u32::parse(data)? {
            0x00010000 | 0x74727565 => Magic::TrueType,
            0x4F54544F => Magic::OpenType,
            0x74746366 => Magic::Collection,
            _ => return None,
        };
        Some(magic)
    }
}

impl ToData for Magic {
    fn write(&self, data: &mut Vec<u8>) {
        let value: u32 = match self {
            Magic::TrueType => 0x00010000,
            Magic::OpenType => 0x4F54544F,
            Magic::Collection => 0x74746366,
        };
        value.write(data);
    }
}

/// Locates a table in the font file.
#[derive(Debug, Copy, Clone, Eq, PartialEq)]
struct TableRecord {
    /// The tag that identifies the table.
    tag: Tag,
    /// The checksum of the table data.
    checksum: u32,
    /// The byte offset of the table data within the font file.
    offset: u32,
    /// The length of the table data in bytes.
    length: u32,
}

impl FromData for TableRecord {
    const SIZE: usize = 16;

    /// Read tag, checksum, offset and length, in this order.
    fn parse(data: &[u8]) -> Option<Self> {
        let mut s = Stream::new(data);
        let tag = s.read::<Tag>()?;
        let checksum = s.read::<u32>()?;
        let offset = s.read::<u32>()?;
        let length = s.read::<u32>()?;
        Some(TableRecord { tag, checksum, offset, length })
    }
}

impl ToData for TableRecord {
    fn write(&self, data: &mut Vec<u8>) {
        self.tag.write(data);
        self.checksum.write(data);
        self.offset.write(data);
        self.length.write(data);
    }
}

mod post {
    use super::*;

    /// Subset the post table by removing the name information.
    ///
    /// The new table consists of the version number `0x00030000` followed by
    /// bytes 4 to 32 of the original table. Returns `None` if the font has no
    /// post table or if it is shorter than 32 bytes.
    pub(super) fn subset(subsetter: &mut Subsetter) -> Option<()> {
        let old = subsetter.table_data(POST)?;
        let rest = old.get(4 .. 32)?;

        // Table version three is the one without names.
        let version = 0x00030000_u32.to_be_bytes();
        let mut new = Vec::with_capacity(version.len() + rest.len());
        new.extend_from_slice(&version);
        new.extend_from_slice(rest);

        subsetter.push_table(POST, new);
        Some(())
    }
}

mod glyf {
    use super::*;

    /// Subset the glyf and loca tables by clearing out glyph data for
    /// unused glyphs.
    ///
    /// The 16-bit value at byte offset 50 of the head table selects the
    /// offset format of the loca table: zero means short offsets, anything
    /// else long offsets.
    pub(super) fn subset(subsetter: &mut Subsetter) -> Option<()> {
        let head = subsetter.table_data(HEAD)?;
        match Stream::read_at::<i16>(head, 50)? {
            0 => subset_impl::<Offset16>(subsetter),
            _ => subset_impl::<Offset32>(subsetter),
        }
    }

    /// Build new loca and glyf tables that only contain the data of glyph 0,
    /// the requested glyphs and all glyphs these use as components.
    ///
    /// `T` is the offset type stored in the loca table. Returns `None`
    /// (without pushing any table) if a table is missing, if the data of a
    /// needed glyph cannot be located or if an offset is not representable.
    fn subset_impl<T>(subsetter: &mut Subsetter) -> Option<()>
    where
        T: LocaOffset,
    {
        let loca = subsetter.table_data(LOCA)?;
        let glyf = subsetter.table_data(GLYF)?;

        // The data of a glyph lies between its loca entry and the next one.
        let offsets = LazyArray32::<T>::new(loca);
        let glyph_data = |id: u16| {
            let idx = u32::from(id);
            let start = offsets.get(idx)?.loca_to_usize();
            let end = offsets.get(idx + 1)?.loca_to_usize();
            glyf.get(start .. end)
        };

        // The set of all glyphs we will include in the subset.
        let mut subset = HashSet::new();

        // Glyphs can be built from other glyphs (possibly nested multiple
        // levels deep), so every glyph we keep has to be inspected for
        // components. Glyph 0 and the requested glyphs come from `seeds`
        // while components discovered along the way wait on the `pending`
        // stack, which is always drained first.
        let mut seeds = iter::once(0).chain(subsetter.glyphs.iter().copied());
        let mut pending = vec![];

        while let Some(id) = pending.pop().or_else(|| seeds.next()) {
            // Skip glyphs that were already processed.
            if !subset.insert(id) {
                continue;
            }

            let mut s = Stream::new(glyph_data(id)?);

            // A negative contour count means this is a composite glyph.
            if matches!(s.read::<i16>(), Some(num_contours) if num_contours < 0) {
                // Skip min/max metrics.
                for _ in 0 .. 4 {
                    s.read::<i16>();
                }

                // Queue the component glyphs.
                pending.extend(component_glyphs(s));
            }
        }

        let mut sub_loca: Vec<u8> = vec![];
        let mut sub_glyf: Vec<u8> = vec![];

        for id in 0 .. subsetter.num_glyphs {
            // Every glyph gets a loca entry. For glyphs outside of the subset
            // the entry simply describes an empty range of glyf data.
            let offset = T::usize_to_loca(sub_glyf.len())?;
            sub_loca.write(offset);
            if subset.contains(&id) {
                sub_glyf.extend_from_slice(glyph_data(id)?);
            }
        }

        // The final entry marks the end of the last glyph's data.
        let end = T::usize_to_loca(sub_glyf.len())?;
        sub_loca.write(end);

        subsetter.push_table(LOCA, sub_loca);
        subsetter.push_table(GLYF, sub_glyf);

        Some(())
    }

    /// An offset type that can be stored in the loca table.
    trait LocaOffset: Sized + FromData + ToData {
        /// Convert the stored value into a byte offset into the glyf table.
        fn loca_to_usize(self) -> usize;
        /// Convert a byte offset into the glyf table into the stored value.
        /// Returns `None` if the offset is not representable.
        fn usize_to_loca(offset: usize) -> Option<Self>;
    }

    impl LocaOffset for Offset16 {
        /// Short offsets store half of the actual byte offset.
        fn loca_to_usize(self) -> usize {
            usize::from(self.0) * 2
        }

        /// Only even offsets whose half fits into 16 bits are representable.
        fn usize_to_loca(offset: usize) -> Option<Self> {
            if offset % 2 != 0 {
                return None;
            }

            let half: u16 = (offset / 2).try_into().ok()?;
            Some(Self(half))
        }
    }

    impl LocaOffset for Offset32 {
        /// Long offsets store the byte offset directly.
        fn loca_to_usize(self) -> usize {
            let Self(raw) = self;
            raw as usize
        }

        /// Any offset that fits into 32 bits is representable.
        fn usize_to_loca(offset: usize) -> Option<Self> {
            let raw: u32 = offset.try_into().ok()?;
            Some(Self(raw))
        }
    }

    /// Returns an iterator over the component glyphs referenced by the given
    /// `glyf` table composite glyph description.
    ///
    /// The stream must be positioned at the first component record. The
    /// iterator ends after the record that lacks the `MORE_COMPONENTS` flag
    /// or as soon as a record cannot be read.
    fn component_glyphs(mut s: Stream) -> impl Iterator<Item = u16> + '_ {
        const ARG_1_AND_2_ARE_WORDS: u16 = 0x0001;
        const WE_HAVE_A_SCALE: u16 = 0x0008;
        const MORE_COMPONENTS: u16 = 0x0020;
        const WE_HAVE_AN_X_AND_Y_SCALE: u16 = 0x0040;
        const WE_HAVE_A_TWO_BY_TWO: u16 = 0x0080;

        let mut more = true;
        iter::from_fn(move || {
            if !more {
                return None;
            }

            let flags = s.read::<u16>()?;
            let component = s.read::<u16>()?;

            // The two arguments are either two words or two bytes.
            if flags & ARG_1_AND_2_ARE_WORDS != 0 {
                s.skip::<i16>();
                s.skip::<i16>();
            } else {
                s.skip::<u16>();
            }

            // The number of values in the optional transformation depends on
            // the first matching flag.
            let transform_values = if flags & WE_HAVE_A_SCALE != 0 {
                1
            } else if flags & WE_HAVE_AN_X_AND_Y_SCALE != 0 {
                2
            } else if flags & WE_HAVE_A_TWO_BY_TWO != 0 {
                4
            } else {
                0
            };

            for _ in 0 .. transform_values {
                s.skip::<F2DOT14>();
            }

            more = flags & MORE_COMPONENTS != 0;
            Some(component)
        })
    }
}

mod cff {
    use super::*;

    /// Subset the CFF table by zeroing glyph data for unused glyphs.
    ///
    /// Returns `None` if there is no CFF table, if its major version is not 1
    /// or if the header or the top dict cannot be read. Failures while
    /// zeroing char strings or subroutine indices are ignored.
    pub(super) fn subset_v1(subsetter: &mut Subsetter) -> Option<()> {
        let cff = subsetter.table_data(CFF1)?;
        let mut s = Stream::new(cff);

        // The header starts with the major and minor version.
        let major = s.read::<u8>()?;
        s.skip::<u8>();
        if major != 1 {
            return None;
        }

        // The header size tells us where the first index starts.
        let header_size = s.read::<u8>()?;
        s = Stream::new_at(cff, usize::from(header_size))?;

        // The name index comes first, but we have no use for it.
        Index::parse_stream(&mut s);

        // Read the top dict. The index should contain only one item.
        let top_dict_index = Index::parse_stream(&mut s)?;
        let top_dict = Dict::parse(top_dict_index.get(0)?);

        // Completely rebuilding the CFF structure would be pretty complex.
        // For now, we therefore employ a peculiar strategy: Unused data is
        // overwritten with zeros in a copy of the table. This way, the font
        // structure and all offsets stay the same. The CFF table itself
        // doesn't shrink, but the embedded font is compressed and greatly
        // benefits from the repeated zeros.
        let mut sub_cff = cff.to_vec();
        zero_char_strings(subsetter, cff, &top_dict, &mut sub_cff);
        zero_subr_indices(subsetter, cff, &top_dict, &mut sub_cff);

        subsetter.push_table(CFF1, sub_cff);

        Some(())
    }

    /// Zero the char strings of all glyphs that were not requested.
    ///
    /// `cff` is the original table and `sub_cff` the copy that is modified.
    /// Returns `None` if the char strings index cannot be located or parsed.
    ///
    /// NOTE(review): glyph 0 is only preserved here if it is part of the
    /// requested set, whereas the glyf subsetting always keeps it. Confirm
    /// that this is intended.
    fn zero_char_strings(
        subsetter: &Subsetter,
        cff: &[u8],
        top_dict: &Dict,
        sub_cff: &mut [u8],
    ) -> Option<()> {
        let base = top_dict.get_offset(Op::CHAR_STRINGS)?;
        let char_strings = Index::parse(cff.get(base ..)?)?;

        let unused = char_strings
            .iter()
            .filter(|(id, _, _)| !subsetter.glyphs.contains(id));

        // The item ranges are relative to the start of the index.
        for (_, _, range) in unused {
            let absolute = base + range.start .. base + range.end;
            memzero(sub_cff.get_mut(absolute)?);
        }

        Some(())
    }

    /// Zero the local subroutine indices of all font dicts that no requested
    /// glyph refers to. Individual subroutines are currently not removed
    /// because finding out which ones are used is complicated.
    ///
    /// `cff` is the original table and `sub_cff` the copy that is modified.
    /// Returns `None` if the FD select or FD array are missing or if a
    /// structure cannot be parsed.
    fn zero_subr_indices(
        subsetter: &Subsetter,
        cff: &[u8],
        top_dict: &Dict,
        sub_cff: &mut [u8],
    ) -> Option<()> {
        // The FD Select data structure maps from glyph ids to font dict
        // indices.
        let fd_select_offset = top_dict.get_offset(Op::FD_SELECT)?;
        let fd_select =
            parse_fd_select(cff.get(fd_select_offset ..)?, subsetter.num_glyphs)?;

        // The FD array holds the font dicts themselves.
        let fd_array_offset = top_dict.get_offset(Op::FD_ARRAY)?;
        let fd_array = Index::parse(cff.get(fd_array_offset ..)?)?;

        // The font dicts whose subrs must be kept are those of the requested
        // glyphs.
        let sub_fds = subsetter
            .glyphs
            .iter()
            .map(|&glyph| fd_select.get(usize::from(glyph)).copied())
            .collect::<Option<HashSet<u8>>>()?;

        for (i, data, _) in fd_array.iter() {
            if sub_fds.contains(&(i as u8)) {
                continue;
            }

            // Font dicts without a private dict have nothing to clear.
            let private_range = match Dict::parse(data).get_range(Op::PRIVATE) {
                Some(range) => range,
                None => continue,
            };

            // The same goes for private dicts without local subrs.
            let private_dict = Dict::parse(cff.get(private_range.clone())?);
            let subrs_offset = match private_dict.get_offset(Op::SUBRS) {
                Some(offset) => offset,
                None => continue,
            };

            // The subrs offset is relative to the start of the private dict.
            // The whole index, including its header, is cleared.
            let start = private_range.start + subrs_offset;
            let index = Index::parse(cff.get(start ..)?)?;
            let end = start + index.data.len();
            memzero(sub_cff.get_mut(start .. end)?);
        }

        Some(())
    }

    /// Returns the font dict index for each glyph.
    ///
    /// Format 0 stores one byte per glyph, which is borrowed directly.
    /// Format 3 stores ranges of glyphs that share a font dict, which are
    /// expanded into one byte per glyph. Any other format results in an empty
    /// mapping.
    fn parse_fd_select(data: &[u8], num_glyphs: u16) -> Option<Cow<'_, [u8]>> {
        let mut s = Stream::new(data);
        let format = s.read::<u8>()?;
        Some(match format {
            0 => Cow::Borrowed(s.read_bytes(usize::from(num_glyphs))?),
            3 => {
                let count = usize::from(s.read::<u16>()?);
                let mut fds = vec![];

                // Each range ends where the next one begins, so we carry the
                // first glyph of the current range along.
                let mut first = s.read::<u16>()?;
                for _ in 0 .. count {
                    let fd = s.read::<u8>()?;
                    let next = s.read::<u16>()?;
                    fds.extend((first .. next).map(|_| fd));
                    first = next;
                }

                Cow::Owned(fds)
            }
            _ => Cow::Borrowed(&[]),
        })
    }

    /// A parsed index: a counted list of variable-length items.
    struct Index<'a> {
        /// The data of the whole index (including its header).
        data: &'a [u8],
        /// The data ranges for the actual items.
        items: Vec<Range<usize>>,
    }

    impl<'a> Index<'a> {
        /// Parse an index that starts at the beginning of `data`. The data
        /// may extend beyond the end of the index.
        fn parse(data: &'a [u8]) -> Option<Self> {
            let mut s = Stream::new(data);
            let count = usize::from(s.read::<u16>()?);

            // An empty index consists of nothing but the two count bytes.
            if count == 0 {
                return Some(Self { data: data.get(.. 2)?, items: vec![] });
            }

            let offsize = usize::from(s.read::<u8>()?);
            if !(1 ..= 4).contains(&offsize) {
                return None;
            }

            // Read an offset and transform it to be relative to the start
            // of the index.
            let data_offset = 3 + offsize * (count + 1);
            let mut read_offset = || {
                let mut bytes = [0u8; 4];
                bytes[4 - offsize ..].copy_from_slice(s.read_bytes(offsize)?);
                Some(data_offset - 1 + u32::from_be_bytes(bytes) as usize)
            };

            // There is one more offset than there are items. Each item spans
            // from its own offset to the next one.
            let mut items = Vec::with_capacity(count);
            let mut last = read_offset()?;
            for _ in 0 .. count {
                let offset = read_offset()?;

                // Reject ranges that are reversed or out of bounds.
                data.get(last .. offset)?;
                items.push(last .. offset);
                last = offset;
            }

            // The index ends where its last item ends.
            Some(Self { data: data.get(.. last)?, items })
        }

        /// Parse an index at the current position of the stream and move the
        /// stream past it.
        fn parse_stream(s: &'a mut Stream) -> Option<Self> {
            let tail = s.tail()?;
            let index = Index::parse(tail)?;
            s.advance(index.data.len());
            Some(index)
        }

        /// The data of the item with the given position, if there is one.
        fn get(&self, idx: usize) -> Option<&'a [u8]> {
            let range = self.items.get(idx)?.clone();
            self.data.get(range)
        }

        /// Iterate over the items as triples of position, data and data range
        /// relative to the start of the index.
        fn iter(&self) -> impl Iterator<Item = (u16, &'a [u8], Range<usize>)> + '_ {
            self.items.iter().enumerate().map(move |(i, item)| {
                let range = item.clone();
                (i as u16, &self.data[item.clone()], range)
            })
        }
    }

    /// A parsed dictionary: a list of operators with their operands.
    struct Dict<'a>(Vec<Pair<'a>>);

    impl<'a> Dict<'a> {
        /// Parse pairs until one fails to parse, which also happens at the
        /// end of the data.
        fn parse(data: &'a [u8]) -> Self {
            let mut s = Stream::new(data);
            let mut pairs = vec![];
            while let Some(pair) = Pair::parse(&mut s) {
                pairs.push(pair);
            }
            Self(pairs)
        }

        /// The operands of the first pair with the given operator.
        fn get(&self, op: Op) -> Option<&[Operand<'a>]> {
            for pair in &self.0 {
                if pair.op == op {
                    return Some(pair.operands.as_slice());
                }
            }
            None
        }

        /// Interpret the operands of the operator as a single positive
        /// offset.
        fn get_offset(&self, op: Op) -> Option<usize> {
            if let [Operand::Int(offset)] = self.get(op)? {
                if *offset > 0 {
                    return usize::try_from(*offset).ok();
                }
            }
            None
        }

        /// Interpret the operands of the operator as a length followed by a
        /// positive offset and return the range these describe.
        fn get_range(&self, op: Op) -> Option<Range<usize>> {
            if let [Operand::Int(len), Operand::Int(offset)] = self.get(op)? {
                if *offset > 0 {
                    let offset = usize::try_from(*offset).ok()?;
                    let len = usize::try_from(*len).ok()?;
                    return Some(offset .. offset + len);
                }
            }
            None
        }
    }

    /// An operator together with the operands that precede it.
    #[derive(Debug)]
    struct Pair<'a> {
        operands: Vec<Operand<'a>>,
        op: Op,
    }

    impl<'a> Pair<'a> {
        /// Read operands until an operator follows, then read the operator.
        fn parse(s: &mut Stream<'a>) -> Option<Self> {
            let mut operands = vec![];
            loop {
                // Peek at the next byte without consuming it. Values up to
                // 21 start an operator, everything above an operand.
                let next = s.clone().read::<u8>()?;
                if next <= 21 {
                    break;
                }
                operands.push(Operand::parse(s)?);
            }

            let op = Op::parse(s)?;
            Some(Self { operands, op })
        }
    }

    /// A dictionary operator. The second byte is only meaningful for
    /// operators that start with the byte 12 and is zero otherwise.
    #[derive(Debug, Eq, PartialEq)]
    struct Op(u8, u8);

    impl Op {
        const CHAR_STRINGS: Self = Self(17, 0);
        const PRIVATE: Self = Self(18, 0);
        const SUBRS: Self = Self(19, 0);
        const FD_ARRAY: Self = Self(12, 36);
        const FD_SELECT: Self = Self(12, 37);

        /// Read an operator of one or two bytes. Returns `None` if the first
        /// byte is greater than 21.
        fn parse(s: &mut Stream) -> Option<Self> {
            let b0 = s.read::<u8>()?;
            if b0 == 12 {
                // A first byte of 12 announces a second byte.
                let b1 = s.read::<u8>()?;
                Some(Self(b0, b1))
            } else if b0 <= 21 {
                Some(Self(b0, 0))
            } else {
                None
            }
        }
    }

    /// A dictionary operand.
    #[derive(Debug)]
    enum Operand<'a> {
        /// An integer.
        Int(i32),
        /// A real number, kept as its undecoded bytes.
        Real(&'a [u8]),
    }

    impl<'a> Operand<'a> {
        /// Read one operand. The first byte determines the encoding; bytes
        /// that do not start an operand result in `None`.
        fn parse(s: &mut Stream<'a>) -> Option<Self> {
            let b0 = i32::from(s.read::<u8>()?);
            let operand = match b0 {
                28 => Self::Int(i32::from(s.read::<i16>()?)),
                29 => Self::Int(s.read::<i32>()?),
                30 => {
                    // A real number extends up to and including the first
                    // byte whose low nibble is 0xf, or to the end of the data
                    // if there is no such byte.
                    let tail = s.tail()?;
                    let len = tail
                        .iter()
                        .position(|&byte| byte & 0x0f == 0x0f)
                        .map_or(tail.len(), |pos| pos + 1);
                    Self::Real(s.read_bytes(len)?)
                }
                32 ..= 246 => Self::Int(b0 - 139),
                247 ..= 250 => {
                    let b1 = i32::from(s.read::<u8>()?);
                    Self::Int((b0 - 247) * 256 + b1 + 108)
                }
                251 ..= 254 => {
                    let b1 = i32::from(s.read::<u8>()?);
                    Self::Int(-(b0 - 251) * 256 - b1 - 108)
                }
                _ => return None,
            };
            Some(operand)
        }
    }
}