1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
1001
1002
1003
1004
1005
1006
1007
1008
1009
1010
1011
1012
1013
1014
1015
1016
1017
1018
1019
1020
1021
1022
1023
1024
1025
1026
1027
1028
1029
1030
1031
1032
1033
1034
1035
1036
1037
1038
1039
1040
1041
1042
1043
1044
1045
1046
1047
1048
1049
1050
1051
1052
1053
1054
1055
1056
1057
1058
1059
1060
1061
1062
1063
1064
1065
1066
1067
1068
1069
1070
1071
1072
1073
1074
1075
1076
1077
1078
1079
1080
1081
1082
1083
1084
1085
1086
1087
1088
1089
1090
1091
1092
1093
1094
1095
1096
1097
1098
1099
1100
1101
1102
1103
1104
1105
1106
1107
1108
1109
1110
1111
1112
1113
1114
1115
1116
1117
1118
1119
1120
1121
1122
1123
1124
1125
1126
1127
1128
1129
1130
1131
1132
1133
1134
1135
1136
1137
1138
1139
1140
1141
1142
1143
1144
1145
1146
1147
1148
1149
1150
1151
1152
1153
1154
1155
1156
1157
1158
1159
1160
1161
1162
1163
1164
1165
1166
1167
1168
1169
1170
1171
1172
1173
1174
1175
1176
1177
1178
1179
1180
1181
1182
1183
1184
1185
1186
1187
1188
1189
1190
1191
1192
1193
1194
1195
1196
1197
1198
1199
1200
1201
1202
1203
1204
1205
1206
1207
1208
1209
1210
1211
1212
1213
1214
1215
1216
1217
1218
1219
1220
1221
1222
1223
1224
1225
1226
1227
1228
1229
1230
1231
1232
1233
1234
1235
1236
1237
1238
1239
1240
1241
1242
1243
1244
1245
1246
1247
1248
1249
1250
1251
1252
1253
1254
1255
1256
1257
1258
1259
1260
1261
1262
1263
1264
1265
1266
1267
1268
1269
1270
1271
1272
1273
1274
1275
1276
1277
1278
1279
1280
1281
1282
1283
1284
1285
1286
1287
1288
1289
1290
1291
1292
1293
1294
1295
1296
1297
1298
1299
1300
1301
1302
1303
1304
1305
1306
1307
1308
1309
1310
1311
1312
1313
1314
1315
1316
1317
1318
1319
1320
1321
1322
1323
1324
1325
1326
1327
1328
1329
1330
1331
1332
1333
1334
1335
1336
1337
1338
1339
1340
1341
1342
1343
1344
1345
1346
1347
1348
1349
1350
1351
1352
1353
1354
1355
1356
1357
1358
1359
1360
1361
1362
1363
1364
1365
1366
1367
1368
1369
1370
1371
1372
1373
1374
1375
1376
1377
1378
1379
1380
1381
1382
1383
1384
1385
1386
1387
1388
1389
1390
1391
1392
1393
1394
1395
1396
1397
1398
1399
1400
1401
1402
1403
1404
1405
1406
1407
1408
1409
1410
1411
1412
1413
1414
1415
1416
1417
1418
1419
1420
1421
1422
1423
1424
1425
1426
1427
1428
1429
1430
1431
1432
1433
1434
1435
|
use typst::eval::Tracer;
use unicode_bidi::{BidiInfo, Level as BidiLevel};
use unicode_script::{Script, UnicodeScript};
use xi_unicode::LineBreakIterator;
use super::{BoxElem, HElem, Sizing, Spacing};
use crate::layout::AlignElem;
use crate::math::EquationElem;
use crate::prelude::*;
use crate::text::{
shape, LinebreakElem, Quoter, Quotes, ShapedText, SmartQuoteElem, SpaceElem, TextElem,
};
/// Arrange text, spacing and inline-level elements into a paragraph.
///
/// Although this function is primarily used in set rules to affect paragraph
/// properties, it can also be used to explicitly render its argument onto a
/// paragraph of its own.
///
/// ## Example
/// ```example
/// #set par(first-line-indent: 1em, justify: true)
/// #show par: set block(spacing: 0.65em)
///
/// We proceed by contradiction.
/// Suppose that there exists a set
/// of positive integers $a$, $b$, and
/// $c$ that satisfies the equation
/// $a^n + b^n = c^n$ for some
/// integer value of $n > 2$.
///
/// Without loss of generality,
/// let $a$ be the smallest of the
/// three integers. Then, we ...
/// ```
///
/// Display: Paragraph
/// Category: layout
#[element(Construct)]
pub struct ParElem {
/// The spacing between lines.
///
/// The default value is `{0.65em}`.
#[resolve]
#[default(Em::new(0.65).into())]
pub leading: Length,
/// Whether to justify text in its line.
///
/// Hyphenation will be enabled for justified paragraphs if the [text
/// property hyphenate]($func/text.hyphenate) is set to `{auto}` and the
/// current language is known.
///
/// Note that the current [alignment]($func/align) still has an effect on
/// the placement of the last line except if it ends with a [justified line
/// break]($func/linebreak.justify).
#[default(false)]
pub justify: bool,
/// How to determine line breaks.
///
/// When this property is set to `{auto}`, its default value, optimized line
/// breaks will be used for justified paragraphs. Enabling optimized line
/// breaks for ragged paragraphs may also be worthwhile to improve the
/// appearance of the text.
///
/// ```example
/// #set page(width: 190pt)
/// #set par(linebreaks: "simple")
/// Some texts are frustratingly
/// challenging to break in a
/// visually pleasing way. This
/// very aesthetic example is one
/// of them.
///
/// #set par(linebreaks: "optimized")
/// Some texts are frustratingly
/// challenging to break in a
/// visually pleasing way. This
/// very aesthetic example is one
/// of them.
/// ```
// The `{auto}` case is resolved in `linebreak`, which picks the optimized
// algorithm when `justify` is set and the simple one otherwise.
#[default]
pub linebreaks: Smart<Linebreaks>,
/// The indent the first line of a paragraph should have.
///
/// Only the first line of a consecutive paragraph will be indented (not
/// the first one in a block or on the page).
///
/// By typographic convention, paragraph breaks are indicated either by some
/// space between paragraphs or by indented first lines. Consider reducing
/// the [paragraph spacing]($func/block.spacing) to the [`leading`] when
/// using this property (e.g. using
/// `[#show par: set block(spacing: 0.65em)]`).
// NOTE(review): unlike `leading` and `hanging_indent`, this field carries no
// `#[resolve]` attribute; `collect` turns the raw `Length` into spacing via
// `.into()`. It is only applied when the paragraph is consecutive and the
// resolved horizontal alignment equals the start of the text direction.
pub first_line_indent: Length,
/// The indent all but the first line of a paragraph should have.
// `collect` inserts the negated value as spacing at the very start of the
// paragraph and `layout` subtracts it from the width available to lines.
#[resolve]
pub hanging_indent: Length,
/// The contents of the paragraph.
// The constructor reads this as the required `body` argument.
#[external]
pub body: Content,
/// The paragraph's children.
// Consumed by `layout` through `par.children()`.
#[internal]
#[variadic]
pub children: Vec<Content>,
}
impl Construct for ParElem {
    /// Build the content produced by calling `par(..)`.
    ///
    /// No paragraph element is created here. The `body` argument is styled
    /// with the properties given in `args` and wrapped in two paragraph
    /// breaks, so that it ends up in a paragraph of its own.
    fn construct(_: &mut Vm, args: &mut Args) -> SourceResult<Content> {
        // The set-rule arguments must be consumed before the positional body.
        let map = Self::set(args)?;
        let content = args.expect::<Content>("body")?;

        let opening = ParbreakElem::new().pack();
        let styled = content.styled_with_map(map);
        let closing = ParbreakElem::new().pack();

        Ok(Content::sequence([opening, styled, closing]))
    }
}
impl ParElem {
/// Layout the paragraph into a collection of lines.
///
/// - `styles`: the style chain the paragraph is laid out with.
/// - `consecutive`: forwarded to `collect`, where it gates the first-line
///   indent.
/// - `region`: the available size; its `x` component (minus the hanging
///   indent) is the width lines are broken against.
/// - `expand`: forwarded unchanged to `finalize`.
///
/// Returns the fragment produced by `finalize`, or the first error raised
/// while collecting, preparing or finalizing the paragraph.
#[tracing::instrument(name = "ParElement::layout", skip_all)]
pub fn layout(
&self,
vt: &mut Vt,
styles: StyleChain,
consecutive: bool,
region: Size,
expand: bool,
) -> SourceResult<Fragment> {
// The actual work lives in a memoized inner function. It receives the
// individual parts of the `Vt` instead of the `Vt` itself and reassembles
// it on entry.
#[comemo::memoize]
#[allow(clippy::too_many_arguments)]
fn cached(
par: &ParElem,
world: Tracked<dyn World + '_>,
tracer: TrackedMut<Tracer>,
provider: TrackedMut<StabilityProvider>,
introspector: Tracked<Introspector>,
styles: StyleChain,
consecutive: bool,
region: Size,
expand: bool,
) -> SourceResult<Fragment> {
let mut vt = Vt { world, tracer, provider, introspector };
let children = par.children();
// Collect all text into one string for BiDi analysis.
let (text, segments, spans) = collect(&children, &styles, consecutive)?;
// Perform BiDi analysis and then prepare paragraph layout by building a
// representation on which we can do line breaking without layouting
// each and every line from scratch.
let p = prepare(&mut vt, &children, &text, segments, spans, styles, region)?;
// Break the paragraph into lines. The hanging indent is subtracted from
// the region width before line breaking.
let lines = linebreak(&vt, &p, region.x - p.hang);
// Stack the lines into one frame per region.
finalize(&mut vt, &p, &lines, region, expand)
}
// Hand the parts of `vt` to the memoized function; the mutable trackers
// are reborrowed so that `vt` remains usable by the caller afterwards.
cached(
self,
vt.world,
TrackedMut::reborrow_mut(&mut vt.tracer),
TrackedMut::reborrow_mut(&mut vt.provider),
vt.introspector,
styles,
consecutive,
region,
expand,
)
}
}
/// How to determine line breaks in a paragraph.
// Stored in `ParElem::linebreaks` wrapped in `Smart`; `linebreak` dispatches
// on the resolved variant to `linebreak_simple` or `linebreak_optimized`.
#[derive(Debug, Copy, Clone, Eq, PartialEq, Hash, Cast)]
pub enum Linebreaks {
/// Determine the line breaks in a simple first-fit style.
Simple,
/// Optimize the line breaks for the whole paragraph.
///
/// Typst will try to produce more evenly filled lines of text by
/// considering the whole paragraph when calculating line breaks.
Optimized,
}
/// A paragraph break.
///
/// This starts a new paragraph. Especially useful when used within code like
/// [for loops]($scripting/#loops). Multiple consecutive
/// paragraph breaks collapse into a single one.
///
/// ## Example
/// ```example
/// #for i in range(3) {
///   [Blind text #i: ]
///   lorem(5)
///   parbreak()
/// }
/// ```
///
/// ## Syntax
/// Instead of calling this function, you can insert a blank line into your
/// markup to create a paragraph break.
///
/// Display: Paragraph Break
/// Category: layout
#[element(Unlabellable)]
pub struct ParbreakElem {}
// Marker implementation: the impl body is empty.
impl Unlabellable for ParbreakElem {}
/// Range of a substring of text, expressed as byte offsets into the
/// paragraph's full text.
type Range = std::ops::Range<usize>;
// The characters by which spacing, inline content and pins are replaced in the
// paragraph's full text.
//
// `Segment::len` and `Item::len` report the UTF-8 length of these placeholders
// for non-text segments and items, which keeps their offsets aligned with the
// full text string.
const SPACING_REPLACE: char = ' '; // Space
const OBJ_REPLACE: char = '\u{FFFC}'; // Object Replacement Character
/// A paragraph representation in which children are already layouted and text
/// is already preshaped.
///
/// In many cases, we can directly reuse these results when constructing a line.
/// Only when a line break falls onto a text index that is not safe-to-break per
/// rustybuzz, we have to reshape that portion.
///
/// Built by `prepare` and consumed by the line breaking functions.
struct Preparation<'a> {
/// Bidirectional text embedding levels for the paragraph. Also gives access
/// to the paragraph's full text through `bidi.text`.
bidi: BidiInfo<'a>,
/// Text runs, spacing and layouted elements.
items: Vec<Item<'a>>,
/// The span mapper.
spans: SpanMapper,
/// The styles shared by all children.
styles: StyleChain<'a>,
/// Whether to hyphenate if it's the same for all children.
/// `None` if the children disagree (see `shared_get`).
hyphenate: Option<bool>,
/// The text language if it's the same for all children.
/// `None` if the children disagree (see `shared_get`).
lang: Option<Lang>,
/// The paragraph's resolved alignment.
align: Align,
/// Whether to justify the paragraph.
justify: bool,
/// The paragraph's hanging indent. `ParElem::layout` subtracts it from the
/// region width before line breaking.
hang: Abs,
}
impl<'a> Preparation<'a> {
    /// Look up the item whose text span covers `text_offset`.
    ///
    /// Items are walked in order while accumulating their text lengths. The
    /// result is `None` when the offset lies beyond the last item.
    fn find(&self, text_offset: usize) -> Option<&Item<'a>> {
        let mut item_start = 0;
        self.items.iter().find(|item| {
            let item_end = item_start + item.len();
            let covers = item_start <= text_offset && text_offset < item_end;
            item_start = item_end;
            covers
        })
    }

    /// Return the items that intersect the given `text_range`.
    ///
    /// Alongside the items, the text range that those items span as a whole
    /// is returned. It can be wider than `text_range` since the first and
    /// last item may only partially overlap with it.
    fn slice(&self, text_range: Range) -> (Range, &[Item<'a>]) {
        let mut covered = text_range.start..text_range.end;
        let mut first = 0;
        let mut past_last = 0;
        let mut offset = 0;

        for (idx, item) in self.items.iter().enumerate() {
            // The last item starting at or before the range start opens the
            // slice.
            if offset <= text_range.start {
                first = idx;
                covered.start = offset;
            }

            // Stop at the first item that starts at or after the range end
            // and also ends beyond it.
            let next = offset + item.len();
            if offset >= text_range.end && next > text_range.end {
                break;
            }

            past_last = idx + 1;
            covered.end = next;
            offset = next;
        }

        (covered, &self.items[first..past_last])
    }
}
/// A segment of one or multiple collapsed children.
///
/// Produced by `collect` and turned into `Item`s by `prepare`.
#[derive(Debug, Copy, Clone)]
enum Segment<'a> {
/// One or multiple collapsed text or text-equivalent children. Stores how
/// long the segment is (in bytes of the full text string).
Text(usize),
/// Horizontal spacing between other segments.
Spacing(Spacing),
/// A mathematical equation.
Equation(&'a EquationElem),
/// A box with arbitrary content. The flag records whether the box has a
/// fractional width; such a box is represented by the spacing placeholder
/// in the full text instead of the object placeholder.
Box(&'a BoxElem, bool),
/// Metadata. Occupies no text.
Meta,
}
impl Segment<'_> {
    /// The number of bytes this segment occupies in the paragraph's full
    /// text.
    fn len(&self) -> usize {
        // Everything except text and metadata is represented by exactly one
        // placeholder character.
        let placeholder = match self {
            Self::Text(len) => return *len,
            Self::Meta => return 0,
            Self::Spacing(_) | Self::Box(_, true) => SPACING_REPLACE,
            Self::Equation(_) | Self::Box(_, false) => OBJ_REPLACE,
        };
        placeholder.len_utf8()
    }
}
/// A prepared item in a paragraph layout.
///
/// Created by `prepare` (and `shape_range` for text) from the segments that
/// `collect` produced.
#[derive(Debug)]
enum Item<'a> {
/// A shaped text run with consistent style and direction.
Text(ShapedText<'a>),
/// Absolute spacing between other items.
Absolute(Abs),
/// Fractional spacing between other items. The optional payload is set when
/// the spacing stems from a box with fractional width and holds that box
/// together with its styles; it is `None` for plain fractional spacing.
Fractional(Fr, Option<(&'a BoxElem, StyleChain<'a>)>),
/// Layouted inline-level content.
Frame(Frame),
/// Metadata, carried by a zero-sized frame.
Meta(Frame),
}
impl<'a> Item<'a> {
    /// The shaped text if this is a text item, `None` otherwise.
    fn text(&self) -> Option<&ShapedText<'a>> {
        if let Self::Text(shaped) = self {
            Some(shaped)
        } else {
            None
        }
    }

    /// The number of bytes this item occupies in the paragraph's full text.
    fn len(&self) -> usize {
        // Spacing and frames are represented by a single placeholder
        // character, metadata by nothing at all.
        let placeholder = match self {
            Self::Text(shaped) => return shaped.text.len(),
            Self::Meta(_) => return 0,
            Self::Absolute(_) | Self::Fractional(..) => SPACING_REPLACE,
            Self::Frame(_) => OBJ_REPLACE,
        };
        placeholder.len_utf8()
    }

    /// The natural layouted width of the item. Fractional spacing and
    /// metadata have no natural width.
    fn width(&self) -> Abs {
        match self {
            Self::Fractional(..) | Self::Meta(_) => Abs::zero(),
            Self::Absolute(amount) => *amount,
            Self::Frame(frame) => frame.width(),
            Self::Text(shaped) => shaped.width,
        }
    }
}
/// Maps byte offsets back to spans.
///
/// Holds one `(length, span)` entry per pushed segment, in text order.
#[derive(Default)]
pub struct SpanMapper(Vec<(usize, Span)>);

impl SpanMapper {
    /// Create a new, empty span mapper.
    pub fn new() -> Self {
        Self::default()
    }

    /// Register the span of the next segment, which is `len` bytes long.
    pub fn push(&mut self, len: usize, span: Span) {
        self.0.push((len, span));
    }

    /// Determine the span at the given byte offset together with the offset
    /// relative to the start of the matching segment.
    ///
    /// Both ends of a segment count as belonging to it, so an offset on the
    /// boundary between two segments resolves to the earlier one. The
    /// relative offset falls back to `0` if it does not fit into a `u16`.
    ///
    /// May return a detached span (with offset `0`) if no segment matches.
    pub fn span_at(&self, offset: usize) -> (Span, u16) {
        let mut seg_start = 0;
        for (len, span) in self.0.iter().copied() {
            let seg_end = seg_start + len;
            if seg_start <= offset && offset <= seg_end {
                let relative = u16::try_from(offset - seg_start).unwrap_or(0);
                return (span, relative);
            }
            seg_start = seg_end;
        }
        (Span::detached(), 0)
    }
}
/// A layouted line, consisting of a sequence of layouted paragraph items that
/// are mostly borrowed from the preparation phase. This type enables you to
/// measure the size of a line in a range before committing to building the
/// line's frame.
///
/// At most two paragraph items must be created individually for this line: The
/// first and last one since they may be broken apart by the start or end of the
/// line, respectively. But even those can partially reuse previous results when
/// the break index is safe-to-break per rustybuzz.
///
/// The items of a line are, in order, `first`, then `inner`, then `last`
/// (see `Line::items`).
struct Line<'a> {
/// Bidi information about the paragraph.
bidi: &'a BidiInfo<'a>,
/// The trimmed range the line spans in the paragraph.
trimmed: Range,
/// The untrimmed end where the line ends. The optimized line breaker uses
/// it as the start offset of the following line.
end: usize,
/// A reshaped text item if the line sliced up a text item at the start.
first: Option<Item<'a>>,
/// Inner items which don't need to be reprocessed.
inner: &'a [Item<'a>],
/// A reshaped text item if the line sliced up a text item at the end. If
/// there is only one text item, this takes precedence over `first`.
last: Option<Item<'a>>,
/// The width of the line.
width: Abs,
/// Whether the line should be justified.
justify: bool,
/// Whether the line ends with a hyphen or dash, either naturally or through
/// hyphenation. The optimized line breaker penalizes two consecutive lines
/// for which this is set.
dash: bool,
}
impl<'a> Line<'a> {
    /// Iterate over the line's items: the optional first item, the inner
    /// items and the optional last item, in that order.
    fn items(&self) -> impl Iterator<Item = &Item<'a>> {
        let head = self.first.iter();
        let body = self.inner.iter();
        let tail = self.last.iter();
        head.chain(body).chain(tail)
    }

    /// Return items that intersect the given `text_range`.
    ///
    /// Offsets are counted from the start of the line's trimmed range.
    fn slice(&self, text_range: Range) -> impl Iterator<Item = &Item<'a>> {
        let mut offset = self.trimmed.start;
        let mut first = 0;
        let mut past_last = 0;

        for (idx, item) in self.items().enumerate() {
            // The last item starting at or before the range start opens the
            // slice.
            if offset <= text_range.start {
                first = idx;
            }

            // Stop at the first item that starts at or after the range end
            // and also ends beyond it.
            let next = offset + item.len();
            if offset >= text_range.end && next > text_range.end {
                break;
            }

            past_last = idx + 1;
            offset = next;
        }

        self.items().skip(first).take(past_last - first)
    }

    /// How many glyphs are in the text where we can insert additional
    /// space when encountering underfull lines.
    fn justifiables(&self) -> usize {
        let total: usize = self
            .items()
            .filter_map(Item::text)
            .map(|shaped| shaped.justifiables())
            .sum();

        // A CJK character at the line end should not be adjusted, so it is
        // taken out of the count again.
        let ends_with_cjk = self
            .items()
            .last()
            .and_then(Item::text)
            .map_or(false, |shaped| shaped.cjk_justifiable_at_last());

        if ends_with_cjk {
            total - 1
        } else {
            total
        }
    }

    /// How much the line can stretch, summed over its text items.
    fn stretchability(&self) -> Abs {
        let texts = self.items().filter_map(Item::text);
        texts.map(|shaped| shaped.stretchability()).sum()
    }

    /// How much the line can shrink, summed over its text items.
    fn shrinkability(&self) -> Abs {
        let texts = self.items().filter_map(Item::text);
        texts.map(|shaped| shaped.shrinkability()).sum()
    }

    /// The sum of fractions in the line.
    fn fr(&self) -> Fr {
        self.items()
            .filter_map(|item| {
                if let Item::Fractional(fr, _) = item {
                    Some(*fr)
                } else {
                    None
                }
            })
            .sum()
    }
}
/// Collect all text of the paragraph into one string. This also performs
/// string-level preprocessing like case transformations.
#[allow(clippy::type_complexity)]
fn collect<'a>(
children: &'a [Content],
styles: &'a StyleChain<'a>,
consecutive: bool,
) -> SourceResult<(String, Vec<(Segment<'a>, StyleChain<'a>)>, SpanMapper)> {
let mut full = String::new();
let mut quoter = Quoter::new();
let mut segments = vec![];
let mut spans = SpanMapper::new();
let mut iter = children.iter().peekable();
let first_line_indent = ParElem::first_line_indent_in(*styles);
if !first_line_indent.is_zero()
&& consecutive
&& AlignElem::alignment_in(*styles).x.resolve(*styles)
== TextElem::dir_in(*styles).start().into()
{
full.push(SPACING_REPLACE);
segments.push((Segment::Spacing(first_line_indent.into()), *styles));
}
let hang = ParElem::hanging_indent_in(*styles);
if !hang.is_zero() {
full.push(SPACING_REPLACE);
segments.push((Segment::Spacing((-hang).into()), *styles));
}
while let Some(mut child) = iter.next() {
let outer = styles;
let mut styles = *styles;
if let Some((elem, local)) = child.to_styled() {
child = elem;
styles = outer.chain(local);
}
let segment = if child.is::<SpaceElem>() {
full.push(' ');
Segment::Text(1)
} else if let Some(elem) = child.to::<TextElem>() {
let prev = full.len();
if let Some(case) = TextElem::case_in(styles) {
full.push_str(&case.apply(&elem.text()));
} else {
full.push_str(&elem.text());
}
Segment::Text(full.len() - prev)
} else if let Some(elem) = child.to::<HElem>() {
full.push(SPACING_REPLACE);
Segment::Spacing(elem.amount())
} else if let Some(elem) = child.to::<LinebreakElem>() {
let c = if elem.justify(styles) { '\u{2028}' } else { '\n' };
full.push(c);
Segment::Text(c.len_utf8())
} else if let Some(elem) = child.to::<SmartQuoteElem>() {
let prev = full.len();
if SmartQuoteElem::enabled_in(styles) {
let lang = TextElem::lang_in(styles);
let region = TextElem::region_in(styles);
let quotes = Quotes::from_lang(lang, region);
let peeked = iter.peek().and_then(|child| {
let child = if let Some((child, _)) = child.to_styled() {
child
} else {
child
};
if let Some(elem) = child.to::<TextElem>() {
elem.text().chars().next()
} else if child.is::<SmartQuoteElem>() {
Some('"')
} else if child.is::<SpaceElem>() || child.is::<HElem>() {
Some(SPACING_REPLACE)
} else {
Some(OBJ_REPLACE)
}
});
full.push_str(quoter.quote("es, elem.double(styles), peeked));
} else {
full.push(if elem.double(styles) { '"' } else { '\'' });
}
Segment::Text(full.len() - prev)
} else if let Some(elem) = child.to::<EquationElem>() {
full.push(OBJ_REPLACE);
Segment::Equation(elem)
} else if let Some(elem) = child.to::<BoxElem>() {
let frac = elem.width(styles).is_fractional();
full.push(if frac { SPACING_REPLACE } else { OBJ_REPLACE });
Segment::Box(elem, frac)
} else if child.is::<MetaElem>() {
Segment::Meta
} else {
bail!(child.span(), "unexpected paragraph child");
};
if let Some(last) = full.chars().last() {
quoter.last(last);
}
spans.push(segment.len(), child.span());
if let (Some((Segment::Text(last_len), last_styles)), Segment::Text(len)) =
(segments.last_mut(), segment)
{
if *last_styles == styles {
*last_len += len;
continue;
}
}
segments.push((segment, styles));
}
Ok((full, segments, spans))
}
/// Prepare paragraph layout by shaping the whole paragraph and layouting all
/// contained inline-level content.
///
/// Runs BiDi analysis on `text` with the base direction taken from `styles`
/// and then converts every segment into one or more items. Fails if the
/// layout of an equation or a box fails.
fn prepare<'a>(
    vt: &mut Vt,
    children: &'a [Content],
    text: &'a str,
    segments: Vec<(Segment<'a>, StyleChain<'a>)>,
    spans: SpanMapper,
    styles: StyleChain<'a>,
    region: Size,
) -> SourceResult<Preparation<'a>> {
    // Only the two horizontal directions pin down a base level.
    let base_level = match TextElem::dir_in(styles) {
        Dir::LTR => Some(BidiLevel::ltr()),
        Dir::RTL => Some(BidiLevel::rtl()),
        _ => None,
    };
    let bidi = BidiInfo::new(text, base_level);

    // Shape / layout the children and collect them into items, tracking the
    // text offset at which each segment starts.
    let mut items = vec![];
    let mut start = 0;
    for (segment, local) in segments {
        let end = start + segment.len();
        match segment {
            Segment::Text(_) => {
                shape_range(&mut items, vt, &bidi, start..end, &spans, local);
            }
            Segment::Spacing(Spacing::Rel(rel)) => {
                let amount = rel.resolve(local).relative_to(region.x);
                items.push(Item::Absolute(amount));
            }
            Segment::Spacing(Spacing::Fr(fr)) => {
                items.push(Item::Fractional(fr, None));
            }
            Segment::Equation(equation) => {
                let pod = Regions::one(region, Axes::splat(false));
                let mut frame = equation.layout(vt, local, pod)?.into_frame();
                frame.translate(Point::with_y(TextElem::baseline_in(local)));
                items.push(Item::Frame(frame));
            }
            Segment::Box(elem, _) => match elem.width(local) {
                // Boxes with fractional width are sized later on.
                Sizing::Fr(fr) => {
                    items.push(Item::Fractional(fr, Some((elem, local))));
                }
                _ => {
                    let pod = Regions::one(region, Axes::splat(false));
                    let mut frame = elem.layout(vt, local, pod)?.into_frame();
                    frame.translate(Point::with_y(TextElem::baseline_in(local)));
                    items.push(Item::Frame(frame));
                }
            },
            Segment::Meta => {
                let mut frame = Frame::new(Size::zero());
                frame.meta(local, true);
                items.push(Item::Meta(frame));
            }
        }
        start = end;
    }

    let hyphenate = shared_get(styles, children, TextElem::hyphenate_in);
    let lang = shared_get(styles, children, TextElem::lang_in);
    let align = AlignElem::alignment_in(styles).x.resolve(styles);
    let justify = ParElem::justify_in(styles);
    let hang = ParElem::hanging_indent_in(styles);

    Ok(Preparation {
        bidi,
        items,
        spans,
        styles,
        hyphenate,
        lang,
        align,
        justify,
        hang,
    })
}
/// Group a range of text by BiDi level and script, shape the runs and generate
/// items for them.
///
/// A new run starts whenever the embedding level changes or a character's
/// script is not compatible with the script of the current run. One text item
/// is pushed to `items` per run.
fn shape_range<'a>(
    items: &mut Vec<Item<'a>>,
    vt: &Vt,
    bidi: &BidiInfo<'a>,
    range: Range,
    spans: &SpanMapper,
    styles: StyleChain<'a>,
) {
    // Shapes one run and appends it to the items.
    let mut emit = |run: Range, level: BidiLevel| {
        let dir = match level.is_ltr() {
            true => Dir::LTR,
            false => Dir::RTL,
        };
        let base = run.start;
        let shaped = shape(vt, base, &bidi.text[run], spans, styles, dir);
        items.push(Item::Text(shaped));
    };

    let mut run_start = range.start;
    let mut run_level = BidiLevel::ltr();
    let mut run_script = Script::Unknown;

    // Only byte offsets at which a character starts are of interest.
    let char_starts = range.clone().filter(|&i| bidi.text.is_char_boundary(i));
    for i in char_starts {
        let level = bidi.levels[i];
        let script = bidi.text[i..]
            .chars()
            .next()
            .map_or(Script::Unknown, |c| c.script());

        if level == run_level && is_compatible(script, run_script) {
            // The character joins the current run. As long as the run has no
            // specific script yet, it adopts the character's script.
            if is_generic_script(run_script) {
                run_script = script;
            }
            continue;
        }

        // The character opens a new run; flush the previous one unless it is
        // empty.
        if run_start < i {
            emit(run_start..i, run_level);
        }
        run_start = i;
        run_level = level;
        run_script = script;
    }

    emit(run_start..range.end, run_level);
}
/// Whether this is not a specific script, i.e. one of `Unknown`, `Common` or
/// `Inherited`.
fn is_generic_script(script: Script) -> bool {
    script == Script::Unknown || script == Script::Common || script == Script::Inherited
}
/// Whether these scripts can be part of the same shape run: they are either
/// equal or at least one of them is generic.
fn is_compatible(a: Script, b: Script) -> bool {
    a == b || is_generic_script(a) || is_generic_script(b)
}
/// Get a style property, but only if it is the same for all children of the
/// paragraph.
///
/// The property is read from `styles` and compared against the value each
/// styled child sees through its local styles. Returns `None` as soon as one
/// styled child disagrees.
fn shared_get<T: PartialEq>(
    styles: StyleChain<'_>,
    children: &[Content],
    getter: fn(StyleChain) -> T,
) -> Option<T> {
    let value = getter(styles);
    for child in children {
        // Children without local styles cannot deviate from the outer value.
        if let Some((_, local)) = child.to_styled() {
            if getter(styles.chain(local)) != value {
                return None;
            }
        }
    }
    Some(value)
}
/// Find suitable linebreaks.
///
/// Uses the configured line breaking algorithm. If none is configured,
/// justified paragraphs use the optimized algorithm and all others the simple
/// one.
fn linebreak<'a>(vt: &Vt, p: &'a Preparation<'a>, width: Abs) -> Vec<Line<'a>> {
    let automatic = || match ParElem::justify_in(p.styles) {
        true => Linebreaks::Optimized,
        false => Linebreaks::Simple,
    };

    match ParElem::linebreaks_in(p.styles).unwrap_or_else(automatic) {
        Linebreaks::Simple => linebreak_simple(vt, p, width),
        Linebreaks::Optimized => linebreak_optimized(vt, p, width),
    }
}
/// Perform line breaking in simple first-fit style. This means that we build
/// lines greedily, always taking the longest possible line. This may lead to
/// very unbalanced lines, but is fast and simple.
fn linebreak_simple<'a>(vt: &Vt, p: &'a Preparation<'a>, width: Abs) -> Vec<Line<'a>> {
    let mut finished = Vec::new();
    let mut line_start = 0;
    // The most recent line that still fit, together with its end offset.
    let mut pending: Option<(Line<'a>, usize)> = None;

    for (end, mandatory, hyphen) in breakpoints(p) {
        // Build the candidate line up to this breakpoint.
        let mut candidate = line(vt, p, line_start..end, mandatory, hyphen);

        // If the candidate is too wide, commit the last line that fit and
        // start over from where that one ended. The rebuilt candidate cannot
        // be broken up any further.
        if !width.fits(candidate.width) {
            if let Some((fitting, fitting_end)) = pending.take() {
                finished.push(fitting);
                line_start = fitting_end;
                candidate = line(vt, p, line_start..end, mandatory, hyphen);
            }
        }

        // A line has to be finished right away at a mandatory break (i.e. due
        // to "\n") or if it is already too wide, since then no shorter line
        // is possible. Otherwise, it is remembered as the last fitting one.
        let must_break = mandatory || !width.fits(candidate.width);
        pending = if must_break {
            finished.push(candidate);
            line_start = end;
            None
        } else {
            Some((candidate, end))
        };
    }

    // Whatever is still pending forms the final line.
    finished.extend(pending.map(|(rest, _)| rest));
    finished
}
/// Perform line breaking in optimized Knuth-Plass style. Here, we use more
/// context to determine the line breaks than in the simple first-fit style. For
/// example, we might choose to cut a line short even though there is still a
/// bit of space to improve the fit of one of the following lines. The
/// Knuth-Plass algorithm is based on the idea of "cost". A line which has a
/// very tight or very loose fit has a higher cost than one that is just right.
/// Ending a line with a hyphen incurs extra cost and endings two successive
/// lines with hyphens even more.
///
/// To find the layout with the minimal total cost the algorithm uses dynamic
/// programming: For each possible breakpoint it determines the optimal
/// paragraph layout _up to that point_. It walks over all possible start points
/// for a line ending at that point and finds the one for which the cost of the
/// line plus the cost of the optimal paragraph up to the start point (already
/// computed and stored in dynamic programming table) is minimal. The final
/// result is simply the layout determined for the last breakpoint at the end of
/// text.
fn linebreak_optimized<'a>(vt: &Vt, p: &'a Preparation<'a>, width: Abs) -> Vec<Line<'a>> {
/// The cost of a line or paragraph layout.
type Cost = f64;
/// An entry in the dynamic programming table.
struct Entry<'a> {
pred: usize,
total: Cost,
line: Line<'a>,
}
// Cost parameters.
const HYPH_COST: Cost = 0.5;
const CONSECUTIVE_DASH_COST: Cost = 300.0;
const MAX_COST: Cost = 1_000_000.0;
const MIN_RATIO: f64 = -1.0;
// Dynamic programming table.
let mut active = 0;
let mut table = vec![Entry {
pred: 0,
total: 0.0,
line: line(vt, p, 0..0, false, false),
}];
let em = TextElem::size_in(p.styles);
for (end, mandatory, hyphen) in breakpoints(p) {
let k = table.len();
let eof = end == p.bidi.text.len();
let mut best: Option<Entry> = None;
// Find the optimal predecessor.
for (i, pred) in table.iter_mut().enumerate().skip(active) {
// Layout the line.
let start = pred.line.end;
let attempt = line(vt, p, start..end, mandatory, hyphen);
// Determine how much the line's spaces would need to be stretched
// to make it the desired width.
let delta = width - attempt.width;
// Determine how much stretch are permitted.
let adjust = if delta >= Abs::zero() {
attempt.stretchability()
} else {
attempt.shrinkability()
};
// Ideally, the ratio should between -1.0 and 1.0, but sometimes a value above 1.0
// is possible, in which case the line is underfull.
let mut ratio = delta / adjust;
if ratio.is_nan() {
// The line is not stretchable, but it just fits.
// This often happens with monospace fonts and CJK texts.
ratio = 0.0;
}
if ratio.is_infinite() {
// The line's not stretchable, we calculate the ratio in another way...
ratio = delta / (em / 2.0);
// ...and because it is underfull/overfull, make sure the ratio is at least 1.0.
if ratio > 0.0 {
ratio += 1.0;
} else {
ratio -= 1.0;
}
}
// Determine the cost of the line.
let min_ratio = if p.justify { MIN_RATIO } else { 0.0 };
let mut cost = if ratio < min_ratio {
// The line is overfull. This is the case if
// - justification is on, but we'd need to shrink too much
// - justification is off and the line just doesn't fit
// Since any longer line will also be overfull, we can deactivate
// this breakpoint.
active = i + 1;
MAX_COST
} else if mandatory || eof {
// This is a mandatory break and the line is not overfull, so
// all breakpoints before this one become inactive since no line
// can span above the mandatory break.
active = k;
// If ratio > 0, we need to stretch the line only when justify is needed.
// If ratio < 0, we always need to shrink the line.
if (ratio > 0.0 && attempt.justify) || ratio < 0.0 {
ratio.powi(3).abs()
} else {
0.0
}
} else {
// Normal line with cost of |ratio^3|.
ratio.powi(3).abs()
};
// Penalize hyphens.
if hyphen {
cost += HYPH_COST;
}
// In Knuth's paper, cost = (1 + 100|r|^3 + p)^2 + a,
// where r is the ratio, p=50 is the penalty, and a=3000 is the consecutive penalty.
// We divide the whole formula by 10, resulting in (0.01 + |r|^3 + p)^2 + a,
// where p=0.5 and a=300.
cost = (0.01 + cost).powi(2);
// Penalize two consecutive dashes (not necessarily hyphens) extra.
if attempt.dash && pred.line.dash {
cost += CONSECUTIVE_DASH_COST;
}
// The total cost of this line and its chain of predecessors.
let total = pred.total + cost;
// If this attempt is better than what we had before, take it!
if best.as_ref().map_or(true, |best| best.total >= total) {
best = Some(Entry { pred: i, total, line: attempt });
}
}
table.push(best.unwrap());
}
// Retrace the best path.
let mut lines = vec![];
let mut idx = table.len() - 1;
while idx != 0 {
table.truncate(idx + 1);
let entry = table.pop().unwrap();
lines.push(entry.line);
idx = entry.pred;
}
lines.reverse();
lines
}
/// Enumerate all points in the paragraph's text where a line can be broken.
///
/// Every yielded item is a triple of the text index of the breakpoint, a
/// flag telling whether the break is mandatory (after `\n`), and a flag
/// telling whether a hyphen is required (when breaking inside of a word).
fn breakpoints<'a>(p: &'a Preparation<'a>) -> Breakpoints<'a> {
    // The segmenter runs over the whole paragraph text; all other state
    // starts out empty and is filled in lazily while iterating.
    let linebreaks = LineBreakIterator::new(p.bidi.text);
    Breakpoints {
        p,
        linebreaks,
        syllables: None,
        offset: 0,
        suffix: 0,
        end: 0,
        mandatory: false,
    }
}
/// An iterator over the line break opportunities in a text.
///
/// Yields `(index, mandatory, hyphen)` triples: the text index of the
/// opportunity, whether the break is mandatory, and whether breaking there
/// requires a hyphen.
struct Breakpoints<'a> {
/// The prepared paragraph whose text (`p.bidi.text`) is being segmented.
p: &'a Preparation<'a>,
/// The inner iterator over the unicode line break opportunities.
linebreaks: LineBreakIterator<'a>,
/// Iterator over syllables of the current word. `None` until the first
/// word has been handed to the hyphenator.
syllables: Option<hypher::Syllables<'a>>,
/// The current text offset, i.e. the index of the most recently reached
/// break opportunity.
offset: usize,
/// The trimmed end of the current word (the word's end once trailing
/// non-alphabetic characters are cut off).
suffix: usize,
/// The untrimmed end of the current word, as reported by `linebreaks`.
end: usize,
/// Whether the break after the current word is mandatory.
mandatory: bool,
}
impl Iterator for Breakpoints<'_> {
    type Item = (usize, bool, bool);

    /// Advance to the next break opportunity.
    ///
    /// Yields `(index, mandatory, hyphen)`. Opportunities inside of a word
    /// are reported with `hyphen = true` and are never mandatory.
    fn next(&mut self) -> Option<Self::Item> {
        loop {
            // While a hyphenated "word" is in progress, its syllables are
            // consumed one by one.
            if let Some(syllable) = self.syllables.as_mut().and_then(Iterator::next) {
                self.offset += syllable.len();

                // Once the trimmed end of the word is reached, jump over the
                // trailing non-alphabetic characters to its real end.
                if self.offset == self.suffix {
                    self.offset = self.end;
                }

                // Anything before the end of the word is a hyphenation
                // opportunity. Drop it if hyphenation was actually disabled
                // at that position and try the next syllable.
                let hyphen = self.offset < self.end;
                if hyphen && !self.hyphenate(self.offset) {
                    continue;
                }

                return Some((self.offset, self.mandatory && !hyphen, hyphen));
            }

            // Move on to the next "word".
            (self.end, self.mandatory) = self.linebreaks.next()?;

            // Unless hyphenation is globally off, try to split the word into
            // syllables and restart so that they are processed above.
            if self.p.hyphenate != Some(false) {
                if let Some(lang) = self.lang(self.offset) {
                    let word = &self.p.bidi.text[self.offset..self.end];
                    let trimmed = word.trim_end_matches(|c: char| !c.is_alphabetic());
                    if !trimmed.is_empty() {
                        self.suffix = self.offset + trimmed.len();
                        self.syllables = Some(hypher::hyphenate(trimmed, lang));
                        continue;
                    }
                }
            }

            // The word is not hyphenated, so its end is the only opportunity.
            self.offset = self.end;
            return Some((self.end, self.mandatory, false));
        }
    }
}
impl Breakpoints<'_> {
    /// Whether hyphenation is enabled at the given offset.
    ///
    /// A paragraph-wide setting takes precedence. Without one, the styles of
    /// the text item found at `offset` decide, and `false` is returned if
    /// there is no such text item.
    fn hyphenate(&self, offset: usize) -> bool {
        match self.p.hyphenate {
            Some(enabled) => enabled,
            None => self
                .p
                .find(offset)
                .and_then(|item| item.text())
                .map_or(false, |shaped| TextElem::hyphenate_in(shaped.styles)),
        }
    }

    /// The text language at the given offset.
    ///
    /// Returns `None` if no language can be determined or if it cannot be
    /// converted into a `hypher::Lang`.
    fn lang(&self, offset: usize) -> Option<hypher::Lang> {
        // Prefer the paragraph-wide language and fall back to the styles of
        // the text item at the offset.
        let lang = match self.p.lang {
            Some(lang) => lang,
            None => {
                let shaped = self.p.find(offset)?.text()?;
                TextElem::lang_in(shaped.styles)
            }
        };

        // The conversion fails (yielding `None`) if the language code does
        // not have the byte length `from_iso` expects.
        let iso = lang.as_str().as_bytes().try_into().ok()?;
        hypher::Lang::from_iso(iso)
    }
}
/// Create a line which spans the given range.
///
/// - `range`: The text range the line covers. Its end is trimmed of trailing
///   whitespace for the resulting `Line::trimmed`, while the untrimmed end
///   is kept in `Line::end`.
/// - `mandatory`: Whether the line ends at a mandatory break. Such a line is
///   not justified (unless it ends with U+2028, see below).
/// - `hyphen`: Whether the line ends inside of a word, in which case a
///   hyphen is appended to the reshaped last text item.
///
/// The returned line's `width` is the sum of the widths of the reshaped
/// first and last items and of all items in between.
fn line<'a>(
vt: &Vt,
p: &'a Preparation,
mut range: Range,
mandatory: bool,
hyphen: bool,
) -> Line<'a> {
// Remember the untrimmed end because `range.end` may be moved backwards
// below.
let end = range.end;
// Justify only if the paragraph asks for it and this is neither the last
// line of the text nor a line ended by a mandatory break.
let mut justify = p.justify && end < p.bidi.text.len() && !mandatory;
// An empty range yields a line without any items and zero width.
if range.is_empty() {
return Line {
bidi: &p.bidi,
end,
trimmed: range,
first: None,
inner: &[],
last: None,
width: Abs::zero(),
justify,
dash: false,
};
}
// Slice out the relevant items.
let (expanded, mut inner) = p.slice(range.clone());
let mut width = Abs::zero();
// Reshape the last item if it's split in half or hyphenated.
let mut last = None;
let mut dash = false;
if let Some((Item::Text(shaped), before)) = inner.split_last() {
// Compute the range we want to shape, trimming whitespace at the
// end of the line.
let base = expanded.end - shaped.text.len();
let start = range.start.max(base);
let text = &p.bidi.text[start..range.end];
let trimmed = text.trim_end();
range.end = start + trimmed.len();
// Deal with hyphens, dashes and justification.
// U+00AD is the soft hyphen; U+2028 is the line separator, which forces
// justification even though the break is mandatory.
let shy = trimmed.ends_with('\u{ad}');
dash = hyphen || shy || trimmed.ends_with(['-', '–', '—']);
justify |= text.ends_with('\u{2028}');
// Usually, we don't want to shape an empty string because:
// - We don't want the height of trimmed whitespace in a different
// font to be considered for the line height.
// - Even if it's in the same font, it's unnecessary.
//
// There is one exception though. When the whole line is empty, we
// need the shaped empty string to make the line the appropriate
// height. That is the case exactly if the string is empty and there
// are no other items in the line.
if hyphen || start + shaped.text.len() > range.end {
if hyphen || start < range.end || before.is_empty() {
let mut reshaped = shaped.reshape(vt, &p.spans, start..range.end);
if hyphen || shy {
reshaped.push_hyphen(vt);
}
width += reshaped.width;
last = Some(Item::Text(reshaped));
}
// The last item is now represented by `last` (or dropped entirely), so
// it must not be counted among the inner items again.
inner = before;
}
}
// Reshape the start item if it's split in half.
let mut first = None;
if let Some((Item::Text(shaped), after)) = inner.split_first() {
// Compute the range we want to shape.
let base = expanded.start;
let end = range.end.min(base + shaped.text.len());
// Reshape if necessary.
if range.start + shaped.text.len() > end {
if range.start < end {
let reshaped = shaped.reshape(vt, &p.spans, range.start..end);
width += reshaped.width;
first = Some(Item::Text(reshaped));
}
// Same as above: the first item is now covered by `first` (or dropped).
inner = after;
}
}
// Measure the inner items.
for item in inner {
width += item.width();
}
Line {
bidi: &p.bidi,
trimmed: range,
end,
first,
inner,
last,
width,
justify,
dash,
}
}
/// Combine layouted lines into one frame per region.
///
/// Every line is committed into its own frame. Afterwards, the first two
/// and the last two frames are merged pairwise so that a single line is
/// not left alone at the start or the end of the paragraph.
///
/// Returns the first error produced while committing a line.
fn finalize(
    vt: &mut Vt,
    p: &Preparation,
    lines: &[Line],
    region: Size,
    expand: bool,
) -> SourceResult<Fragment> {
    // The paragraph only takes the full width of the region if we should
    // expand or there's fractional spacing. Otherwise (and always when the
    // region is not finite), it is fit to the widest line.
    let fit_to_content = !region.x.is_finite()
        || (!expand && lines.iter().all(|line| line.fr().is_zero()));
    let width = if fit_to_content {
        let widest = lines.iter().map(|line| line.width).max().unwrap_or_default();
        p.hang + widest
    } else {
        region.x
    };

    // Build one frame per line, bailing out on the first failure.
    let mut frames: Vec<Frame> = Vec::with_capacity(lines.len());
    for line in lines {
        frames.push(commit(vt, p, line, width, region.y)?);
    }

    let leading = ParElem::leading_in(p.styles);

    // Prevent orphans: glue the second line onto the first one.
    if matches!(frames.as_slice(), [_, second, ..] if !second.is_empty()) {
        let second = frames.remove(1);
        merge(&mut frames[0], second, leading);
    }

    // Prevent widows: glue the last line onto the one before it.
    if matches!(frames.as_slice(), [.., penultimate, _] if !penultimate.is_empty()) {
        let widow = frames.pop().unwrap();
        let penultimate = frames.last_mut().unwrap();
        merge(penultimate, widow, leading);
    }

    Ok(Fragment::frames(frames))
}
/// Merge two line frames.
///
/// Places `second` below `first`, separated by `leading`, and grows the
/// height of `first` so that it encloses both.
fn merge(first: &mut Frame, second: Frame, leading: Abs) {
    // Vertical position at which the second frame starts.
    let y = first.height() + leading;
    // The combined height has to be read before `second` is moved away.
    let height = y + second.height();
    first.push_frame(Point::with_y(y), second);
    first.size_mut().y = height;
}
/// Commit to a line and build its frame.
///
/// - `line`: The line whose items are placed.
/// - `width`: The width of the resulting frame.
/// - `full`: The height of the region handed to elements that are attached
///   to fractional spacing.
///
/// The resulting frame is `width` wide and as tall as the largest ascent
/// plus the largest descent among its item frames; its baseline is the
/// largest ascent.
///
/// # Errors
/// Propagates an error from laying out an element attached to fractional
/// spacing.
fn commit(
vt: &mut Vt,
p: &Preparation,
line: &Line,
width: Abs,
full: Abs,
) -> SourceResult<Frame> {
// The free space left in the line (negative if the line is too wide) and
// the horizontal cursor at which the next item is placed.
let mut remaining = width - line.width - p.hang;
let mut offset = Abs::zero();
// Reorder the line from logical to visual order.
let (reordered, starts_rtl) = reorder(line);
// The hanging indent shifts the content only if the line does not start
// with a right-to-left run.
if !starts_rtl {
offset += p.hang;
}
// Handle hanging punctuation to the left.
// Only applies if the first item is text whose direction is not positive
// and the line consists of more than that single glyph.
if let Some(Item::Text(text)) = reordered.first() {
if let Some(glyph) = text.glyphs.first() {
if !text.dir.is_positive()
&& TextElem::overhang_in(text.styles)
&& (reordered.len() > 1 || text.glyphs.len() > 1)
{
let amount = overhang(glyph.c) * glyph.x_advance.at(text.size);
offset -= amount;
remaining += amount;
}
}
}
// Handle hanging punctuation to the right.
// Here, only the free space grows; the cursor stays where it is.
if let Some(Item::Text(text)) = reordered.last() {
if let Some(glyph) = text.glyphs.last() {
if text.dir.is_positive()
&& TextElem::overhang_in(text.styles)
&& (reordered.len() > 1 || text.glyphs.len() > 1)
{
let amount = overhang(glyph.c) * glyph.x_advance.at(text.size);
remaining += amount;
}
}
}
// Determine how much additional space is needed.
// The justification_ratio is for the first step justification,
// extra_justification is for the last step.
// For more info on multi-step justification, see Procedures for Inter-
// Character Space Expansion in W3C document Chinese Layout Requirements.
let fr = line.fr();
let mut justification_ratio = 0.0;
let mut extra_justification = Abs::zero();
let shrink = line.shrinkability();
let stretch = line.stretchability();
if remaining < Abs::zero() && shrink > Abs::zero() {
// Attempt to reduce the length of the line, using shrinkability.
// The ratio is clamped to -1.0; whatever cannot be absorbed by shrinking
// stays in `remaining` as a negative amount.
justification_ratio = (remaining / shrink).max(-1.0);
remaining = (remaining + shrink).min(Abs::zero());
} else if line.justify && fr.is_zero() {
// Attempt to increase the length of the line, using stretchability.
// The ratio is clamped to 1.0; whatever cannot be absorbed by stretching
// stays in `remaining` as a positive amount.
if stretch > Abs::zero() {
justification_ratio = (remaining / stretch).min(1.0);
remaining = (remaining - stretch).max(Abs::zero());
}
let justifiables = line.justifiables();
if justifiables > 0 && remaining > Abs::zero() {
// Underfull line, distribute the extra space.
extra_justification = remaining / justifiables as f64;
remaining = Abs::zero();
}
}
// The largest extent above (`top`) and below (`bottom`) the baseline among
// all item frames.
let mut top = Abs::zero();
let mut bottom = Abs::zero();
// Build the frames and determine the height and baseline.
let mut frames = vec![];
for item in reordered {
// Records a frame at the current cursor position, updates the vertical
// extents and advances the cursor by the frame's width.
let mut push = |offset: &mut Abs, frame: Frame| {
let width = frame.width();
top.set_max(frame.baseline());
bottom.set_max(frame.size().y - frame.baseline());
frames.push((*offset, frame));
*offset += width;
};
match item {
Item::Absolute(v) => {
// Absolute spacing just moves the cursor.
offset += *v;
}
Item::Fractional(v, elem) => {
// Fractional spacing receives its share of the remaining space. If an
// element is attached, it is laid out into that share; otherwise the
// cursor is simply advanced.
let amount = v.share(fr, remaining);
if let Some((elem, styles)) = elem {
let region = Size::new(amount, full);
let pod = Regions::one(region, Axes::new(true, false));
let mut frame = elem.layout(vt, *styles, pod)?.into_frame();
frame.translate(Point::with_y(TextElem::baseline_in(*styles)));
push(&mut offset, frame);
} else {
offset += amount;
}
}
Item::Text(shaped) => {
let frame = shaped.build(vt, justification_ratio, extra_justification);
push(&mut offset, frame);
}
Item::Frame(frame) | Item::Meta(frame) => {
push(&mut offset, frame.clone());
}
}
}
// Remaining space is distributed now.
// (The fractional items above consumed it, so it must not additionally
// shift the line through alignment below.)
if !fr.is_zero() {
remaining = Abs::zero();
}
let size = Size::new(width, top + bottom);
let mut output = Frame::new(size);
output.set_baseline(top);
// Construct the line's frame.
// Each frame is shifted horizontally by the alignment's share of the
// remaining space and vertically such that all baselines coincide.
for (offset, frame) in frames {
let x = offset + p.align.position(remaining);
let y = top - frame.baseline();
output.push_frame(Point::new(x, y), frame);
}
Ok(output)
}
/// Return a line's items in visual order.
///
/// The second component of the result tells whether the first level
/// reported by the bidi algorithm for the line is right-to-left. It is
/// `false` for an empty line.
fn reorder<'a>(line: &'a Line<'a>) -> (Vec<&Item<'a>>, bool) {
    let range = line.trimmed.clone();

    // The bidi crate doesn't like empty lines, so those are returned as-is.
    if range.is_empty() {
        return (line.slice(range).collect(), false);
    }

    // Locate the bidi paragraph in which the line starts.
    let para = line
        .bidi
        .paragraphs
        .iter()
        .find(|para| para.range.contains(&range.start))
        .unwrap();

    // Let the bidi algorithm compute the runs in visual order (left to
    // right) along with the levels they were computed from.
    let (levels, runs) = line.bidi.visual_runs(para, range);
    let starts_rtl = match levels.first() {
        Some(level) => level.is_rtl(),
        None => false,
    };

    // Gather the items run by run.
    let mut items = Vec::new();
    for run in runs {
        // Runs whose level was reset by rule L1 are skipped because
        // handling them would require reshaping again in some cases.
        if line.bidi.levels[run.start] != levels[run.start] {
            continue;
        }

        // Items of a right-to-left run are appended in reverse.
        let mark = items.len();
        items.extend(line.slice(run.clone()));
        if levels[run.start].is_rtl() {
            items[mark..].reverse();
        }
    }

    (items, starts_rtl)
}
/// How much a character should hang into the end margin.
///
/// The result is a factor that is multiplied with the advance width of the
/// glyph; characters that should not hang at all yield `0.0`.
///
/// For more discussion, see:
/// https://recoveringphysicist.com/21/
fn overhang(c: char) -> f64 {
    /// Hanging characters paired with their overhang factor.
    const FACTORS: &[(char, f64)] = &[
        // Dashes.
        ('–', 0.2),
        ('—', 0.2),
        ('-', 0.55),
        // Punctuation.
        ('.', 0.8),
        (',', 0.8),
        (':', 0.3),
        (';', 0.3),
        // Arabic comma and full stop.
        ('\u{60C}', 0.4),
        ('\u{6D4}', 0.4),
        // Ideographic comma and full stop.
        ('\u{3001}', 1.0),
        ('\u{3002}', 1.0),
    ];

    FACTORS
        .iter()
        .find(|&&(candidate, _)| candidate == c)
        .map_or(0.0, |&(_, factor)| factor)
}
|