#include <TextExtractor.h>
A TextExtractor::Word object represents a word on a PDF page. Each word contains a sequence of characters in one or more styles (see TextExtractor::Style).
Definition at line 430 of file TextExtractor.h.
pdftron::PDF::Word::Word()
Rect pdftron::PDF::Word::GetBBox()
- Returns
- The bounding box for this word (in unrotated page coordinates). The GetBBox(double out_bbox[4]) overload provides this box through out_bbox instead.
- Note
- To account for the effect of page '/Rotate' attribute, transform all points using page.GetDefaultMatrix().
void pdftron::PDF::Word::GetBBox(double out_bbox[4])
Style pdftron::PDF::Word::GetCharStyle(int char_idx)
- Parameters
-
char_idx | The index of a character in this word. |
- Returns
- The style associated with a given character.
int pdftron::PDF::Word::GetCurrentNum()
- Returns
- The index of this word in the current line. A word that starts the line will return 0, whereas the last word in the line will return (line.GetNumWords()-1).
std::vector<double> pdftron::PDF::Word::GetGlyphQuad(int glyph_idx)
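- Parameters
-
glyph_idx | The index of a glyph in this word. |
- Returns
- The quadrilateral representing a tight bounding box for a given glyph in the word (in unrotated page coordinates). The GetGlyphQuad(int glyph_idx, double out_quad[8]) overload provides this quadrilateral through out_quad instead.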
void pdftron::PDF::Word::GetGlyphQuad(int glyph_idx, double out_quad[8])
Word pdftron::PDF::Word::GetNextWord()
- Returns
- the next word on the current line.
int pdftron::PDF::Word::GetNumGlyphs()
- Returns
- The number of glyphs in this word.
std::vector<double> pdftron::PDF::Word::GetQuad()
- Returns
- The quadrilateral representing a tight bounding box for this word (in unrotated page coordinates). The GetQuad(double out_quad[8]) overload provides this quadrilateral through out_quad instead.
void pdftron::PDF::Word::GetQuad(double out_quad[8])
const Unicode* pdftron::PDF::Word::GetString()
- Returns
- the content of this word represented as a Unicode string.
int pdftron::PDF::Word::GetStringLen()
- Returns
- the number of characters in this word.
Style pdftron::PDF::Word::GetStyle()
- Returns
- predominant style for this word.
bool pdftron::PDF::Word::IsValid()
- Returns
- true if this is a valid word, false otherwise.
bool pdftron::PDF::Word::operator!=(const Word &) const
bool pdftron::PDF::Word::operator==(const Word &) const
The documentation for this class was generated from the following file: