Class Encoding
java.lang.Object
org.jcodings.Encoding
- All Implemented Interfaces:
Cloneable
- Direct Known Subclasses:
AbstractEncoding
-
Field Summary
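Fields
Modifier and Type | Field | Description
static final int | CHAR_INVALID
private Charset | charset
private static int | count
private int | hashCode
private int | index
private boolean | isAsciiCompatible
private boolean | isDummy
private final boolean | isFixedWidth
private final boolean | isSingleByte
protected boolean | isUnicode
protected boolean | isUTF8
protected final int | maxLength
protected final int | minLength
private byte[] | name
static final byte | NEW_LINE
private String | stringName
-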
Constructor Summary
Constructors -
Method Summary
Modifier and TypeMethodDescriptionabstract voidapplyAllCaseFold(int flag, ApplyAllCaseFoldFunction fun, Object arg) Expand case folds given a character class (used for case insensitive matching)static byteasciiToLower(int c) static byteasciiToUpper(int c) abstract CaseFoldCodeItem[]caseFoldCodesByString(int flag, byte[] bytes, int p, int end) Expand AST string nodes into their folded alternatives (look at:Analyser.expandCaseFoldString) Oniguruma equivalent:get_case_fold_codes_by_strabstract intOniguruma equivalent:case_mapabstract intcodeToMbc(int code, byte[] bytes, int p) Extracts code point into it's multibyte representationabstract intcodeToMbcLength(int code) Returns character length given a code point Oniguruma equivalent:code_to_mbclenabstract int[]ctypeCodeRange(int ctype, IntHolder sbOut) Returns code range for a given character type Oniguruma equivalent:get_ctype_code_rangestatic intdigitVal(int code) final booleanIf this encoding is capable of being represented by a Java Charset then provide it.The name of the equivalent Java Charset for this encoding.final intgetIndex()final byte[]getName()final inthashCode()final booleanisAlnum(int code) final booleanisAlpha(int code) static booleanisAscii(byte b) static booleanisAscii(int code) final booleanfinal booleanisBlank(int code) final booleanisCntrl(int code) abstract booleanisCodeCType(int code, int ctype) Perform a check whether given code is of given character type (e.g.final booleanisDigit(int code) final booleanisDummy()final booleanfinal booleanisGraph(int code) final booleanisLower(int code) static booleanisMbcAscii(byte b) booleanisMbcCrnl(byte[] bytes, int p, int end) final booleanisMbcHead(byte[] bytes, int p, int end) final booleanisMbcWord(byte[] bytes, int p, int end) abstract booleanisNewLine(byte[] bytes, int p, int end) Returns true ifbytes[p]is a head of a new line character Oniguruma equivalent:is_mbc_newlinefinal booleanisNewLine(int code) final booleanisPrint(int code) final 
booleanisPunct(int code) abstract booleanisReverseMatchAllowed(byte[] bytes, int p, int end) Returns true if it's safe to use reversal Boyer-Moore search fail fast algorithm Oniguruma equivalent:is_allowed_reverse_matchfinal booleanisSbWord(int code) final booleanfinal booleanisSpace(int code) final booleanfinal booleanisUpper(int code) final booleanisUTF8()final booleanisWord(int code) static booleanisWordGraphPrint(int ctype) final booleanisXDigit(int code) abstract intleftAdjustCharHead(byte[] bytes, int p, int s, int end) Seeks the previous character head in a stream Oniguruma equivalent:left_adjust_char_headabstract intlength(byte c) Returns character length given character head returns1for singlebyte encodings or performs direct length table lookup for multibyte ones.abstract intlength(byte[] bytes, int p, int end) Returns character length given stream, character position and stream end returns1for singlebyte encodings or performs sanity validations for multibyte ones and returns the character length, missing characters in the stream otherwisestatic Encodingstatic Encodingfinal intReturns maximum character byte length that can appear in an encoding Oniguruma equivalent:max_enc_lenfinal intDeprecated.abstract intmbcCaseFold(int flag, byte[] bytes, IntHolder pp, int end, byte[] to) Performs case folding for a character atbytes[pp.value]final intDeprecated.abstract intmbcToCode(byte[] bytes, int p, int end) Returns code point for a character Oniguruma equivalent:mbc_to_codefinal intReturns minimum character byte length that can appear in an encoding Oniguruma equivalent:min_enc_lenstatic intodigitVal(int code) final intprevCharHead(byte[] bytes, int p, int s, int end) abstract intpropertyNameToCType(byte[] bytes, int p, int end) Returns character type given character type name (used when e.g.(package private) Encodingreplicate(byte[] name) final intrightAdjustCharHead(byte[] bytes, int p, int s, int end) final intrightAdjustCharHeadWithPrev(byte[] bytes, int p, 
int s, int end, IntHolder prev) protected final voidsetDummy()protected final voidsetName(byte[] name) protected final voidfinal intstep(byte[] bytes, int p, int end, int n) final intstepBack(byte[] bytes, int p, int s, int end, int n) final intstrByteLengthNull(byte[] bytes, int p, int end) abstract intstrCodeAt(byte[] bytes, int p, int end, int index) abstract intstrLength(byte[] bytes, int p, int end) final intstrLengthNull(byte[] bytes, int p, int end) final intstrNCmp(byte[] bytes, int p, int end, byte[] ascii, int asciiP, int n) byte[]Returns lower case table if it's safe to use it directly, otherwisenullUsed for fast case insensitive matching for some singlebyte encodingsfinal StringtoString()final intxdigitVal(int code)
-
Field Details
-
CHAR_INVALID
public static final int CHAR_INVALID- See Also:
-
count
private static int count -
minLength
protected final int minLength -
maxLength
protected final int maxLength -
isFixedWidth
private final boolean isFixedWidth -
isSingleByte
private final boolean isSingleByte -
isAsciiCompatible
private boolean isAsciiCompatible -
isUnicode
protected boolean isUnicode -
isUTF8
protected boolean isUTF8 -
name
private byte[] name -
hashCode
private int hashCode -
index
private int index -
charset
-
isDummy
private boolean isDummy -
stringName
-
NEW_LINE
public static final byte NEW_LINE- See Also:
-
-
Constructor Details
-
Encoding
-
-
Method Details
-
setName
-
setName
protected final void setName(byte[] name) -
setDummy
protected final void setDummy() -
toString
-
equals
-
hashCode
-
getIndex
public final int getIndex() -
getName
public final byte[] getName() -
isDummy
public final boolean isDummy() -
isAsciiCompatible
public final boolean isAsciiCompatible() -
isUnicode
public final boolean isUnicode() -
isUTF8
public final boolean isUTF8() -
getCharset
If this encoding is capable of being represented by a Java Charset then provide it. Otherwise this will raise a CharsetNotFound error via the JDK APIs. To reduce cases like jruby/jruby#4716, we always attempt to find a charset here, and default to using the encoding name, which is never null. Either the encoding will exist in the JDK or it will fail hard, rather than propagating a null Charset. Encodings whose names differ from those found in the JDK can override getCharsetName to provide that name, or override getCharset to return the right Charset. -
getCharsetName
The name of the equivalent Java Charset for this encoding. Defaults to the name of the encoding. Subclasses can override this to provide a different name.- Returns:
- the name of the equivalent Java Charset for this encoding
-
replicate
-
length
public abstract int length(byte c) Returns the character length given a character head: returns 1 for singlebyte encodings, or performs a direct length table lookup for multibyte ones. - Parameters:
c - Character head. Oniguruma equivalent: mbc_enc_len. To be deprecated very soon (use the length(byte[] bytes, int p, int end) version instead).
-
length
public abstract int length(byte[] bytes, int p, int end) Returns the character length given a stream, a character position and the stream end: returns 1 for singlebyte encodings; for multibyte ones, performs sanity validations and returns the character length, or otherwise a negative value indicating a malformed character or the number of bytes missing from the stream. - Returns:
- 0 Never
> 0 Valid character, length returned
-1 Illegal/malformed character
< -1 (-1 - n) Number of missing bytes for character in p...end range
Oniguruma equivalent:
mbc_enc_len, modified for 1.9 purposes.
-
maxLength
public final int maxLength() Returns the maximum character byte length that can appear in an encoding. Oniguruma equivalent: max_enc_len -
maxLengthDistance
Deprecated. -
minLength
public final int minLength() Returns the minimum character byte length that can appear in an encoding. Oniguruma equivalent: min_enc_len -
isNewLine
public abstract boolean isNewLine(byte[] bytes, int p, int end) Returns true if bytes[p] is the head of a new line character. Oniguruma equivalent: is_mbc_newline -
mbcToCode
public abstract int mbcToCode(byte[] bytes, int p, int end) Returns the code point for a character. Oniguruma equivalent: mbc_to_code -
codeToMbcLength
public abstract int codeToMbcLength(int code) Returns the character length given a code point. Oniguruma equivalent: code_to_mbclen -
codeToMbc
public abstract int codeToMbc(int code, byte[] bytes, int p) Extracts a code point into its multibyte representation. - Returns:
- character length for the given code point
Oniguruma equivalent:
code_to_mbc
-
mbcCaseFold
Performs case folding for a character at bytes[pp.value]. - Parameters:
flag - case fold flag; pp - an IntHolder that points at the character head; to - a buffer into which the case folded character is extracted. Oniguruma equivalent: mbc_case_fold
-
toLowerCaseTable
public byte[] toLowerCaseTable() Returns the lower case table if it's safe to use it directly, otherwise null. Used for fast case insensitive matching for some singlebyte encodings. - Returns:
- lower case table
-
applyAllCaseFold
Expands case folds given a character class (used for case insensitive matching). - Parameters:
flag - case fold flag; fun - case folding functor (look at: ApplyCaseFold); arg - case folding functor argument (look at: ApplyCaseFoldArg). Oniguruma equivalent: apply_all_case_fold
-
caseFoldCodesByString
Expands AST string nodes into their folded alternatives (look at: Analyser.expandCaseFoldString). Oniguruma equivalent: get_case_fold_codes_by_str -
propertyNameToCType
public abstract int propertyNameToCType(byte[] bytes, int p, int end) Returns the character type given a character type name (used for e.g. \p{Alpha}). Oniguruma equivalent: property_name_to_ctype -
isCodeCType
public abstract boolean isCodeCType(int code, int ctype) Checks whether the given code is of the given character type (e.g. used by isWord(someByte) and similar methods). - Parameters:
code - a code point of a character; ctype - a character type to check against. Oniguruma equivalent: is_code_ctype
-
ctypeCodeRange
Returns the code range for a given character type. Oniguruma equivalent: get_ctype_code_range -
leftAdjustCharHead
public abstract int leftAdjustCharHead(byte[] bytes, int p, int s, int end) Seeks the previous character head in a stream. Oniguruma equivalent: left_adjust_char_head - Parameters:
bytes - byte stream; p - position; s - stop; end - end
-
isReverseMatchAllowed
public abstract boolean isReverseMatchAllowed(byte[] bytes, int p, int end) Returns true if it's safe to use the reverse Boyer-Moore search fail-fast algorithm. Oniguruma equivalent: is_allowed_reverse_match -
caseMap
-
rightAdjustCharHead
public final int rightAdjustCharHead(byte[] bytes, int p, int s, int end) -
rightAdjustCharHeadWithPrev
-
prevCharHead
public final int prevCharHead(byte[] bytes, int p, int s, int end) -
stepBack
public final int stepBack(byte[] bytes, int p, int s, int end, int n) -
step
public final int step(byte[] bytes, int p, int end, int n) -
strLength
public abstract int strLength(byte[] bytes, int p, int end) -
strCodeAt
public abstract int strCodeAt(byte[] bytes, int p, int end, int index) -
strLengthNull
public final int strLengthNull(byte[] bytes, int p, int end) -
strByteLengthNull
public final int strByteLengthNull(byte[] bytes, int p, int end) -
strNCmp
public final int strNCmp(byte[] bytes, int p, int end, byte[] ascii, int asciiP, int n) -
isNewLine
public final boolean isNewLine(int code) -
isGraph
public final boolean isGraph(int code) -
isPrint
public final boolean isPrint(int code) -
isAlnum
public final boolean isAlnum(int code) -
isAlpha
public final boolean isAlpha(int code) -
isLower
public final boolean isLower(int code) -
isUpper
public final boolean isUpper(int code) -
isCntrl
public final boolean isCntrl(int code) -
isPunct
public final boolean isPunct(int code) -
isSpace
public final boolean isSpace(int code) -
isBlank
public final boolean isBlank(int code) -
isDigit
public final boolean isDigit(int code) -
isXDigit
public final boolean isXDigit(int code) -
isWord
public final boolean isWord(int code) -
isMbcWord
public final boolean isMbcWord(byte[] bytes, int p, int end) -
isSbWord
public final boolean isSbWord(int code) -
isMbcHead
public final boolean isMbcHead(byte[] bytes, int p, int end) -
isMbcCrnl
public boolean isMbcCrnl(byte[] bytes, int p, int end) -
digitVal
public static int digitVal(int code) -
odigitVal
public static int odigitVal(int code) -
xdigitVal
public final int xdigitVal(int code) -
isMbcAscii
public static boolean isMbcAscii(byte b) -
isAscii
public static boolean isAscii(int code) -
isAscii
public static boolean isAscii(byte b) -
asciiToLower
public static byte asciiToLower(int c) -
asciiToUpper
public static byte asciiToUpper(int c) -
isWordGraphPrint
public static boolean isWordGraphPrint(int ctype) -
mbcodeStartPosition
Deprecated. -
isSingleByte
public final boolean isSingleByte() -
isFixedWidth
public final boolean isFixedWidth() -
load
-
load
-