Class: Meteor::Ml::Xhtml::ParserImpl
- Inherits:
-
Core::Kernel
- Object
- Parser
- Core::Kernel
- Meteor::Ml::Xhtml::ParserImpl
- Defined in:
- lib/meteor.rb,
lib/meteor.rb
Overview
XHTML parser (XHTMLパーサ)
Direct Known Subclasses
Constant Summary
- KAIGYO_CODE =
KAIGYO_CODE = "\r?\n|\r"
["\r\n", "\n", "\r"]
- NBSP_2 =
'&nbsp;'- NBSP_3 =
'nbsp'- BR_1 =
"\r?\n|\r"- BR_2 =
'<br/>'- BR_3 =
'<br\\/>'- META =
'META'- META_S =
'meta'- OPTION =
'option'- SELECTED =
'selected'- INPUT =
'input'- CHECKED =
'checked'- RADIO =
'radio'- DISABLE_ELEMENT =
DISABLE_ELEMENT = "input|textarea|select|optgroup"
['input', 'textarea', 'select', 'optgroup']
- DISABLED =
'disabled'- READONLY_TYPE =
READONLY_TYPE = "text|password"
['text', 'password']
- TEXTAREA =
'textarea'- READONLY =
'readonly'- SELECT =
'select'- MULTIPLE =
'multiple'- SELECTED_M =
@@pattern_option = Regexp.new(OPTION) @@pattern_selected = Regexp.new(SELECTED) @@pattern_input = Regexp.new(INPUT) @@pattern_checked = Regexp.new(CHECKED) @@pattern_radio = Regexp.new(RADIO) @@pattern_disable_element = Regexp.new(DISABLE_ELEMENT) @@pattern_disabled = Regexp.new(DISABLED) @@pattern_readonly_type = Regexp.new(READONLY_TYPE) @@pattern_textarea = Regexp.new(TEXTAREA) @@pattern_readonly = Regexp.new(READONLY) @@pattern_select = Regexp.new(SELECT) @@pattern_multiple = Regexp.new(MULTIPLE)
'\\sselected="[^"]*"\\s|\\sselected="[^"]*"$'- SELECTED_M1 =
'\\sselected="([^"]*)"\\s|\\sselected="([^"]*)"$'- SELECTED_R =
'selected="[^"]*"'- SELECTED_U =
'selected="selected"'- CHECKED_M =
'\\schecked="[^"]*"\\s|\\schecked="[^"]*"$'- CHECKED_M1 =
'\\schecked="([^"]*)"\\s|\\schecked="([^"]*)"$'- CHECKED_R =
'checked="[^"]*"'- CHECKED_U =
'checked="checked"'- DISABLED_M =
'\\sdisabled="[^"]*"\\s|\\sdisabled="[^"]*"$'- DISABLED_M1 =
'\\sdisabled="([^"]*)"\\s|\\sdisabled="([^"]*)"$'- DISABLED_R =
'disabled="[^"]*"'- DISABLED_U =
'disabled="disabled"'- READONLY_M =
'\\sreadonly="[^"]*"\\s|\\sreadonly="[^"]*"$'- READONLY_M1 =
'\\sreadonly="([^"]*)"\\s|\\sreadonly="([^"]*)"$'- READONLY_R =
'readonly="[^"]*"'- READONLY_U =
'readonly="readonly"'- MULTIPLE_M =
'\\smultiple="[^"]*"\\s|\\smultiple="[^"]*"$'- MULTIPLE_M1 =
'\\smultiple="([^"]*)"\\s|\\smultiple="([^"]*)"$'- MULTIPLE_R =
'multiple="[^"]*"'- MULTIPLE_U =
'multiple="multiple"'- HTTP_EQUIV =
'http-equiv'- CONTENT_TYPE =
'Content-Type'- CONTENT =
'content'- TRUE =
'true'- FALSE =
'false'- TYPE_L =
'type'- TYPE_U =
'TYPE'- PATTERN_UNESCAPE =
'&(amp|quot|apos|gt|lt|nbsp);'- TABLE_FOR_ESCAPE_ =
{ '&' => '&amp;', '"' => '&quot;', '\'' => '&apos;', '<' => '&lt;', '>' => '&gt;', ' ' => '&nbsp;', }
- TABLE_FOR_ESCAPE_CONTENT_ =
{ '&' => '&amp;', '"' => '&quot;', '\'' => '&apos;', '<' => '&lt;', '>' => '&gt;', ' ' => '&nbsp;', "\r\n" => '<br/>', "\r" => '<br/>', "\n" => '<br/>', }
- PATTERN_ESCAPE =
'[&"\'<> ]'- PATTERN_ESCAPE_CONTENT =
'[&"\'<> \\n]'- @@match_tag_2 =
@@match_tag_2 = "textarea|option|pre"
['textarea', 'option', 'pre']
- @@attr_logic =
- Array
-
attributes specified as boolean values (論理値で指定する属性)
['disabled', 'readonly', 'checked', 'selected', 'multiple']
- @@pattern_selected_m =
Regexp.new(SELECTED_M)
- @@pattern_selected_m1 =
Regexp.new(SELECTED_M1)
- @@pattern_selected_r =
Regexp.new(SELECTED_R)
- @@pattern_checked_m =
Regexp.new(CHECKED_M)
- @@pattern_checked_m1 =
Regexp.new(CHECKED_M1)
- @@pattern_checked_r =
Regexp.new(CHECKED_R)
- @@pattern_disabled_m =
Regexp.new(DISABLED_M)
- @@pattern_disabled_m1 =
Regexp.new(DISABLED_M1)
- @@pattern_disabled_r =
Regexp.new(DISABLED_R)
- @@pattern_readonly_m =
Regexp.new(READONLY_M)
- @@pattern_readonly_m1 =
Regexp.new(READONLY_M1)
- @@pattern_readonly_r =
Regexp.new(READONLY_R)
- @@pattern_multiple_m =
Regexp.new(MULTIPLE_M)
- @@pattern_multiple_m1 =
Regexp.new(MULTIPLE_M1)
- @@pattern_multiple_r =
Regexp.new(MULTIPLE_R)
- @@pattern_unescape =
Regexp.new(PATTERN_UNESCAPE)
- @@pattern_br_2 =
Regexp.new(BR_3)
- @@pattern_escape =
Regexp.new(PATTERN_ESCAPE)
- @@pattern_escape_content =
Regexp.new(PATTERN_ESCAPE_CONTENT)
- @@pattern_and_1 =
Regexp.new(AND_1)
- @@pattern_lt_1 =
Regexp.new(LT_1)
- @@pattern_gt_1 =
Regexp.new(GT_1)
- @@pattern_dq_1 =
Regexp.new(DOUBLE_QUATATION)
- @@pattern_ap_1 =
Regexp.new(AP_1)
- @@pattern_space_1 =
Regexp.new(SPACE)
- @@pattern_br_1 =
Regexp.new(BR_1)
- @@pattern_lt_2 =
Regexp.new(LT_2)
- @@pattern_gt_2 =
Regexp.new(GT_2)
- @@pattern_dq_2 =
Regexp.new(QO_2)
- @@pattern_ap_2 =
Regexp.new(AP_2)
- @@pattern_space_2 =
Regexp.new(NBSP_2)
- @@pattern_and_2 =
Regexp.new(AND_2)
Constants inherited from Core::Kernel
AND_1, AND_2, AND_3, AP_1, AP_2, AP_3, ATTR_EQ, CLEAN_1, CLEAN_2, DOUBLE_QUATATION, EMPTY, EN_1, ERASE_ATTR_1, ESCAPE_ENTITY_REF, GET_ATTRS_MAP, GET_ATTR_1, GT_1, GT_2, GT_3, LT_1, LT_2, LT_3, MODE, MODE_AF, MODE_BF, MODE_UTF8, PATTERN_FIND_1, PATTERN_FIND_2_1, PATTERN_FIND_2_2, PATTERN_FIND_2_3, PATTERN_FIND_3, PATTERN_FIND_4, PATTERN_FIND_5, QO_2, QO_3, SEARCH_CX_1, SEARCH_CX_2, SEARCH_CX_3, SEARCH_CX_4, SEARCH_CX_5, SEARCH_CX_6, SET_ATTR_1, SET_CX_1, SET_CX_2, SET_CX_3, SET_CX_4, SET_MONO_1, SPACE, TAG_CLOSE, TAG_CLOSE3, TAG_OPEN, TAG_OPEN3, TAG_SEARCH_1_1, TAG_SEARCH_1_2, TAG_SEARCH_1_3, TAG_SEARCH_1_4, TAG_SEARCH_1_4_2, TAG_SEARCH_2_1, TAG_SEARCH_2_1_2, TAG_SEARCH_2_2, TAG_SEARCH_2_2_2, TAG_SEARCH_2_3, TAG_SEARCH_2_3_2, TAG_SEARCH_2_3_2_2, TAG_SEARCH_2_4, TAG_SEARCH_2_4_2, TAG_SEARCH_2_4_2_2, TAG_SEARCH_2_4_2_3, TAG_SEARCH_2_4_3, TAG_SEARCH_2_4_3_2, TAG_SEARCH_2_4_4, TAG_SEARCH_2_6, TAG_SEARCH_2_7, TAG_SEARCH_3_1, TAG_SEARCH_3_1_2, TAG_SEARCH_3_1_2_2, TAG_SEARCH_3_2, TAG_SEARCH_3_2_2, TAG_SEARCH_3_2_2_2, TAG_SEARCH_4_1, TAG_SEARCH_4_2, TAG_SEARCH_4_3, TAG_SEARCH_4_4, TAG_SEARCH_4_5, TAG_SEARCH_4_6, TAG_SEARCH_4_7, TAG_SEARCH_4_7_2, TAG_SEARCH_NC_1_1, TAG_SEARCH_NC_1_2, TAG_SEARCH_NC_1_3, TAG_SEARCH_NC_1_4, TAG_SEARCH_NC_1_4_2, TAG_SEARCH_NC_2_1, TAG_SEARCH_NC_2_1_2, TAG_SEARCH_NC_2_2, TAG_SEARCH_NC_2_2_2, TAG_SEARCH_NC_2_3, TAG_SEARCH_NC_2_3_2, TAG_SEARCH_NC_2_3_2_2, TAG_SEARCH_NC_2_4, TAG_SEARCH_NC_2_4_2, TAG_SEARCH_NC_2_4_2_2, TAG_SEARCH_NC_2_4_2_3, TAG_SEARCH_NC_2_4_3, TAG_SEARCH_NC_2_4_3_2, TAG_SEARCH_NC_2_4_4, TAG_SEARCH_NC_2_6, TAG_SEARCH_NC_2_7, TAG_SEARCH_NC_3_1, TAG_SEARCH_NC_3_1_2, TAG_SEARCH_NC_3_1_2_2, TAG_SEARCH_NC_3_2, TAG_SEARCH_NC_3_2_2, TAG_SEARCH_NC_3_2_2_2
Constants inherited from Parser
HTML, HTML5, XHTML, XHTML5, XML
Instance Attribute Summary
Attributes inherited from Core::Kernel
doc_type, document_hook, element_cache, element_hook
Instance Method Summary (collapse)
-
- (String) content_type
get content type (コンテントタイプを取得する).
- - (Object) escape(content)
- - (Object) escape_content(content, elm)
-
- (ParserImpl) initialize(*args)
constructor
initializer (イニシャライザ).
-
- (Object) parse(document)
set document in parser (ドキュメントをパーサにセットする).
-
- (Object) read(file_path, encoding)
read a file and set it in the parser (ファイルを読み込み、パーサにセットする).
Methods inherited from Core::Kernel
#attr, #attr_map, #character_encoding, #character_encoding=, #content, #create_element_pattern, #cxtag, #document, #document=, #element, #execute, #find, #flush, #remove_element, #root_element, #shadow
Constructor Details
- (ParserImpl) initialize
- (ParserImpl) initialize(ps)
initializer (イニシャライザ)
4071 4072 4073 4074 4075 4076 4077 4078 4079 4080 4081 4082 |
# File 'lib/meteor.rb', line 4071

# Initializer (イニシャライザ).
#
# Calls the superclass initializer with no arguments, sets the document type
# to Parser::XHTML, then dispatches on the number of arguments received:
# ZERO arguments -> initialize_0, ONE argument -> initialize_1(args[0]).
#
# @overload initialize
# @overload initialize(ps)
#   @param ps the single argument, forwarded unchanged to initialize_1
#     (its expected type is not visible here -- TODO confirm)
# @raise [ArgumentError] when the argument count matches neither ZERO nor ONE
def initialize(*args)
  super()
  @doc_type = Parser::XHTML

  case args.length
  when ZERO
    initialize_0
  when ONE
    initialize_1(args[0])
  else
    # Same exception class as before; the message makes the arity mistake
    # visible to the caller (assumes ZERO == 0 and ONE == 1).
    raise ArgumentError, "wrong number of arguments (given #{args.length}, expected 0..1)"
  end
end
Instance Method Details
- (String) content_type
get content type (コンテントタイプを取得する)
4141 4142 4143 |
# File 'lib/meteor.rb', line 4141

# Get content type (コンテントタイプを取得する).
#
# Delegates to the object held in @root.
#
# @return [String] the value of @root.content_type
def content_type
  @root.content_type
end
- (Object) escape(content)
4323 4324 4325 4326 4327 4328 4329 4330 4331 4332 4333 4334 4335 4336 4337 4338 4339 4340 4341 4342 4343 4344 4345 4346 4347 4348 4349 4350 4351 |
# File 'lib/meteor.rb', line 4323

# Replaces special characters in +content+ with their escaped forms.
#
# The string is modified in place (gsub!) and the very same object is
# returned.
#
# @param content [String] text to escape; mutated by this method
# @return [String] +content+ after all substitutions
def escape(content)
  # Special-character substitution, applied strictly in this order.
  # AND_1 comes first -- presumably so the '&' of entities inserted by the
  # later rules is not escaped a second time (confirm against the constant
  # values, which are defined outside this listing).
  [
    [AND_1,            @@pattern_and_1,   AND_2],  # '&'   -> AND_2
    [LT_1,             @@pattern_lt_1,    LT_2],   # '<'   -> LT_2
    [GT_1,             @@pattern_gt_1,    GT_2],   # '>'   -> GT_2
    [DOUBLE_QUATATION, @@pattern_dq_1,    QO_2],   # '"'   -> QO_2
    [AP_1,             @@pattern_ap_1,    AP_2],   # "'"   -> AP_2
    [SPACE,            @@pattern_space_1, NBSP_2]  # space -> NBSP_2
  ].each do |raw, pattern, replacement|
    # Cheap substring check first; the regexp substitution only runs when
    # the raw character is actually present.
    content.gsub!(pattern, replacement) if content.include?(raw)
  end

  content
end
- (Object) escape_content(content, elm)
4330 4331 4332 4333 4334 4335 4336 4337 4338 4339 |
# File 'lib/meteor.rb', line 4330

# Escapes +content+ for use as element content.
#
# First applies #escape, then converts line breaks (@@pattern_br_1, built
# from BR_1) to BR_2 -- except when +elm+ has a falsy +cx+ and its name
# matches @@match_tag_2, in which case line breaks are left untouched.
#
# @param content [String] text to escape
# @param elm the target element; only +cx+ and +name+ are read here
# @return [String] the escaped text
def escape_content(content, elm)
  escaped = escape(content)

  # Equivalent to the negation of `elm.cx || !is_match(...)`; is_match is
  # still only consulted when elm.cx is falsy.
  keeps_line_breaks = !elm.cx && is_match(@@match_tag_2, elm.name)

  # "\r?\n|\r" -> "<br/>"
  escaped.gsub!(@@pattern_br_1, BR_2) unless keeps_line_breaks

  escaped
end
- (Object) parse(document)
set document in parser (ドキュメントをパーサにセットする)
4109 4110 4111 4112 |
# File 'lib/meteor.rb', line 4109

# Set the document in the parser (ドキュメントをパーサにセットする).
#
# Stores +document+ on @root and then runs analyze_ml.
#
# @param document the document to assign to @root.document
# @return whatever analyze_ml returns
def parse(document)
  @root.document = document
  analyze_ml
end
- (Object) read(file_path, encoding)
read file , set in parser (ファイルを読み込み、パーサにセットする)
4119 4120 4121 4122 |
# File 'lib/meteor.rb', line 4119

# Read a file and set it in the parser (ファイルを読み込み、パーサにセットする).
#
# Hands both arguments to the superclass implementation of #read and then
# runs analyze_ml.
#
# @param file_path path of the file to read
# @param encoding encoding passed through to the superclass #read
# @return whatever analyze_ml returns
def read(file_path, encoding)
  # Bare `super` forwards file_path and encoding exactly as received.
  super
  analyze_ml
end