#!/bin/sh # Copyright for the Uniform Repository Service (c) 1995 - 2012, # by Gerald Banon. All rights reserved. # Version 2.1 # parseXML.tcl \ namespace eval parseXML { # > # ---------------------------------------------------------------------- # Lindex proc Lindex {list iList} { foreach i $iList { set list [lindex $list $i] } return $list } # Lindex - end # ---------------------------------------------------------------------- # Llength proc Llength {list iList} { return [llength [Lindex $list $iList]] } # Llength - end # ---------------------------------------------------------------------- # Lchange # example: # Lchange {a {} c} 1 {b d} # => a {b d} c proc Lchange {list iList arg} { set list2 $arg while {![string equal {} $iList]} { set i [lindex $iList end] set iList [lreplace $iList end end] set list2 [lreplace [Lindex $list $iList] $i $i $list2] } return $list2 } # Lchange - end # ---------------------------------------------------------------------- # Lappend # example: # Lappend {a b c} {} d # => a b c d # Lappend {a b c} 1 d # => a {b d} c # Lappend {a {} c} 1 {b d} # => a {{b d}} c # Lappend {a {b x} c} {1 0} d # => a {{b d} x} c proc Lappend {list iList arg} { # global xxx set list2 [Lindex $list $iList] lappend list2 $arg while {![string equal {} $iList]} { # puts $iList set i [lindex $iList end] set iList [lreplace $iList end end] # puts --[Lindex $list $iList]-- # puts $i # incr xxx # puts -$xxx- # if [string equal 2 $xxx] {error {stop}} # puts 1.$list2 set list2 [lreplace [Lindex $list $iList] $i $i $list2] # puts 2.$list2 } return $list2 } # Lappend - end # ---------------------------------------------------------------------- # CodeAttributeValue proc CodeAttributeValue {inputString} { set attributeCharacter 0 set outputString {} set tagFlag 0 foreach character [split $inputString {}] { # since " are allowed within tag value, tag value strings must identified if [string equal {<} $character] { set tagFlag 1 } elseif {[string equal {>} $character] && !$attributeCharacter} { # since > are allowed within attribute value, && !$attributeCharacter must be added in the if condition set tagFlag 0 } if $tagFlag { # inside tag name - outside tag value if $attributeCharacter { if [string equal {"} $character] {set attributeCharacter 0} } else { if [string equal {"} $character] {set attributeCharacter 1} } if $attributeCharacter { if [string equal {<} $character] { lappend outputString {!<!} } elseif [string equal {>} $character] { lappend outputString {!>!} } else { lappend outputString $character } } else { lappend outputString $character } } else { lappend outputString $character } } return [join $outputString {}] } # CodeAttributeValue - end # ---------------------------------------------------------------------- # DecodeAttributeValue proc DecodeAttributeValue {inputString} { regsub -all {!>!} $inputString {>} outputString ;# !>! --> > regsub -all {!<!} $outputString {<} outputString ;# !<! --> < return $outputString } # DecodeAttributeValue - end # ---------------------------------------------------------------------- # ProcessTag proc ProcessTag {parentName iList tag createSchema} { global parseXML::dataList global parseXML::tagNameList # global createSchema set tagName $tag set attributePart {} set attributeList {} regexp {([^ ]*) (.*)} $tag m tagName attributePart regsub -all {([^=]*)=("[^"]*")} $attributePart {\1 \2} attributeList ;# drop = if [string equal {} $parentName] { set dataList [list [list $tagName $attributeList {}]] } else { # puts [list $dataList $iList [list $parentName.$tagName $attributeList {}]] set dataList [Lappend $dataList $iList [list $parentName.$tagName $attributeList {}]] } # Create tagNameList if $createSchema { if [string equal {} $parentName] { set tagName2 $tagName } else { set tagName2 $parentName.$tagName } set i 1 ;# must be 1 to simplify ExtractData # if [regexp {PELLEGRINO} $attributeList] {puts $attributeList} foreach {attributeName attributeValue} $attributeList { # if {[lsearch $tagNameList [list $tagName2,$attributeName *]] == -1} { lappend tagNameList [list $tagName2,$attributeName [concat $iList [expr [Llength $dataList $iList] - 1] 1 $i]] # } incr i 2 ;# must be 2 to simplify ExtractData } } # Create tagNameList - end return $tagName } # ProcessTag - end # ---------------------------------------------------------------------- # XML2tcl proc XML2tcl {xml createSchema} { global parseXML::dataList ;# set by XML2tcl2 global parseXML::tagNameList ;# set by XML2tcl2 if createSchema == 1 global parseXML::schema ;# set by XML2tcl # set xxx [CodeAttributeValue $xml] # Store xxx {../../../../../iconet.com.br/banon/2005/08.13.18.19/doc/xxx.txt} XML2tcl2 {} {} [CodeAttributeValue $xml] $createSchema set dataList [DecodeAttributeValue $dataList] if $createSchema {CreateTagArrays} ;# needed by Select if $createSchema {set schema [join $tagNameList \n]} } # XML2tcl - end # ---------------------------------------------------------------------- # XML2tcl2 proc XML2tcl2 {parentName iList xml createSchema} { global parseXML::dataList ;# set by XML2tcl2 global parseXML::tagNameList ;# set by XML2tcl2 if createSchema == 1 # global createSchema # if ![info exists createSchema] {set createSchema 0} if {$createSchema && [string equal {} $parentName]} {set tagNameList {}} # if [expr [info level] == 5] {return} if [expr [info level] > 99] {return} # puts [list level: [info level]] set xml [string trim $xml "\n "] regsub {<\?[^?]*\?>} $xml {} rest set iList2 [concat $iList {0 2}] # set i 1 # while {$i < 99 && ![string equal {} $rest]} # ;# doesn't when they are more than 98 items at the same level - commented by GJFB in 2010-12-19 while {![string equal {} $rest]} { ## puts [list i: $i] # puts --$rest if [regexp {<([^>]*)>(.*)} $rest m tag rest2] { # puts >>$tag if [regsub {/$} $tag {} tag] { # puts OK1 # puts {} # puts [list $parentName $iList $tag $createSchema] ProcessTag $parentName $iList $tag $createSchema set rest $rest2 } else { # puts OK2 set tagName [ProcessTag $parentName $iList $tag $createSchema] # puts [list tagName: $tagName] # set length [string length $rest2] # puts [string range $rest2 [expr $length - 30] end] if [regexp -indices "\[^<\]*(.*)" $rest2 m rest] { # puts OK3 # regsub "$m$" $rest2 {} tagValue ;# doesn't work - tcl aborts set tagValue [string range $rest2 0 [expr [lindex $m 0] - 1]] set rest [string range $rest2 [lindex $rest 0] [lindex $rest 1]] if [string equal {} $parentName] { XML2tcl2 $tagName $iList2 $tagValue $createSchema } else { XML2tcl2 $parentName.$tagName $iList2 $tagValue $createSchema } } else { # puts OK4 set rest $rest2 ;# case of
tag (
doesn't exist) - discard
} } set rest [string trim $rest "\n "] } else { set dataList [Lchange $dataList $iList $rest] set rest {} # Create tagNameList if $createSchema { # if {[lsearch $tagNameList [list $parentName *]] == -1} # lappend tagNameList [list $parentName $iList] # # } # Create tagNameList - end } set length [llength $iList2] set index [expr $length - 2] set position [lindex $iList2 $index] incr position # puts 1.$iList2 set iList2 [lreplace $iList2 $index $index $position] # puts 2.$iList2 # incr i } } # XML2tcl2 - end # ---------------------------------------------------------------------- # CreateTagArrays proc CreateTagArrays {} { global parseXML::tagNameList ;# set in XML2tcl2 global parseXML::tagKeyArray if [info exists tagKeyArray] {unset tagKeyArray} foreach item $tagNameList { set tagName [lindex $item 0] set tagKey [lindex $item 1] set tagKeyArray($tagKey) $tagName } } # CreateTagArrays - end # ---------------------------------------------------------------------- # ExtractData # example: # table.tr.td.a,href {0 2 0 2 0 2 0 1 1} # table.tr.td.a {0 2 0 2 0 2 0 2} # ExtractData {{0 2 0 2 0 2 0 2}} # => yyy proc ExtractData {indexList} { global parseXML::dataList if [string equal {} $indexList] { set value {} } else { foreach index $indexList { set value [lindex $dataList $index] } } return [string trim $value] } # ExtractData - end # ---------------------------------------------------------------------- # CreateTag proc CreateTag {dataList} { global parseXML::xml # if [expr [info level] == 9] {return} foreach tag $dataList { set tagName [lindex $tag 0] regsub {^.*\.} $tagName {} tagName append xml "<$tagName" set attributeList [lindex $tag 1] foreach {attributeName attributeValue} $attributeList { append xml " $attributeName=\"$attributeValue\"" } set tagValue [lindex $tag 2] if [string equal {} $tagValue] { append xml " />" } else { append xml ">" foreach item $tagValue { if {[llength $item] == 1} { append xml "$item " } else { CreateTag [list $item] } } set xml [string trimright $xml] append xml "" } } } # CreateTag - end # ---------------------------------------------------------------------- # TCL2xml proc TCL2xml {listName} { global parseXML::xml upvar $listName dataList if [info exists xml] {unset xml} set xml {} CreateTag $dataList } # TCL2xml - end # ---------------------------------------------------------------------- # Select # Examples: # Select {} CURRICULO-VITAE.PRODUCAO-BIBLIOGRAFICA.ARTIGOS-PUBLICADOS.ARTIGO-PUBLICADO # Select {0 2 1 2 1 2 0} ,SEQUENCIA-PRODUCAO # Select {0 2 1 2 1 2 0} .DADOS-BASICOS-DO-ARTIGO,TITULO-DO-ARTIGO # Select {0 2 1 2 1 2 0} .AUTORES proc Select {key tag} { # key == {} # key == {0 2 1 2 1 2 0} # tag == CURRICULO-VITAE.PRODUCAO-BIBLIOGRAFICA.ARTIGOS-PUBLICADOS.ARTIGO-PUBLICADO # tag == AUTORES global parseXML::tagNameList global parseXML::tagKeyArray # puts [llength $tagNameList] # absoluteTagPath set keyLength [llength $key] if {$keyLength > 2} { set tagList {} # numberOfTags # 4 set numberOfTags [expr ($keyLength + 1) / 2] set i 1 # puts >>$key # puts [split $tagKeyArray([lindex [array names tagKeyArray "$key *"] 0]) {.,}] foreach item [split $tagKeyArray([lindex [array names tagKeyArray "$key *"] 0]) {.,}] { lappend tagList $item if {$i == $numberOfTags} {break} incr i } set absoluteTagPath "[join $tagList {.}]$tag" } else { set absoluteTagPath $tag } # puts $absoluteTagPath set keyList {} if [string equal {} $key] { foreach index [lsearch -all $tagNameList "$absoluteTagPath*"] { lappend keyList [lindex [lindex $tagNameList $index] 1] } } else { # puts "$absoluteTagPath* {$key *}" # puts --[lsearch -all $tagNameList "$absoluteTagPath* {$key *}"]-- foreach index [lsearch -all $tagNameList "$absoluteTagPath* {$key *}"] { lappend keyList [lindex [lindex $tagNameList $index] 1] } } # puts $keyList set absoluteTagPathList [split $absoluteTagPath {.,}] set n [expr 2 * ([llength $absoluteTagPathList] - 1)] set keyList2 {} foreach item $keyList { lappend keyList2 [lrange $item 0 $n] } return [lsort -unique $keyList2] } # Select - end # ---------------------------------------------------------------------- # < } ;# end namespace eval parseXML # Testing if 0 { source cgi/mirrorfind-.tcl ;# Load source utilities1.tcl ;# Store source parseXML.tcl } if 0 { # testing if 0 { set example { abc a xxx } } if 0 { set example {
ccc yyy eee
fff zzz ddd
} } if 0 { set example {
Estimativa do conteudo de vapor d'agua a partir da radiacao solar direta Tt's pages xxxx el Niño
Aragão, L. E. O. C.; Shimabukuro, Y. E.; Banon, G. J. F.; Espírito Santo, F. D. B.; Instituto Nacional de Pesquisas Espaciais; INPE
 
51 
} } if 0 { set example {
Author instructions for ISMM 2007 full paper
Banon, Gerald Jean Francis; Braga-Neto, Ulisses

ID: dpi.inpe.br/banon-pc2@80/2006/11.01.13.53
 
  
} } if 0 { Load {c:/tmp/capes/xx.html} example ;# didn't work } if 0 { Load {../../../../../iconet.com.br/banon/2005/08.13.18.19/doc/curriculo2.xml} example } if 0 { # Waldir Load {../../../../../iconet.com.br/banon/2005/08.13.18.19/doc/8338993621160610.xml} example } if 0 { # Chian Load {../../../../../iconet.com.br/banon/2005/08.13.18.19/doc/7234195960084079.xml} example } if 0 { # Setzer Load {../../../../../iconet.com.br/banon/2005/08.13.18.19/doc/1332033502251284.xml} example } if 0 { # Load {../../../../../iconet.com.br/banon/2005/08.13.18.19/doc/0504720587589323.xml} example } if 0 { # Camara Load {../../../../../iconet.com.br/banon/2005/08.13.18.19/doc/0333390666972274.xml} example } if 0 { # Camara Load {../../../../../iconet.com.br/banon/2005/08.13.18.19/doc/curriculoGilbertoCamara.xml} example } # set createSchema 1 # if $createSchema { # if [info exists schema] {unset schema} # set tagNameList {} # } # RUN if 0 { # >>> set errorInfo {} regsub -all {\\"} $example {"} example ;# BANCA "Sergio Pellegrino\" NOME-PARA-CITACAO-DO-PARTICIPANTE-DA-BANCA "PELLEGRINO, S." -> BANCA "Sergio Pellegrino" NOME-PARA-CITACAO-DO-PARTICIPANTE-DA-BANCA "PELLEGRINO, S." - otherwise one gets: list element in quotes followed by "PELLEGRINO," instead of space (with 0333390666972274.xml GCN in 2013-05-30) llength [parseXML::XML2tcl $example 1] set errorInfo set nome_rg_dono_cv [parseXML::ExtractData [parseXML::Select {} CURRICULO-VITAE.DADOS-GERAIS,NOME-COMPLETO]] } if 0 { set errorInfo {} catch {parseXML::XML2tcl2 {} {} [parseXML::CodeAttributeValue $example] 1} puts $errorInfo llength [set parseXML::dataList [parseXML::DecodeAttributeValue $parseXML::dataList]] CreateTagArrays ;# needed by Select } parseXML::XML2tcl $example 1 # puts $dataList # => {t1 {a1 "1 2"} {{t1.t11 {} {{t1.t11.t2 {a2 "2>4" b2 "3" } {}} {t1.t11.t2 {a2 " xx" b2 "aa" } {}} {t1.t11.t2 {} {}} {t1.t11.t3 {a3 "3 4" b3 "9"} {{t1.t11.t3.t5 {a5 "vv" } {}}}} {t1.t11.t3 {a3 "3 6" b3 "8"} {{t1.t11.t3.t5 {a5 "vvcc" } {}}}} {t1.t11.t4 {a4 "d"} {abc a xxx}}}}}} set schema [join $parseXML::tagNameList \n] # puts $schema # t1,a1 {0 1 1} # t1.t11.t2,a2 {0 2 0 2 0 1 1} # t1.t11.t2,b2 {0 2 0 2 0 1 3} # t1.t11.t3,a3 {0 2 0 2 3 1 1} # t1.t11.t3,b3 {0 2 0 2 3 1 3} # t1.t11.t3.t5,a5 {0 2 0 2 3 2 0 1 1} # t1.t11.t4,a4 {0 2 0 2 5 1 1} # t1.t11.t4 {0 2 0 2 5 2} if 0 { Store schema {../../../../../iconet.com.br/banon/2005/08.13.18.19/doc/schema8338993621160610.txt} } if 0 { Store schema {../../../../../iconet.com.br/banon/2005/08.13.18.19/doc/schema7234195960084079.txt} } if 0 { Store schema {../../../../../iconet.com.br/banon/2005/08.13.18.19/doc/schema1332033502251284.txt} } if 0 { parseXML::Select {} CURRICULO-VITAE.PRODUCAO-BIBLIOGRAFICA.ARTIGOS-PUBLICADOS.ARTIGO-PUBLICADO parseXML::Select {0 2 1 2 1 2 0} ,SEQUENCIA-PRODUCAO parseXML::Select {0 2 1 2 1 2 0} .DADOS-BASICOS-DO-ARTIGO,TITULO-DO-ARTIGO parseXML::Select {0 2 1 2 1 2 0} .AUTORES parseXML::Select {} CURRICULO-VITAE.DADOS-GERAIS,NOME-COMPLETO puts [parseXML::ExtractData [parseXML::Select {} CURRICULO-VITAE.DADOS-GERAIS,NOME-COMPLETO]] foreach item [parseXML::Select {} CURRICULO-VITAE.PRODUCAO-BIBLIOGRAFICA.ARTIGOS-PUBLICADOS.ARTIGO-PUBLICADO] { puts [parseXML::ExtractData [parseXML::Select $item ,SEQUENCIA-PRODUCAO]] puts [parseXML::ExtractData [parseXML::Select $item .DADOS-BASICOS-DO-ARTIGO,TITULO-DO-ARTIGO]] # puts [parseXML::Select $item .AUTORES] foreach item2 [parseXML::Select $item .AUTORES] { # puts $item2 # puts [parseXML::Select $item2 ,NOME-COMPLETO-DO-AUTOR] puts [parseXML::ExtractData [parseXML::Select $item2 ,NOME-COMPLETO-DO-AUTOR]] puts [parseXML::ExtractData [parseXML::Select $item2 ,ORDEM-DE-AUTORIA]] } puts [parseXML::ExtractData [parseXML::Select $item .PALAVRAS-CHAVE,PALAVRA-CHAVE-1]] break } # puts [parseXML::ExtractData {{0 2 0 2 4 2 0 2 9 2 14 2 0 2}}] ;# Setzer # foreach item [parseXML::Select {} CURRICULO-VITAE.PRODUCAO-BIBLIOGRAFICA.ARTIGOS-PUBLICADOS.ARTIGO-PUBLICADO,SEQUENCIA-PRODUCAO] { # puts [parseXML::ExtractData $item] # } } if 0 { Store schema {../../../../../iconet.com.br/banon/2005/08.13.18.19/doc/schema.txt} } return regsub -all "\n *" $example {} example2 # RUN TCL2xml dataList puts [llength $example2] puts $xml puts [llength $xml] if 0 { Store xml {C:/Gerald/URLib 2/col/iconet.com.br/banon/2005/08.13.18.19/doc/curriculo2.xml} } puts [string compare $example2 $xml] if 0 { set i 0 while {[string equal [string range $example2 0 $i] [string range $xml 0 $i]]} {incr i} puts $i } }