Solr documents are used to represent general data in a reasonably simple format made up of lists, integers, logicals, longs, doubles, dates, etc., each with an optional name. These correspond very naturally to R objects.

readSolrDoc(doc, ...)

Arguments

doc

the object containing the data. This can be the name of a file, a parsed XML document or an XML node.

...

additional parameters for the methods.

Value

An R object representing the data in the Solr document, typically a named vector or named list.

References

The Apache Lucene text search system, on which Solr is built.

Author

Duncan Temple Lang

Examples

f = system.file("exampleData", "solr.xml", package = "XML")
readSolrDoc(f)
#> $responseHeader
#> $responseHeader$status
#> [1] 0
#> 
#> $responseHeader$QTime
#> [1] 90
#> 
#> 
#> $index
#> $index$numDocs
#> [1] 17
#> 
#> $index$maxDoc
#> [1] 17
#> 
#> $index$numTerms
#> [1] 1044
#> 
#> $index$version
#> [1] 1.297337e+12
#> 
#> $index$optimized
#> [1] TRUE
#> 
#> $index$current
#> [1] TRUE
#> 
#> $index$hasDeletions
#> [1] FALSE
#> 
#> $index$directory
#> [1] "\n      org.apache.lucene.store.NIOFSDirectory:org.apache.lucene.store.NIOFSDirectory@[{PATH}/dev/trunk/solr/example/solr/data/index\n      lockFactory=org.apache.lucene.store.NativeFSLockFactory@349319d9\n    "
#> 
#> $index$lastModified
#> [1] "2011-02-10 11:29:03 UTC"
#> 
#> 
#> $fields
#> $fields$store_0_coordinate
#> $fields$store_0_coordinate$type
#> [1] "tdouble"
#> 
#> $fields$store_0_coordinate$schema
#> [1] "IT-----OF----"
#> 
#> $fields$store_0_coordinate$dynamicBase
#> [1] "*_coordinate"
#> 
#> $fields$store_0_coordinate$index
#> [1] "(unstored field)"
#> 
#> $fields$store_0_coordinate$docs
#> [1] 14
#> 
#> $fields$store_0_coordinate$distinct
#> [1] 64
#> 
#> $fields$store_0_coordinate$topTerms
#> $fields$store_0_coordinate$topTerms$`2.0`
#> [1] 14
#> 
#> $fields$store_0_coordinate$topTerms$`44.0`
#> [1] 6
#> 
#> $fields$store_0_coordinate$topTerms$`37.775177001953125`
#> [1] 4
#> 
#> $fields$store_0_coordinate$topTerms$`37.7734375`
#> [1] 4
#> 
#> $fields$store_0_coordinate$topTerms$`37.77519989013672`
#> [1] 4
#> 
#> $fields$store_0_coordinate$topTerms$`36.0`
#> [1] 4
#> 
#> $fields$store_0_coordinate$topTerms$`37.7752`
#> [1] 4
#> 
#> $fields$store_0_coordinate$topTerms$`37.77519999999822`
#> [1] 4
#> 
#> $fields$store_0_coordinate$topTerms$`37.77519999956712`
#> [1] 4
#> 
#> $fields$store_0_coordinate$topTerms$`40.71429991722107`
#> [1] 2
#> 
#> 
#> $fields$store_0_coordinate$histogram
#> $fields$store_0_coordinate$histogram$`1`
#> [1] 46
#> 
#> $fields$store_0_coordinate$histogram$`2`
#> [1] 9
#> 
#> $fields$store_0_coordinate$histogram$`4`
#> [1] 7
#> 
#> $fields$store_0_coordinate$histogram$`8`
#> [1] 1
#> 
#> $fields$store_0_coordinate$histogram$`16`
#> [1] 1
#> 
#> 
#> 
#> $fields$text
#> $fields$text$type
#> [1] "text"
#> 
#> $fields$text$schema
#> [1] "IT-M---------"
#> 
#> $fields$text$index
#> [1] "(unstored field)"
#> 
#> $fields$text$docs
#> [1] 17
#> 
#> $fields$text$distinct
#> [1] 389
#> 
#> $fields$text$topTerms
#> $fields$text$topTerms$electron
#> [1] 14
#> 
#> $fields$text$topTerms$`2`
#> [1] 8
#> 
#> $fields$text$topTerms$inc
#> [1] 8
#> 
#> $fields$text$topTerms$x
#> [1] 8
#> 
#> $fields$text$topTerms$`1`
#> [1] 8
#> 
#> $fields$text$topTerms$gb
#> [1] 7
#> 
#> $fields$text$topTerms$`3`
#> [1] 7
#> 
#> $fields$text$topTerms$`0`
#> [1] 6
#> 
#> $fields$text$topTerms$`20`
#> [1] 5
#> 
#> $fields$text$topTerms$mb
#> [1] 5
#> 
#> 
#> $fields$text$histogram
#> $fields$text$histogram$`1`
#> [1] 278
#> 
#> $fields$text$histogram$`2`
#> [1] 67
#> 
#> $fields$text$histogram$`4`
#> [1] 31
#> 
#> $fields$text$histogram$`8`
#> [1] 12
#> 
#> $fields$text$histogram$`16`
#> [1] 1
#> 
#> 
#> 
#> $fields$cat
#> $fields$cat$type
#> [1] "string"
#> 
#> $fields$cat$schema
#> [1] "I-SM---OF---l"
#> 
#> $fields$cat$index
#> [1] "I-S----O----"
#> 
#> $fields$cat$docs
#> [1] 16
#> 
#> $fields$cat$distinct
#> [1] 14
#> 
#> $fields$cat$topTerms
#> $fields$cat$topTerms$electronics
#> [1] 14
#> 
#> $fields$cat$topTerms$memory
#> [1] 3
#> 
#> $fields$cat$topTerms$`graphics card`
#> [1] 2
#> 
#> $fields$cat$topTerms$search
#> [1] 2
#> 
#> $fields$cat$topTerms$`hard drive`
#> [1] 2
#> 
#> $fields$cat$topTerms$connector
#> [1] 2
#> 
#> $fields$cat$topTerms$software
#> [1] 2
#> 
#> $fields$cat$topTerms$monitor
#> [1] 2
#> 
#> $fields$cat$topTerms$copier
#> [1] 1
#> 
#> $fields$cat$topTerms$music
#> [1] 1
#> 
#> 
#> $fields$cat$histogram
#> $fields$cat$histogram$`1`
#> [1] 6
#> 
#> $fields$cat$histogram$`2`
#> [1] 6
#> 
#> $fields$cat$histogram$`4`
#> [1] 1
#> 
#> $fields$cat$histogram$`8`
#> [1] 0
#> 
#> $fields$cat$histogram$`16`
#> [1] 1
#> 
#> 
#> 
#> 
#> $info
#> $info$key
#> $info$key$I
#> [1] "Indexed"
#> 
#> $info$key$T
#> [1] "Tokenized"
#> 
#> $info$key$S
#> [1] "Stored"
#> 
#> $info$key$M
#> [1] "Multivalued"
#> 
#> $info$key$V
#> [1] "TermVector Stored"
#> 
#> $info$key$o
#> [1] "Store Offset With TermVector"
#> 
#> $info$key$p
#> [1] "Store Position With TermVector"
#> 
#> $info$key$O
#> [1] "Omit Norms"
#> 
#> $info$key$L
#> [1] "Lazy"
#> 
#> $info$key$B
#> [1] "Binary"
#> 
#> $info$key$f
#> [1] "Sort Missing First"
#> 
#> $info$key$l
#> [1] "Sort Missing Last"
#> 
#> 
#> $info$NOTE
#> [1] "Document Frequency (df) is not updated when a document is marked for deletion. df values include\n      deleted documents.\n    "
#> 
#>