Convert a data.frame to a data.tree structure

# S3 method for class 'data.frame'
as.Node(
  x,
  ...,
  mode = c("table", "network"),
  pathName = "pathString",
  pathDelimiter = "/",
  colLevels = NULL,
  na.rm = TRUE
)

FromDataFrameTable(
  table,
  pathName = "pathString",
  pathDelimiter = "/",
  colLevels = NULL,
  na.rm = TRUE,
  check = c("check", "no-warn", "no-check"),
  suffix = "_attr"
)

FromDataFrameNetwork(network, check = c("check", "no-warn", "no-check"))

Arguments

x

The data.frame in the required format.

...

Any other arguments that implementations of this method might need.

mode

Either "table" (if x is a data.frame in tree or table format) or "network" (if x is a data.frame in network format)

pathName

The name of the column in x containing the path of the row

pathDelimiter

The delimiter used to separate nodes in pathName

colLevels

Nested list of column names, determining at which node levels the attributes are set.

na.rm

If TRUE, then NAs are treated as NULL and the corresponding values will not be set on nodes.

table

A data.frame in table or tree format, i.e. having a row for each leaf (and optionally for additional nodes). It should contain a column, whose name is given by pathName, describing the path of each row, with the path elements separated by pathDelimiter.

check

Either

  • "check": if the name conformance should be checked and warnings should be printed in case of non-conformance (the default)

  • "no-warn": if the name conformance should be checked, but no warnings should be printed in case of non-conformance (if you expect non-conformance)

  • "no-check" or FALSE: if the name conformance should not be checked; use this if performance is critical. However, in case of non-conformance, expect cryptic follow-up errors

suffix

optional suffix added to the column name in case the column name is the same as a path element. Defaults to '_attr'

network

A data.frame in network format, i.e. it must adhere to the following requirements:

  • It must contain as many rows as there are nodes, excluding the root (there is no row for the root)

  • Its first and second columns contain the network relationships. This can be either climbing (from parent to children) or descending (from child to parent)

  • Its subsequent columns contain the attributes to be set on the nodes

  • It must contain a single root

  • There are no cycles in the network

Value

The root Node of the data.tree structure

Examples

data(acme)

#Tree
x <- ToDataFrameTree(acme, "pathString", "p", "cost")
x
#>                           levelName
#> 1  Acme Inc.                       
#> 2   ¦--Accounting                  
#> 3   ¦   ¦--New Software            
#> 4   ¦   °--New Accounting Standards
#> 5   ¦--Research                    
#> 6   ¦   ¦--New Product Line        
#> 7   ¦   °--New Labs                
#> 8   °--IT                          
#> 9       ¦--Outsource               
#> 10      ¦--Go agile                
#> 11      °--Switch to R             
#>                                       pathString    p    cost
#> 1                                      Acme Inc.   NA      NA
#> 2                           Acme Inc./Accounting   NA      NA
#> 3              Acme Inc./Accounting/New Software 0.50 1000000
#> 4  Acme Inc./Accounting/New Accounting Standards 0.75  500000
#> 5                             Acme Inc./Research   NA      NA
#> 6            Acme Inc./Research/New Product Line 0.25 2000000
#> 7                    Acme Inc./Research/New Labs 0.90  750000
#> 8                                   Acme Inc./IT   NA      NA
#> 9                         Acme Inc./IT/Outsource 0.20  400000
#> 10                         Acme Inc./IT/Go agile 0.05  250000
#> 11                      Acme Inc./IT/Switch to R 1.00   50000
xN <- as.Node(x)
print(xN, "p", "cost")
#>                           levelName    p    cost
#> 1  Acme Inc.                          NA      NA
#> 2   ¦--Accounting                     NA      NA
#> 3   ¦   ¦--New Software             0.50 1000000
#> 4   ¦   °--New Accounting Standards 0.75  500000
#> 5   ¦--Research                       NA      NA
#> 6   ¦   ¦--New Product Line         0.25 2000000
#> 7   ¦   °--New Labs                 0.90  750000
#> 8   °--IT                             NA      NA
#> 9       ¦--Outsource                0.20  400000
#> 10      ¦--Go agile                 0.05  250000
#> 11      °--Switch to R              1.00   50000

#Table
x <- ToDataFrameTable(acme, "pathString", "p", "cost")
x
#>                                      pathString    p    cost
#> 1             Acme Inc./Accounting/New Software 0.50 1000000
#> 2 Acme Inc./Accounting/New Accounting Standards 0.75  500000
#> 3           Acme Inc./Research/New Product Line 0.25 2000000
#> 4                   Acme Inc./Research/New Labs 0.90  750000
#> 5                        Acme Inc./IT/Outsource 0.20  400000
#> 6                         Acme Inc./IT/Go agile 0.05  250000
#> 7                      Acme Inc./IT/Switch to R 1.00   50000
xN <- FromDataFrameTable(x)
print(xN, "p", "cost")
#>                           levelName    p    cost
#> 1  Acme Inc.                          NA      NA
#> 2   ¦--Accounting                     NA      NA
#> 3   ¦   ¦--New Software             0.50 1000000
#> 4   ¦   °--New Accounting Standards 0.75  500000
#> 5   ¦--Research                       NA      NA
#> 6   ¦   ¦--New Product Line         0.25 2000000
#> 7   ¦   °--New Labs                 0.90  750000
#> 8   °--IT                             NA      NA
#> 9       ¦--Outsource                0.20  400000
#> 10      ¦--Go agile                 0.05  250000
#> 11      °--Switch to R              1.00   50000

#More complex Table structure, using colLevels
acme$Set(floor = c(1, 2, 3),  filterFun = function(x) x$level == 2)
x <- ToDataFrameTable(acme, "pathString", "floor", "p", "cost")
x
#>                                      pathString floor    p    cost
#> 1             Acme Inc./Accounting/New Software     1 0.50 1000000
#> 2 Acme Inc./Accounting/New Accounting Standards     1 0.75  500000
#> 3           Acme Inc./Research/New Product Line     2 0.25 2000000
#> 4                   Acme Inc./Research/New Labs     2 0.90  750000
#> 5                        Acme Inc./IT/Outsource     3 0.20  400000
#> 6                         Acme Inc./IT/Go agile     3 0.05  250000
#> 7                      Acme Inc./IT/Switch to R     3 1.00   50000
xN <- FromDataFrameTable(x, colLevels = list(NULL, "floor", c("p", "cost")), na.rm = TRUE)
print(xN, "floor", "p", "cost")
#>                           levelName floor    p    cost
#> 1  Acme Inc.                           NA   NA      NA
#> 2   ¦--Accounting                       1   NA      NA
#> 3   ¦   ¦--New Software                NA 0.50 1000000
#> 4   ¦   °--New Accounting Standards    NA 0.75  500000
#> 5   ¦--Research                         2   NA      NA
#> 6   ¦   ¦--New Product Line            NA 0.25 2000000
#> 7   ¦   °--New Labs                    NA 0.90  750000
#> 8   °--IT                               3   NA      NA
#> 9       ¦--Outsource                   NA 0.20  400000
#> 10      ¦--Go agile                    NA 0.05  250000
#> 11      °--Switch to R                 NA 1.00   50000

#Network
x <- ToDataFrameNetwork(acme, "p", "cost", direction = "climb")
x
#>          from                       to    p    cost
#> 1   Acme Inc.               Accounting   NA      NA
#> 2   Acme Inc.                 Research   NA      NA
#> 3   Acme Inc.                       IT   NA      NA
#> 4  Accounting             New Software 0.50 1000000
#> 5  Accounting New Accounting Standards 0.75  500000
#> 6    Research         New Product Line 0.25 2000000
#> 7    Research                 New Labs 0.90  750000
#> 8          IT                Outsource 0.20  400000
#> 9          IT                 Go agile 0.05  250000
#> 10         IT              Switch to R 1.00   50000
xN <- FromDataFrameNetwork(x)
print(xN, "p", "cost")
#>                           levelName    p    cost
#> 1  Acme Inc.                          NA      NA
#> 2   ¦--Accounting                     NA      NA
#> 3   ¦   ¦--New Software             0.50 1000000
#> 4   ¦   °--New Accounting Standards 0.75  500000
#> 5   ¦--Research                       NA      NA
#> 6   ¦   ¦--New Product Line         0.25 2000000
#> 7   ¦   °--New Labs                 0.90  750000
#> 8   °--IT                             NA      NA
#> 9       ¦--Outsource                0.20  400000
#> 10      ¦--Go agile                 0.05  250000
#> 11      °--Switch to R              1.00   50000