Creating a List from Vector Based on Given Structure/List Using Recursion and Handling Nested Lists in R

Creating a List from Vector Based on Given Structure/List

In this article, we will explore how to create a list from a vector based on a given structure or list. This problem is not as simple as it sounds because the elements of the vector can appear in arbitrary order, and each one must end up in the output slot that corresponds to the element of the original list containing it.

Problem Statement

Given two inputs:

  • a: a vector with unknown order
  • b: a list containing multiple vectors or elements

We need to create a new list where each vector is composed of elements from a that match the corresponding element in b.

Initial Attempt

The question provides an initial attempt using the relist() function, which does not seem to work as expected. The author then uses a custom recursive function to achieve the desired result.

Using a Recursive Function

# Recursive function
#
# Walks the (possibly nested) list `l` and replaces every leaf with the
# elements of `vect` that occur in that leaf, keeping the order of `vect`.
#
# Arguments:
#   l    - a list whose elements are either vectors (leaves) or further lists
#   vect - the vector to distribute over the structure of `l`
#
# Returns `l` with the same names and nesting, each leaf holding the matching
# elements of `vect` (a zero-length vector when nothing matches).
foo <- function(l, vect) {
    for (i in seq_along(l)) {
        item <- l[[i]]
        # inherits() always yields a single TRUE/FALSE; `class(x) == "list"`
        # has length > 1 for multi-class objects such as matrices, which makes
        # `if` fail in R >= 4.2.
        kept <- if (inherits(item, "list")) {
            foo(item, vect)
        } else {
            vect[vect %in% item]
        }
        # `l[i] <- list(x)` stores x even when it is NULL, whereas
        # `l[[i]] <- NULL` would delete the slot and shift later indices.
        l[i] <- list(kept)
    }
    l
}

# DATA (flat list)
# `a` holds the observed IDs, assembled group by group from their suffixes.
a <- c(
    sprintf("AA01_%02d", c(1, 3, 4, 6, 8, 11, 12, 13, 14, 16, 19, 20)),
    sprintf("AA02_%02d", c(3, 4, 5, 6, 7, 8, 9, 13, 17, 19, 20)),
    sprintf("AA03_%02d", c(5, 9, 10, 12, 16, 20)),
    sprintf("AA04_%02d", c(1, 2, 3, 10, 11, 14, 16))
)

# `b` is the template: groups b1..b4, each listing the IDs AAgg_01 .. AAgg_20.
b <- lapply(
    setNames(1:4, paste0("b", 1:4)),
    function(group) sprintf("AA%02d_%02d", group, 1:20)
)

# Usage: distribute `a` over the structure of `b`
foo(l = b, vect = a)
# $b1
#  [1] "AA01_01" "AA01_03" "AA01_04" "AA01_06" "AA01_08" "AA01_11" "AA01_12" "AA01_13"
#  [9] "AA01_14" "AA01_16" "AA01_19" "AA01_20"
#
# $b2
#  [1] "AA02_03" "AA02_04" "AA02_05" "AA02_06" "AA02_07" "AA02_08" "AA02_09" "AA02_13"
#  [9] "AA02_17" "AA02_19" "AA02_20"
#
# $b3
# [1] "AA03_05" "AA03_09" "AA03_10" "AA03_12" "AA03_16" "AA03_20"
#
# $b4
# [1] "AA04_01" "AA04_02" "AA04_03" "AA04_10" "AA04_11" "AA04_14" "AA04_16"

This recursive function works by traversing each element in the list l and checking whether it is itself a list. If it is, the function calls itself with the sub-list (l[[i]]) and the original vector (vect). Otherwise, the element is replaced by the values of vect that occur in it, which is an empty vector when none of them match.

Handling Nested Lists

The problem requires us to handle nested lists, which means foo() has to call itself recursively for each sub-list. The version below makes this explicit by assembling the result in a new list:

# Recursive function with handling nested lists
#
# Builds a new list with the same names and nesting as `l`. Every leaf of `l`
# is replaced by the elements of `vect` that occur in it (in the order they
# have in `vect`); every sub-list is processed recursively.
#
# Arguments:
#   l    - a list whose elements are either vectors (leaves) or further lists
#   vect - the vector to distribute over the structure of `l`
#
# Returns a list shaped like `l`; leaves without any match become zero-length
# vectors.
foo <- function(l, vect) {
    # lapply() carries the names of `l` over to the result, so the output can
    # be addressed as result$b2$b21 just like the template.
    lapply(l, function(item) {
        # inherits() always yields a single TRUE/FALSE, unlike
        # `class(item) == "list"`, which has length > 1 for multi-class
        # objects such as matrices.
        if (inherits(item, "list")) {
            foo(item, vect)
        } else {
            vect[vect %in% item]
        }
    })
}

Usage and Expected Output

Using our recursive function with the provided data:

# DATA (nested list)
# `a` holds the observed IDs in arbitrary order.
a <- c(
    "AA01_01", "AA01_03", "AA01_04", "AA01_06", "AA01_08", "AA01_11", "AA01_12", "AA01_13",
    "AA01_14", "AA01_16", "AA01_19", "AA01_20", "AA02_03", "AA02_04", "AA02_05", "AA02_06", "AA02_07",
    "AA02_08", "AA02_09", "AA02_13", "AA02_17", "AA02_19", "AA02_20", "AA03_05", "AA03_09", "AA03_10",
    "AA03_12", "AA03_16", "AA03_20", "AA04_01", "AA04_02", "AA04_03", "AA04_10", "AA04_11", "AA04_14",
    "AA04_16"
)

# `b` is the template. b1, b3 and b4 are plain vectors; b2 is itself a list
# with the two halves b21 and b22, which is what exercises the recursion.
b <- list(
    b1 = c(
        "AA01_01", "AA01_02", "AA01_03", "AA01_04", "AA01_05", "AA01_06", "AA01_07", "AA01_08", "AA01_09",
        "AA01_10",
        "AA01_11", "AA01_12", "AA01_13", "AA01_14", "AA01_15", "AA01_16", "AA01_17", "AA01_18", "AA01_19",
        "AA01_20"
    ),
    b2 = list(
        b21 = c(
            "AA02_01", "AA02_02", "AA02_03", "AA02_04", "AA02_05", "AA02_06", "AA02_07", "AA02_08",
            "AA02_09",
            "AA02_10"
        ),
        b22 = c(
            "AA02_11", "AA02_12", "AA02_13", "AA02_14", "AA02_15", "AA02_16", "AA02_17", "AA02_18", "AA02_19",
            "AA02_20"
        )
    ),  # closes the inner list so that b3 and b4 stay top-level elements
    b3 = c(
        "AA03_01", "AA03_02", "AA03_03", "AA03_04", "AA03_05", "AA03_06", "AA03_07", "AA03_08", "AA03_09",
        "AA03_10",
        "AA03_11", "AA03_12", "AA03_13", "AA03_14", "AA03_15", "AA03_16", "AA03_17", "AA03_18", "AA03_19",
        "AA03_20"
    ),
    b4 = c(
        "AA04_01", "AA04_02", "AA04_03", "AA04_04", "AA04_05", "AA04_06", "AA04_07", "AA04_08", "AA04_09",
        "AA04_10",
        "AA04_11", "AA04_12", "AA04_13", "AA04_14", "AA04_15", "AA04_16", "AA04_17", "AA04_18", "AA04_19",
        "AA04_20"
    )
)

# Usage: distribute `a` over the nested structure of `b`
result <- foo(b, a)
# Printing `result` is expected to show only IDs that are present in `a`,
# grouped like `b`:
# $b1
#  [1] "AA01_01" "AA01_03" "AA01_04" "AA01_06" "AA01_08" "AA01_11" "AA01_12" "AA01_13"
#  [9] "AA01_14" "AA01_16" "AA01_19" "AA01_20"
#
# $b2
# $b2$b21
# [1] "AA02_03" "AA02_04" "AA02_05" "AA02_06" "AA02_07" "AA02_08" "AA02_09"
#
# $b2$b22
# [1] "AA02_13" "AA02_17" "AA02_19" "AA02_20"
#
#
# $b3
# [1] "AA03_05" "AA03_09" "AA03_10" "AA03_12" "AA03_16" "AA03_20"
#
# $b4
# [1] "AA04_01" "AA04_02" "AA04_03" "AA04_10" "AA04_11" "AA04_14" "AA04_16"

Last modified on 2024-09-25