R Programming Manual for Visual Studio Code

Table of Contents

  1. Setting Up R in VSCode
  2. Essential VSCode Extensions for R
  3. R Basics and Syntax
  4. Data Types and Structures
  5. Working with Data
  6. Control Structures
  7. Functions
  8. Data Visualization
  9. Statistical Analysis
  10. Package Management
  11. File I/O Operations
  12. Debugging in VSCode
  13. Best Practices
  14. Common Tasks and Examples

Setting Up R in VSCode

Prerequisites

  1. Install R
    • Download from CRAN
    • Install R base system on your operating system
    • Verify the installation by opening a terminal and running R --version
  2. Install Rtools (Windows Only)
    • Download from CRAN Rtools
    • Required for building packages from source

VSCode Configuration

  1. Install R Extension
    • Open VSCode Extensions panel (Ctrl+Shift+X)
    • Search for "R" by REditorSupport
    • Install the extension
  2. Configure R Path
    • Open Settings (Ctrl+,)
    • Search for "r.rterm" (the setting is platform-specific: r.rterm.windows, r.rterm.mac, or r.rterm.linux)
    • Set the path to your R executable:
      • Windows: C:\Program Files\R\R-4.x.x\bin\x64\R.exe
      • Mac: /usr/local/bin/R
      • Linux: /usr/bin/R

Essential VSCode Extensions for R

Core Extensions

  1. R (REditorSupport.r) - Syntax highlighting, IntelliSense, and debugging
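  2. R LSP Client (REditorSupport.r-lsp) - Language server protocol support (deprecated: the R extension above now includes the language client; install the languageserver package in R to enable it)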
  3. R Debugger (RDebugger.r-debugger) - Advanced debugging capabilities

Recommended Extensions

  1. Rainbow CSV - Better CSV file viewing
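  2. Bracket Pair Colorizer - Visual bracket matching (deprecated: VS Code now provides this natively via the editor.bracketPairColorization.enabled setting)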
  3. GitLens - Enhanced Git integration
  4. Markdown All in One - For R Markdown support

R Basics and Syntax

Basic Operations

# Assignment operators
x <- 5          # Preferred assignment operator
y = 10          # Also works, but style guides reserve = for function arguments
15 -> z         # Right assignment (rarely used; avoid in normal code)

# Arithmetic operators
# Descriptive names are used instead of single letters: a variable called
# `c` would reuse the name of the base function c() that builds vectors.
sum_val  <- 10 + 5     # Addition: 15
diff_val <- 10 - 5     # Subtraction: 5
prod_val <- 10 * 5     # Multiplication: 50
quot_val <- 10 / 5     # Division: 2
pow_val  <- 10^2       # Exponentiation: 100
mod_val  <- 10 %% 3    # Modulo (remainder): 1
int_quot <- 10 %/% 3   # Integer division: 3

# Comparison operators
10 == 10        # Equal: TRUE
10 != 5         # Not equal: TRUE
10 > 5          # Greater than: TRUE
10 < 5          # Less than: FALSE
10 >= 10        # Greater or equal: TRUE
10 <= 5         # Less or equal: FALSE

# Logical operators
# & and | work element by element on vectors; use && and || for the single
# TRUE/FALSE condition of an if or while statement.
TRUE & FALSE    # AND: FALSE
TRUE | FALSE    # OR: TRUE
!TRUE           # NOT: FALSE

Comments and Documentation

# Single line comment

# Multi-line comments: R has no block-comment syntax, so start every line with #
# This is line 1 of a comment
# This is line 2 of a comment

# Documenting a function with roxygen2 comments (lines starting with #')
#' Sum two numeric values
#'
#' @param x First addend (a number).
#' @param y Second addend (a number).
#' @return The result of `x + y`.
#' @examples
#' add_numbers(2, 3)
add_numbers <- function(x, y) {
  # The value of the last expression is what the function returns
  x + y
}

Data Types and Structures

Basic Data Types

# Numeric (double)
num <- 42.5
class(num)              # "numeric"

# Integer
int <- 42L              # L suffix for integer
class(int)              # "integer"

# Character (string)
char <- "Hello, World!"
class(char)             # "character"

# Logical (boolean)
# Named `flag` rather than `logical` so the variable does not shadow the
# base function logical().
flag <- TRUE
class(flag)             # "logical"

# Complex
# Named `cplx` rather than `complex` so the variable does not shadow the
# base function complex().
cplx <- 3 + 2i
class(cplx)             # "complex"

# Check data type
is.numeric(num)         # TRUE
is.numeric(int)         # TRUE as well: is.numeric() accepts integer and double
is.character(char)      # TRUE
is.logical(flag)        # TRUE

Data Structures

Vectors

# Creating vectors with c()
numeric_vector <- c(1, 2, 3, 4, 5)
character_vector <- c("apple", "banana", "cherry")
logical_vector <- c(TRUE, FALSE, TRUE)

# Vector operations
length(numeric_vector)                    # Number of elements: 5
numeric_vector[1]                        # First element (indexing starts at 1): 1
numeric_vector[c(1, 3, 5)]              # Elements at positions 1, 3, 5: 1 3 5
numeric_vector[numeric_vector > 3]       # Logical condition keeps elements > 3: 4 5

# Named vectors: each element gets a label
named_vector <- c(a = 1, b = 2, c = 3)
named_vector["a"]                        # Access by name: the element labelled a (value 1)

Lists

# Creating lists
# The elements below have different types: a numeric vector, a string,
# a logical value, and another list.
my_list <- list(
  numbers = c(1, 2, 3),
  text = "Hello",
  logical = TRUE,
  nested_list = list(a = 1, b = 2)
)

# Accessing list elements
my_list$numbers                          # By name with $: the vector 1 2 3
my_list[["numbers"]]                     # By name with [[]]: the same vector
my_list[[1]]                            # By index: the first element (numbers)
my_list["numbers"]                       # Single [] returns a list with one element

Data Frames

# Creating data frames
# Each named argument becomes a column; here every column has three values.
df <- data.frame(
  name = c("Alice", "Bob", "Charlie"),
  age = c(25, 30, 35),
  city = c("New York", "London", "Tokyo"),
  stringsAsFactors = FALSE                # Keep text as character (already the default since R 4.0)
)

# Accessing data frame elements
df$name                                  # Column by name (returns a vector)
df[["name"]]                            # Column by name (alternative, also a vector)
df[1, ]                                 # First row
df[, 1]                                 # First column as a vector; add drop = FALSE to keep a data frame
df[1:2, c("name", "age")]               # Subset rows 1-2 and the name and age columns

# Data frame information
nrow(df)                                # Number of rows: 3
ncol(df)                                # Number of columns: 3
dim(df)                                 # Dimensions (rows, columns): 3 3
names(df)                               # Column names: "name" "age" "city"
str(df)                                 # Structure
summary(df)                             # Summary statistics

Matrices

# Creating matrices
# matrix() fills column by column unless byrow = TRUE is given.
matrix1 <- matrix(1:12, nrow = 3, ncol = 4)
matrix2 <- matrix(1:12, nrow = 3, ncol = 4, byrow = TRUE)

# Matrix operations
dim(matrix1)                            # Dimensions (rows, columns): 3 4
matrix1[2, 3]                           # Element at row 2, column 3: 8
matrix1[, 1]                            # First column: 1 2 3
matrix1[1, ]                            # First row: 1 4 7 10

# Matrix arithmetic
matrix1 + matrix2                       # Element-wise addition
matrix1 * matrix2                       # Element-wise multiplication
matrix1 %*% t(matrix2)                  # Matrix multiplication: (3 x 4) times (4 x 3) gives 3 x 3

Working with Data

Data Import and Export

# Reading CSV files
# NOTE: every assignment to `data` in this snippet overwrites the previous
# one; the lines are alternatives, not consecutive steps.
data <- read.csv("file.csv")
data <- read.csv("file.csv", header = TRUE, sep = ",")

# Reading other formats (tab-separated text in these examples)
data <- read.table("file.txt", header = TRUE, sep = "\t")
data <- read.delim("file.txt")

# Writing data
# row.names = FALSE leaves the row names out of the written file.
write.csv(data, "output.csv", row.names = FALSE)
write.table(data, "output.txt", sep = "\t", row.names = FALSE)

# Reading from URLs: a URL is passed where a file path would go
url_data <- read.csv("https://example.com/data.csv")

Data Manipulation

# Viewing data
head(df)                                # First rows (at most 6)
tail(df)                                # Last rows (at most 6)
View(df)                                # Open in viewer (VSCode)

# Filtering data
subset(df, age > 25)                    # Rows where age > 25
df[df$age > 25, ]                       # Alternative syntax

# Sorting data
df[order(df$age), ]                     # Sort by age (ascending)
# decreasing = TRUE also works for non-numeric columns, unlike negating
# the column with a unary minus.
df[order(df$age, decreasing = TRUE), ]  # Sort by age (descending)

# Adding columns
df$salary <- c(50000, 60000, 70000)
df$bonus <- df$salary * 0.1

# Removing columns
df$bonus <- NULL                        # Remove bonus column
# Alternative method: keep every column whose name is not listed.
# drop = FALSE keeps the result a data frame even if one column remains.
df <- df[, !names(df) %in% "bonus", drop = FALSE]

Missing Data

# Creating data with missing values (NA marks a missing entry)
data_with_na <- c(1, 2, NA, 4, 5)

# Checking for missing values
is.na(data_with_na)                     # Logical vector: FALSE FALSE TRUE FALSE FALSE
any(is.na(data_with_na))               # TRUE if any NA (TRUE here)
sum(is.na(data_with_na))               # Count of NA values: 1

# Handling missing values
na.omit(data_with_na)                   # Remove NA values
data_with_na[!is.na(data_with_na)]     # Remove NA values (alternative): 1 2 4 5
# NA propagates through aggregates, so na.rm = TRUE must be given explicitly.
mean(data_with_na, na.rm = TRUE)       # Calculate mean ignoring NA: 3

Control Structures

Conditional Statements

# if-else statements
x <- 10

# Conditions are checked top to bottom; with x = 10 the first branch runs.
if (x > 5) {
  print("x is greater than 5")
} else if (x == 5) {
  print("x equals 5")
} else {
  print("x is less than 5")
}

# Vectorized conditional (ifelse): the test is applied to every element
numbers <- c(1, 5, 10, 15, 20)
result <- ifelse(numbers > 10, "High", "Low")
print(result)                            # "Low" "Low" "Low" "High" "High"

# switch statement: returns the value whose name matches `grade`
# ("Excellent" for grade "A")
grade <- "A"
description <- switch(grade,
  A = "Excellent",
  B = "Good",
  C = "Average",
  D = "Below Average",
  F = "Fail",
  "Unknown Grade"                        # Unnamed last value: fallback when no name matches
)

Loops

# for loop: runs the body once for each value in 1:5
for (i in 1:5) {
  print(paste("Iteration:", i))
}

# for loop with vector: `fruit` takes each element in turn
fruits <- c("apple", "banana", "cherry")
for (fruit in fruits) {
  print(paste("I like", fruit))
}

# while loop: repeats as long as the condition holds (prints Counter: 1 to 5)
counter <- 1
while (counter <= 5) {
  print(paste("Counter:", counter))
  counter <- counter + 1               # Without this increment the loop would never end
}

# repeat loop with break: no condition of its own, so break is the only exit
# (prints the same five lines as the while loop above)
counter <- 1
repeat {
  print(paste("Counter:", counter))
  counter <- counter + 1
  if (counter > 5) {
    break
  }
}

# next statement (skip iteration): prints the odd numbers 1, 3, 5, 7, 9
for (i in 1:10) {
  if (i %% 2 == 0) {
    next  # Skip even numbers
  }
  print(i)
}

Functions

Creating Functions

# A minimal one-argument function
#' Build a greeting for a person
#'
#' @param name Name to greet.
#' @return A character string of the form "Hello, <name>".
greet <- function(name) {
  # paste() joins its arguments with a single space
  paste("Hello,", name)
}

# A function taking two arguments
#' Compute a body mass index
#'
#' @param weight Body weight (presumably kilograms -- confirm with callers).
#' @param height Body height (presumably metres -- confirm with callers).
#' @return `weight` divided by the square of `height`.
calculate_bmi <- function(weight, height) {
  height_squared <- height^2
  weight / height_squared
}

# A function whose second argument has a default value
#' Build a greeting that includes a title
#'
#' @param name Name to greet.
#' @param title Title placed before the name; "Mr." when omitted.
#' @return A character string of the form "Hello, <title> <name>".
greet_with_title <- function(name, title = "Mr.") {
  greeting <- paste("Hello,", title, name)
  greeting
}

# Function returning several values at once (bundled in a named list)
#' Summarise a numeric vector
#'
#' @param numbers A numeric vector.
#' @param na.rm Should missing values be removed before computing? Defaults
#'   to `FALSE`, so an `NA` in `numbers` yields `NA` results, exactly as the
#'   underlying functions behave without `na.rm`.
#' @return A named list with elements `mean`, `median`, `sd`, `min`, `max`.
#' @examples
#' calculate_stats(c(1, 2, 3, 4, 5))
#' calculate_stats(c(1, NA, 3), na.rm = TRUE)
calculate_stats <- function(numbers, na.rm = FALSE) {
  list(
    mean = mean(numbers, na.rm = na.rm),
    median = median(numbers, na.rm = na.rm),
    sd = sd(numbers, na.rm = na.rm),
    min = min(numbers, na.rm = na.rm),
    max = max(numbers, na.rm = na.rm)
  )
}

# Using functions
greet("Alice")                          # "Hello, Alice"
calculate_bmi(70, 1.75)                 # 70 / 1.75^2, about 22.86
greet_with_title("Smith", "Dr.")        # "Hello, Dr. Smith"
# The result is a list; individual values are read by name, e.g. stats$mean
stats <- calculate_stats(c(1, 2, 3, 4, 5))

Built-in Functions

# Mathematical functions
abs(-5)                                 # Absolute value: 5
sqrt(16)                               # Square root: 4
log(10)                                # Natural logarithm: about 2.302585
log10(100)                             # Base-10 logarithm: 2
exp(1)                                 # Exponential: e, about 2.718282
round(3.14159, 2)                      # Round to 2 decimals: 3.14
ceiling(3.2)                           # Round up: 4
floor(3.8)                             # Round down: 3

# String functions
nchar("Hello")                         # String length: 5
toupper("hello")                       # Uppercase: "HELLO"
tolower("HELLO")                       # Lowercase: "hello"
substr("Hello World", 1, 5)           # Substring from position 1 to 5: "Hello"
paste("Hello", "World")               # Concatenate with a space: "Hello World"
paste0("Hello", "World")              # Concatenate without space: "HelloWorld"

# Statistical functions
mean(c(1, 2, 3, 4, 5))                # Mean: 3
median(c(1, 2, 3, 4, 5))              # Median: 3
sd(c(1, 2, 3, 4, 5))                  # Standard deviation: about 1.581139
var(c(1, 2, 3, 4, 5))                 # Variance: 2.5
min(c(1, 2, 3, 4, 5))                 # Minimum: 1
max(c(1, 2, 3, 4, 5))                 # Maximum: 5
range(c(1, 2, 3, 4, 5))               # Range (minimum and maximum): c(1, 5)

Data Visualization

Base R Graphics

# Basic plots
x <- 1:10
y <- x^2

# Scatter plot
plot(x, y, main = "Scatter Plot", xlab = "X values"