sevki
c9e1dba412
setup CI/CD for kernel development - added CodeQL for code scanning - every pr is built as an image and is available for 30days on https://oklinux.dev - tagged and released on github for now Signed-off-by: sevki <s@sevki.io>
558 lines
14 KiB
C
558 lines
14 KiB
C
/*
|
|
* Chromium OS alt-syscall tables
|
|
*
|
|
* Copyright (C) 2015 Google, Inc.
|
|
*
|
|
* This program is free software; you can redistribute it and/or modify
|
|
* it under the terms of the GNU General Public License version 2 as
|
|
* published by the Free Software Foundation.
|
|
*/
|
|
|
|
#include <linux/alt-syscall.h>
|
|
#include <linux/compat.h>
|
|
#include <linux/fs.h>
|
|
#include <linux/init.h>
|
|
#include <linux/kernel.h>
|
|
#include <linux/kcmp.h>
|
|
#include <linux/module.h>
|
|
#include <linux/prctl.h>
|
|
#include <linux/sched/types.h>
|
|
#include <linux/slab.h>
|
|
#include <linux/socket.h>
|
|
#include <linux/syscalls.h>
|
|
#include <linux/timex.h>
|
|
#include <uapi/linux/sched/types.h>
|
|
|
|
#include <asm/unistd.h>
|
|
|
|
#include "alt-syscall.h"
|
|
#include "android_whitelists.h"
|
|
#include "complete_whitelists.h"
|
|
#include "read_write_test_whitelists.h"
|
|
#include "third_party_whitelists.h"
|
|
|
|
/* Intercept and log blocked syscalls. */
|
|
static asmlinkage long block_syscall(struct pt_regs *regs)
|
|
{
|
|
struct task_struct *task = current;
|
|
|
|
pr_warn_ratelimited("[%d] %s: blocked syscall %d\n", task_pid_nr(task),
|
|
task->comm, syscall_get_nr(task, regs));
|
|
|
|
return -ENOSYS;
|
|
}
|
|
|
|
/*
|
|
* In permissive mode, warn that the syscall was blocked, but still allow
|
|
* it to go through. Note that since we don't have an easy way to map from
|
|
* syscall to number of arguments, we pass the maximum (6).
|
|
*/
|
|
static asmlinkage long warn_syscall(struct pt_regs *regs)
|
|
{
|
|
struct task_struct *task = current;
|
|
int nr = syscall_get_nr(task, regs);
|
|
sys_call_ptr_t fn = (sys_call_ptr_t)default_table.table[nr];
|
|
|
|
pr_warn_ratelimited("[%d] %s: syscall %d not whitelisted\n",
|
|
task_pid_nr(task), task->comm, nr);
|
|
|
|
return fn(regs);
|
|
}
|
|
|
|
#ifdef CONFIG_COMPAT
|
|
static asmlinkage long warn_compat_syscall(struct pt_regs *regs)
|
|
{
|
|
struct task_struct *task = current;
|
|
int nr = syscall_get_nr(task, regs);
|
|
sys_call_ptr_t fn = (sys_call_ptr_t)default_table.compat_table[nr];
|
|
|
|
pr_warn_ratelimited("[%d] %s: compat syscall %d not whitelisted\n",
|
|
task_pid_nr(task), task->comm, nr);
|
|
|
|
return fn(regs);
|
|
}
|
|
#endif /* CONFIG_COMPAT */
|
|
|
|
static asmlinkage long alt_sys_prctl(struct pt_regs *regs)
|
|
{
|
|
struct task_struct *task = current;
|
|
unsigned long args[6];
|
|
|
|
syscall_get_arguments(task, regs, args);
|
|
|
|
if (args[0] == PR_ALT_SYSCALL &&
|
|
args[1] == PR_ALT_SYSCALL_SET_SYSCALL_TABLE)
|
|
return -EPERM;
|
|
|
|
return ksys_prctl(args[0], args[1], args[2], args[3], args[4]);
|
|
}
|
|
|
|
/* Nice values Android uses for its thread priority classes. */
#define ANDROID_PRIORITY_FOREGROUND		(-2)
#define ANDROID_PRIORITY_DISPLAY		(-4)
#define ANDROID_PRIORITY_URGENT_DISPLAY		(-8)
#define ANDROID_PRIORITY_AUDIO			(-16)
#define ANDROID_PRIORITY_URGENT_AUDIO		(-19)
#define ANDROID_PRIORITY_HIGHEST		(-20)

/* Reduced nice values used for the same classes inside the container. */
#define CONTAINER_PRIORITY_FOREGROUND		(-1)
#define CONTAINER_PRIORITY_DISPLAY		(-2)
#define CONTAINER_PRIORITY_URGENT_DISPLAY	(-4)
#define CONTAINER_PRIORITY_AUDIO		(-8)
#define CONTAINER_PRIORITY_URGENT_AUDIO		(-9)
#define CONTAINER_PRIORITY_HIGHEST		(-10)
|
|
|
|
/*
|
|
* TODO(mortonm): Move the implementation of these Android-specific
|
|
* alt-syscalls (starting with android_*) to their own .c file.
|
|
*/
|
|
static asmlinkage long android_getpriority(struct pt_regs *regs)
|
|
{
|
|
struct task_struct *task = current;
|
|
long prio, nice;
|
|
unsigned long args[6];
|
|
int which, who;
|
|
|
|
syscall_get_arguments(task, regs, args);
|
|
which = args[0];
|
|
who = args[1];
|
|
|
|
prio = ksys_getpriority(which, who);
|
|
if (prio <= 20)
|
|
return prio;
|
|
|
|
nice = -(prio - 20);
|
|
switch (nice) {
|
|
case CONTAINER_PRIORITY_FOREGROUND:
|
|
nice = ANDROID_PRIORITY_FOREGROUND;
|
|
break;
|
|
case CONTAINER_PRIORITY_DISPLAY:
|
|
nice = ANDROID_PRIORITY_DISPLAY;
|
|
break;
|
|
case CONTAINER_PRIORITY_URGENT_DISPLAY:
|
|
nice = ANDROID_PRIORITY_URGENT_DISPLAY;
|
|
break;
|
|
case CONTAINER_PRIORITY_AUDIO:
|
|
nice = ANDROID_PRIORITY_AUDIO;
|
|
break;
|
|
case CONTAINER_PRIORITY_URGENT_AUDIO:
|
|
nice = ANDROID_PRIORITY_URGENT_AUDIO;
|
|
break;
|
|
case CONTAINER_PRIORITY_HIGHEST:
|
|
nice = ANDROID_PRIORITY_HIGHEST;
|
|
break;
|
|
}
|
|
|
|
return -nice + 20;
|
|
}
|
|
|
|
static asmlinkage long android_keyctl(struct pt_regs *regs)
|
|
{
|
|
return -EACCES;
|
|
}
|
|
|
|
#ifdef CONFIG_KCMP
|
|
static asmlinkage long android_kcmp(struct pt_regs *regs)
|
|
{
|
|
struct task_struct *task = current;
|
|
unsigned long args[6];
|
|
pid_t pid1, pid2;
|
|
int type;
|
|
unsigned long idx1, idx2;
|
|
|
|
syscall_get_arguments(task, regs, args);
|
|
type = args[2];
|
|
if (type == KCMP_SYSVSEM)
|
|
return -ENOSYS;
|
|
|
|
pid1 = args[0];
|
|
pid2 = args[1];
|
|
idx1 = args[3];
|
|
idx2 = args[4];
|
|
return ksys_kcmp(pid1, pid2, type, idx1, idx2);
|
|
}
|
|
#endif
|
|
|
|
static asmlinkage long android_setpriority(struct pt_regs *regs)
|
|
{
|
|
struct task_struct *task = current;
|
|
unsigned long args[6];
|
|
int which, who, niceval;
|
|
|
|
syscall_get_arguments(task, regs, args);
|
|
which = args[0];
|
|
who = args[1];
|
|
niceval = args[2];
|
|
|
|
if (niceval < 0) {
|
|
if (niceval < -20)
|
|
niceval = -20;
|
|
niceval = niceval / 2;
|
|
}
|
|
return ksys_setpriority(which, who, niceval);
|
|
}
|
|
|
|
static asmlinkage long
|
|
do_android_sched_setscheduler(pid_t pid, int policy,
|
|
struct sched_param __user *param)
|
|
{
|
|
struct sched_param lparam;
|
|
struct task_struct *p;
|
|
long retval;
|
|
|
|
if (!param || pid < 0)
|
|
return -EINVAL;
|
|
if (copy_from_user(&lparam, param, sizeof(struct sched_param)))
|
|
return -EFAULT;
|
|
|
|
rcu_read_lock();
|
|
retval = -ESRCH;
|
|
p = pid ? find_task_by_vpid(pid) : current;
|
|
if (likely(p))
|
|
get_task_struct(p);
|
|
rcu_read_unlock();
|
|
|
|
if (likely(p)) {
|
|
const struct cred *cred = current_cred();
|
|
kuid_t android_root_uid, android_system_uid;
|
|
|
|
/*
|
|
* Allow root(0) and system(1000) processes to set RT scheduler.
|
|
*
|
|
* The system_server process run under system provides
|
|
* SchedulingPolicyService which is used by audioflinger and
|
|
* other services to boost their threads, so allow it to set RT
|
|
* scheduler for other threads.
|
|
*/
|
|
android_root_uid = make_kuid(cred->user_ns, 0);
|
|
android_system_uid = make_kuid(cred->user_ns, 1000);
|
|
if ((uid_eq(cred->euid, android_root_uid) ||
|
|
uid_eq(cred->euid, android_system_uid)) &&
|
|
ns_capable(cred->user_ns, CAP_SYS_NICE))
|
|
retval = sched_setscheduler_nocheck(p, policy, &lparam);
|
|
else
|
|
retval = sched_setscheduler(p, policy, &lparam);
|
|
put_task_struct(p);
|
|
}
|
|
|
|
return retval;
|
|
}
|
|
|
|
static asmlinkage long
|
|
android_sched_setscheduler(struct pt_regs *regs)
|
|
{
|
|
struct task_struct *task = current;
|
|
unsigned long args[6];
|
|
pid_t pid;
|
|
int policy;
|
|
struct sched_param __user *param;
|
|
|
|
syscall_get_arguments(task, regs, args);
|
|
pid = args[0];
|
|
policy = args[1];
|
|
param = (struct sched_param __user *)args[2];
|
|
|
|
/* negative values for policy are not valid */
|
|
if (policy < 0)
|
|
return -EINVAL;
|
|
return do_android_sched_setscheduler(pid, policy, param);
|
|
}
|
|
|
|
/*
|
|
* sched_setparam() passes in -1 for its policy, to let the functions
|
|
* it calls know not to change it.
|
|
*/
|
|
#define SETPARAM_POLICY -1
|
|
|
|
static asmlinkage long android_sched_setparam(struct pt_regs *regs)
|
|
{
|
|
struct task_struct *task = current;
|
|
unsigned long args[6];
|
|
pid_t pid;
|
|
struct sched_param __user *param;
|
|
|
|
syscall_get_arguments(task, regs, args);
|
|
pid = args[0];
|
|
param = (struct sched_param __user *)args[1];
|
|
|
|
return do_android_sched_setscheduler(pid, SETPARAM_POLICY, param);
|
|
}
|
|
|
|
/*
 * socket() replacement: AF_VSOCK is refused with -EACCES; any other
 * domain is forwarded to __sys_socket() with the caller's arguments.
 */
static asmlinkage long __maybe_unused android_socket(struct pt_regs *regs)
{
	unsigned long args[6];
	int domain, type, protocol;

	syscall_get_arguments(current, regs, args);
	domain = args[0];
	type = args[1];
	protocol = args[2];

	if (domain != AF_VSOCK)
		return __sys_socket(domain, type, protocol);

	return -EACCES;
}
|
|
|
|
static asmlinkage long android_perf_event_open(struct pt_regs *regs)
|
|
{
|
|
struct task_struct *task = current;
|
|
unsigned long args[6];
|
|
struct perf_event_attr __user *attr_uptr;
|
|
pid_t pid;
|
|
int cpu, group_fd;
|
|
unsigned long flags;
|
|
|
|
if (!allow_devmode_syscalls)
|
|
return -EACCES;
|
|
|
|
syscall_get_arguments(task, regs, args);
|
|
attr_uptr = (struct perf_event_attr __user *)args[0];
|
|
pid = args[1];
|
|
cpu = args[2];
|
|
group_fd = args[3];
|
|
flags = args[4];
|
|
|
|
return ksys_perf_event_open(attr_uptr, pid, cpu, group_fd, flags);
|
|
}
|
|
|
|
static asmlinkage long android_adjtimex(struct pt_regs *regs)
|
|
{
|
|
struct task_struct *task = current;
|
|
struct __kernel_timex kbuf;
|
|
struct __kernel_timex __user *buf;
|
|
unsigned long args[6];
|
|
|
|
syscall_get_arguments(task, regs, args);
|
|
buf = (struct __kernel_timex __user *)args[0];
|
|
|
|
/* adjtimex() is allowed only for read. */
|
|
if (copy_from_user(&kbuf, buf, sizeof(struct __kernel_timex)))
|
|
return -EFAULT;
|
|
if (kbuf.modes != 0)
|
|
return -EPERM;
|
|
return ksys_adjtimex(buf);
|
|
}
|
|
|
|
static asmlinkage long android_clock_adjtime(struct pt_regs *regs)
|
|
{
|
|
struct task_struct *task = current;
|
|
struct __kernel_timex kbuf;
|
|
unsigned long args[6];
|
|
clockid_t which_clock;
|
|
struct __kernel_timex __user *buf;
|
|
|
|
syscall_get_arguments(task, regs, args);
|
|
which_clock = args[0];
|
|
buf = (struct __kernel_timex __user *)args[1];
|
|
|
|
/* clock_adjtime() is allowed only for read. */
|
|
if (copy_from_user(&kbuf, buf, sizeof(struct __kernel_timex)))
|
|
return -EFAULT;
|
|
if (kbuf.modes != 0)
|
|
return -EPERM;
|
|
return ksys_clock_adjtime(which_clock, buf);
|
|
}
|
|
|
|
static asmlinkage long android_getcpu(struct pt_regs *regs)
|
|
{
|
|
struct task_struct *task = current;
|
|
unsigned long args[6];
|
|
unsigned __user *cpu;
|
|
unsigned __user *node;
|
|
struct getcpu_cache __user *tcache;
|
|
|
|
syscall_get_arguments(task, regs, args);
|
|
cpu = (unsigned __user *)args[0];
|
|
node = (unsigned __user *)args[1];
|
|
tcache = (struct getcpu_cache __user *)args[2];
|
|
|
|
if (node || tcache)
|
|
return -EPERM;
|
|
return ksys_getcpu(cpu, node, tcache);
|
|
}
|
|
|
|
#ifdef CONFIG_COMPAT
|
|
static asmlinkage long android_compat_adjtimex(struct pt_regs *regs)
|
|
{
|
|
struct task_struct *task = current;
|
|
struct old_timex32 kbuf;
|
|
struct old_timex32 __user *buf;
|
|
unsigned long args[6];
|
|
|
|
syscall_get_arguments(task, regs, args);
|
|
buf = (struct old_timex32 __user *)args[0];
|
|
|
|
/* adjtimex() is allowed only for read. */
|
|
if (copy_from_user(&kbuf, buf, sizeof(struct old_timex32)))
|
|
return -EFAULT;
|
|
if (kbuf.modes != 0)
|
|
return -EPERM;
|
|
return ksys_adjtimex_time32(buf);
|
|
}
|
|
|
|
static asmlinkage long
|
|
android_compat_clock_adjtime(struct pt_regs *regs)
|
|
{
|
|
struct task_struct *task = current;
|
|
struct old_timex32 kbuf;
|
|
unsigned long args[6];
|
|
clockid_t which_clock;
|
|
struct old_timex32 __user *buf;
|
|
|
|
syscall_get_arguments(task, regs, args);
|
|
which_clock = args[0];
|
|
buf = (struct old_timex32 __user *)args[1];
|
|
|
|
/* clock_adjtime() is allowed only for read. */
|
|
if (copy_from_user(&kbuf, buf, sizeof(struct old_timex32)))
|
|
return -EFAULT;
|
|
if (kbuf.modes != 0)
|
|
return -EPERM;
|
|
return ksys_clock_adjtime32(which_clock, buf);
|
|
}
|
|
#endif /* CONFIG_COMPAT */
|
|
|
|
/*
 * Whitelists for which an alternate syscall table is built at init time.
 * chromiumos_alt_syscall_init() walks this array in order.
 */
static const struct syscall_whitelist whitelists[] __initconst = {
	SYSCALL_WHITELIST(read_write_test),

	SYSCALL_WHITELIST(android),
	PERMISSIVE_SYSCALL_WHITELIST(android),

	SYSCALL_WHITELIST(third_party),
	PERMISSIVE_SYSCALL_WHITELIST(third_party),

	SYSCALL_WHITELIST(complete),
	PERMISSIVE_SYSCALL_WHITELIST(complete)
};
|
|
|
|
/*
 * Apply the native whitelist @wl to table @t.
 *
 * Whitelisted entries keep their current handler unless the whitelist
 * supplies an alternate one.  Every other slot is pointed at
 * warn_syscall() (permissive whitelist) or block_syscall().
 *
 * Returns 0 on success, -ENOMEM if the scratch bitmap cannot be
 * allocated, or -EINVAL if a whitelist entry lies outside the table.
 */
static int __init alt_syscall_apply_whitelist(const struct syscall_whitelist *wl,
					      struct alt_sys_call_table *t)
{
	sys_call_ptr_t fallback;
	unsigned long *allowed;
	unsigned int i;
	int ret = 0;

	allowed = kcalloc(BITS_TO_LONGS(t->size), sizeof(unsigned long),
			  GFP_KERNEL);
	if (!allowed)
		return -ENOMEM;

	/* Pass 1: record whitelisted numbers and install overrides. */
	for (i = 0; i < wl->nr_whitelist; i++) {
		unsigned int nr = wl->whitelist[i].nr;

		if (nr >= t->size) {
			ret = -EINVAL;
			goto out;
		}

		bitmap_set(allowed, nr, 1);
		if (wl->whitelist[i].alt)
			t->table[nr] = wl->whitelist[i].alt;
	}

	/* Pass 2: redirect everything that was not whitelisted. */
	fallback = wl->permissive ? (sys_call_ptr_t)warn_syscall :
				    (sys_call_ptr_t)block_syscall;
	for (i = 0; i < t->size; i++) {
		if (test_bit(i, allowed))
			continue;
		t->table[i] = fallback;
	}

out:
	kfree(allowed);
	return ret;
}
|
|
|
|
#ifdef CONFIG_COMPAT
|
|
static int __init
|
|
alt_syscall_apply_compat_whitelist(const struct syscall_whitelist *wl,
|
|
struct alt_sys_call_table *t)
|
|
{
|
|
unsigned int i;
|
|
unsigned long *whitelist = kcalloc(BITS_TO_LONGS(t->compat_size),
|
|
sizeof(unsigned long), GFP_KERNEL);
|
|
|
|
if (!whitelist)
|
|
return -ENOMEM;
|
|
|
|
for (i = 0; i < wl->nr_compat_whitelist; i++) {
|
|
unsigned int nr = wl->compat_whitelist[i].nr;
|
|
|
|
if (nr >= t->compat_size) {
|
|
kfree(whitelist);
|
|
return -EINVAL;
|
|
}
|
|
bitmap_set(whitelist, nr, 1);
|
|
if (wl->compat_whitelist[i].alt)
|
|
t->compat_table[nr] = wl->compat_whitelist[i].alt;
|
|
}
|
|
|
|
for (i = 0; i < t->compat_size; i++) {
|
|
if (!test_bit(i, whitelist)) {
|
|
t->compat_table[i] = wl->permissive ?
|
|
(sys_call_ptr_t)warn_compat_syscall :
|
|
(sys_call_ptr_t)block_syscall;
|
|
}
|
|
}
|
|
|
|
kfree(whitelist);
|
|
return 0;
|
|
}
|
|
#else
|
|
static inline int __init
|
|
alt_syscall_apply_compat_whitelist(const struct syscall_whitelist *wl,
|
|
struct alt_sys_call_table *t)
|
|
{
|
|
return 0;
|
|
}
|
|
#endif /* CONFIG_COMPAT */
|
|
|
|
/*
 * Build and register the alternate syscall table for whitelist @wl.
 *
 * A fresh table descriptor is allocated, named after the whitelist, filled
 * by arch_dup_sys_call_table(), filtered through the native and compat
 * whitelists and finally passed to register_alt_sys_call_table().
 *
 * Returns 0 on success or a negative errno.  The descriptor is released
 * on every failure that happens before registration is attempted.
 */
static int __init alt_syscall_init_one(const struct syscall_whitelist *wl)
{
	struct alt_sys_call_table *t;
	int err;

	t = kzalloc(sizeof(*t), GFP_KERNEL);
	if (!t)
		return -ENOMEM;

	/*
	 * The buffer was zeroed by kzalloc(); copying at most size - 1 bytes
	 * guarantees the name stays NUL-terminated even when wl->name is as
	 * long as, or longer than, the destination.
	 */
	strncpy(t->name, wl->name, sizeof(t->name) - 1);

	err = arch_dup_sys_call_table(t);
	if (err)
		goto err_free;

	err = alt_syscall_apply_whitelist(wl, t);
	if (err)
		goto err_free;

	err = alt_syscall_apply_compat_whitelist(wl, t);
	if (err)
		goto err_free;

	/*
	 * NOTE(review): if registration fails, t is still not freed here
	 * because it is not visible from this file whether
	 * register_alt_sys_call_table() may keep a reference -- confirm.
	 */
	return register_alt_sys_call_table(t);

err_free:
	/*
	 * NOTE(review): whatever arch_dup_sys_call_table() attached to t is
	 * not released; its matching free routine is not visible here.
	 */
	kfree(t);
	return err;
}
|
|
|
|
/*
|
|
* Register an alternate syscall table for each whitelist. Note that the
|
|
* lack of a module_exit() is intentional - once a syscall table is registered
|
|
* it cannot be unregistered.
|
|
*/
|
|
static int __init chromiumos_alt_syscall_init(void)
|
|
{
|
|
unsigned int i;
|
|
int err;
|
|
|
|
#ifdef CONFIG_SYSCTL
|
|
register_sysctl_init("kernel/chromiumos/alt_syscall",
|
|
chromiumos_sysctl_table);
|
|
#endif
|
|
|
|
err = arch_dup_sys_call_table(&default_table);
|
|
if (err)
|
|
return err;
|
|
|
|
for (i = 0; i < ARRAY_SIZE(whitelists); i++) {
|
|
err = alt_syscall_init_one(&whitelists[i]);
|
|
if (err)
|
|
pr_warn("Failed to register syscall table %s: %d\n",
|
|
whitelists[i].name, err);
|
|
}
|
|
|
|
return 0;
|
|
}
|
|
module_init(chromiumos_alt_syscall_init);
|