EVE Light on Life

Python源码学习(5) 函数

PyCodeObject

对象主要包含的内容:

  • co_argcount: Code Block 的位置参数个数
  • co_nlocals: Code Block 中局部变量的个数(包括位置参数)
  • co_stacksize: 执行Code Block的栈空间大小
  • co_code: 字节码,以PyStringObject形式存在
  • co_consts: 常量,PyTupleObject对象
  • co_names: Code Block中的符号,PyTupleObject对象
  • co_varnames: 局部变量名集合
  • co_freevars: 闭包相关(引用自外层作用域的自由变量名集合)
  • co_cellvars: 被嵌套函数引用的局部变量名集合
  • co_filename: Code Block对应的py文件路径
  • co_name: Code Block的名字
  • co_firstlineno: Code Block对应的.py文件中的起始行
  • co_lnotab: 字节码指令与py文件中source code行号的对应关系,PyStringObject对象

名字空间

注意一句话:python虚拟机在执行的过程中,会有很大一部分时间消耗在从这条名字空间链中确定一个符号所对应的对象是什么。

一个code block 对应一个名字空间。

类、函数、module都各自对应一个独立的名字空间,因此也各自对应一个PyCodeObject对象。

pyc文件

主要存三样东西:

  • magic number: 一个整数,区别不同的python版本
  • 文件创建时间信息
  • PyCodeObject对象

PyFrameObject对象

是一个变长对象,根据PyCodeObject使用不同大小的运行栈空间

虚拟机中执行的对象。

对象主要内容:

  • f_back: 执行环境链上的前一个frame
  • f_code: 对应的PyCodeObject对象
  • f_builtins: builtin名字空间
  • f_globals: global名字空间
  • f_locals: local名字空间
  • f_valuestack: 运行时栈的栈底位置
  • f_stacktop: 运行时栈的栈顶位置
  • f_lasti: 上一条字节码指令在f_code中的偏移位置
  • f_lineno: 当前字节码对应的源代码行
  • f_localsplus: 动态内存空间

在python中可以通过sys._getframe()方法获得当前活动的frame

LEGB规则

LEGB规则描述了符号的查找顺序:查找一个名字时,依次搜索以下四个作用域:

  • local
  • enclosing
  • global
  • builtin

虚拟机PyEval_EvalFrameEx

PyThreadState对象

if控制流

for控制流

函数

参数

有四种:

  • 位置参数(positional argument)
  • 键参数(keyword argument)
  • 扩展位置参数(excess positional argument)
  • 扩展键参数(excess keyword argument)

PyFunctionObject对象

/* In-memory layout of a Python function object.  Every field is a
 * PyObject pointer; the per-field comments say which ones may be NULL.
 */
typedef struct {
    PyObject_HEAD
    PyObject *func_code;	/* The code object backing this function */
    PyObject *func_globals;	/* The globals namespace the function runs with */
    PyObject *func_defaults;	/* NULL or a tuple */
    PyObject *func_closure;	/* NULL or a tuple of cell objects; used to implement closures */
    PyObject *func_doc;		/* The __doc__ attribute, the function's documentation */
    PyObject *func_name;	/* The __name__ attribute, a string object */
    PyObject *func_dict;	/* The __dict__ attribute, a dict or NULL */
    PyObject *func_weakreflist;	/* List of weak references */
    PyObject *func_module;	/* The __module__ attribute, can be anything */

    /* Invariant:
     *     func_closure contains the bindings for func_code->co_freevars, so
     *     PyTuple_Size(func_closure) == PyCode_GetNumFree(func_code)
     *     (func_closure may be NULL if PyCode_GetNumFree(func_code) == 0).
     */
} PyFunctionObject;

一段函数代码只会编译产生一个PyCodeObject对象,但函数定义语句可能被重复执行,从而同时存在多个PyFunctionObject对象;这些PyFunctionObject对象的状态各不相同,但它们都对应同一个PyCodeObject对象。

函数创建MAKE_FUNCTION

MAKE_FUNCTION指令:

/* MAKE_FUNCTION: wrap the code object on top of the value stack in a new
   function object bound to the current frame's globals.  oparg is the
   number of default-argument values to pop and attach to the function. */
v = POP();   /* the PyCodeObject for the function being created */
x = PyFunction_New(v, f->f_globals);
Py_DECREF(v);
/* Attach the default argument values, if there are any. */
if (x != NULL && oparg > 0) {
	v = PyTuple_New(oparg);
	if (v == NULL) {
		/* No tuple to hold the defaults: discard the new function
		   and leave x NULL. */
		Py_DECREF(x);
		x = NULL;
		break;
	}
	/* Fill the tuple from its last slot down to slot 0, so the value
	   popped last ends up at index 0. */
	while (--oparg >= 0) {
		w = POP();
		PyTuple_SET_ITEM(v, oparg, w);   /* takes over the reference to w */
	}
	err = PyFunction_SetDefaults(x, v);
	Py_DECREF(v);
}
PUSH(x);
break;

PyFunction_New函数:

/* Create a new function object for `code`, using `globals` as its global
 * namespace.
 *
 * The new object takes its own references to `code`, `globals`, the code
 * object's co_name, the docstring (or None) and, when present, the value of
 * globals["__name__"].  Defaults, closure and __dict__ start out as NULL.
 *
 * Returns the new function object, or NULL if the object could not be
 * allocated or the "__name__" key string could not be created.
 */
PyObject *
PyFunction_New(PyObject *code, PyObject *globals)
{
    PyFunctionObject *op = PyObject_GC_New(PyFunctionObject,
                                        &PyFunction_Type);
    /* Interned "__name__" key, created on first use and kept for later calls. */
    static PyObject *__name__ = 0;
    PyObject *doc;
    PyObject *consts;
    PyObject *module;

    if (op == NULL)
        return NULL;

    op->func_weakreflist = NULL;
    Py_INCREF(code);
    op->func_code = code;
    Py_INCREF(globals);
    op->func_globals = globals;
    op->func_name = ((PyCodeObject *)code)->co_name;
    Py_INCREF(op->func_name);
    op->func_defaults = NULL; /* No default arguments */
    op->func_closure = NULL;

    /* The docstring is the first constant, but only when that constant
       is a str or unicode object; otherwise __doc__ is None. */
    consts = ((PyCodeObject *)code)->co_consts;
    if (PyTuple_Size(consts) >= 1) {
        doc = PyTuple_GetItem(consts, 0);
        if (!PyString_Check(doc) && !PyUnicode_Check(doc))
            doc = Py_None;
    }
    else
        doc = Py_None;
    Py_INCREF(doc);
    op->func_doc = doc;
    op->func_dict = NULL;
    op->func_module = NULL;

    /* __module__: If module name is in globals, use it.
       Otherwise, use None.
    */
    if (!__name__) {
        __name__ = PyString_InternFromString("__name__");
        if (!__name__) {
            Py_DECREF(op);
            return NULL;
        }
    }
    module = PyDict_GetItem(globals, __name__);
    if (module) {
        Py_INCREF(module);
        op->func_module = module;
    }

    _PyObject_GC_TRACK(op);
    return (PyObject *)op;
}

无参数函数调用

创建无参数函数时,字节码MAKE_FUNCTION的指令参数(oparg,即默认值个数)为0,无需处理参数默认值。

无参数函数调用比较简单,从call_function函数进入,前面判断是Method还是CFunction的调用,最后进入fast_function函数。

参数的传递

局部变量意义

函数调用

/* Call the callable that sits on the value stack below its arguments.
 *
 * pp_stack points at the caller's stack-top pointer.  oparg packs two
 * counts: the low byte is the number of positional arguments (na) and the
 * next byte is the number of keyword arguments (nk).  Each keyword argument
 * occupies two stack slots, so the arguments take na + 2*nk slots in total
 * and the callable is found one slot below them.
 *
 * Returns the call result in x (set to NULL on the err_args() path).  Before
 * returning, every stack entry above pfunc's position is popped and
 * DECREF'ed.
 */
static PyObject *
call_function(PyObject ***pp_stack, int oparg					// pointer to the value-stack top pointer, and the bytecode argument (encodes the argument counts)
#ifdef WITH_TSC
                , uint64* pintr0, uint64* pintr1
#endif
                )
{
    int na = oparg & 0xff;	// low byte: number of positional arguments
    int nk = (oparg>>8) & 0xff;		// next byte: number of keyword arguments
    int n = na + 2 * nk;   
    PyObject **pfunc = (*pp_stack) - n - 1;    // stack slot holding the callable being invoked
    PyObject *func = *pfunc;
    PyObject *x, *w;

    /* Always dispatch PyCFunction first, because these are
       presumed to be the most frequent callable object.
    */
    if (PyCFunction_Check(func) && nk == 0) {
        int flags = PyCFunction_GET_FLAGS(func);
        /* NOTE(review): tstate is not referenced directly below; presumably
           the C_TRACE macro uses it -- confirm before renaming or removing. */
        PyThreadState *tstate = PyThreadState_GET();

        PCALL(PCALL_CFUNCTION);
        if (flags & (METH_NOARGS | METH_O)) {
            PyCFunction meth = PyCFunction_GET_FUNCTION(func);
            PyObject *self = PyCFunction_GET_SELF(func);
            if (flags & METH_NOARGS && na == 0) {
                C_TRACE(x, (*meth)(self,NULL));
            }
            else if (flags & METH_O && na == 1) {
                /* The single argument is taken off the stack here and
                   released once the call has returned. */
                PyObject *arg = EXT_POP(*pp_stack);
                C_TRACE(x, (*meth)(self,arg));
                Py_DECREF(arg);
            }
            else {
                /* Argument count does not match the METH_* flag. */
                err_args(func, flags, na);
                x = NULL;
            }
        }
        else {
            /* Other C functions receive their positional arguments via
               the object returned by load_args(). */
            PyObject *callargs;
            callargs = load_args(pp_stack, na);
            READ_TIMESTAMP(*pintr0);
            C_TRACE(x, PyCFunction_Call(func,callargs,NULL));
            READ_TIMESTAMP(*pintr1);
            Py_XDECREF(callargs);
        }
    } else {
        if (PyMethod_Check(func) && PyMethod_GET_SELF(func) != NULL) {
            /* optimize access to bound methods */
            /* The method object in the callable's slot is replaced by its
               self, which thereby becomes one extra leading positional
               argument of the underlying function. */
            PyObject *self = PyMethod_GET_SELF(func);
            PCALL(PCALL_METHOD);
            PCALL(PCALL_BOUND_METHOD);
            Py_INCREF(self);
            func = PyMethod_GET_FUNCTION(func);
            Py_INCREF(func);
            Py_DECREF(*pfunc);
            *pfunc = self;
            na++;
            n++;
        } else
            Py_INCREF(func);
        READ_TIMESTAMP(*pintr0);
				/* Perform the call: Python functions go through
				   fast_function(), anything else through do_call(). */
        if (PyFunction_Check(func))
            x = fast_function(func, pp_stack, n, na, nk);
        else
            x = do_call(func, pp_stack, na, nk);
        READ_TIMESTAMP(*pintr1);
        Py_DECREF(func);
    }

    /* Clear the stack of the function object.  Also removes
       the arguments in case they weren't consumed already
       (fast_function() and err_args() leave them on the stack).
     */
    while ((*pp_stack) > pfunc) {
        w = EXT_POP(*pp_stack);
        Py_DECREF(w);
        PCALL(PCALL_POP);
    }
    return x;
}
/* fast_function() services a call to a Python function directly from the
   caller's value stack, so no argument tuple has to be built.

   func is the function object; pp_stack points at the caller's stack-top
   pointer; n is the number of stack slots occupied by the arguments, na the
   number of positional arguments and nk the number of keyword arguments.

   When the function has no defaults, is called with exactly co_argcount
   positional arguments and no keywords, and its code flags are exactly
   CO_OPTIMIZED | CO_NEWLOCALS | CO_NOFREE, the frame is set up inline and
   evaluated.  Every other call is forwarded to PyEval_EvalCodeEx().

   The arguments are left on the caller's stack in both cases.
*/

static PyObject *
fast_function(PyObject *func, PyObject ***pp_stack, int n, int na, int nk)
{
    PyCodeObject *code = (PyCodeObject *)PyFunction_GET_CODE(func);
    PyObject *globals = PyFunction_GET_GLOBALS(func);
    PyObject *argdefs = PyFunction_GET_DEFAULTS(func);

    PCALL(PCALL_FUNCTION);
    PCALL(PCALL_FAST_FUNCTION);

    /* General path: anything that is not the plain positional-only case. */
    if (argdefs != NULL || code->co_argcount != n || nk != 0 ||
        code->co_flags != (CO_OPTIMIZED | CO_NEWLOCALS | CO_NOFREE)) {
        PyObject **defaults = NULL;
        int ndefaults = 0;

        /* Expose the default values, when present, as a raw array. */
        if (argdefs != NULL) {
            defaults = &PyTuple_GET_ITEM(argdefs, 0);
            ndefaults = Py_SIZE(argdefs);
        }
        return PyEval_EvalCodeEx(code, globals,
                                 (PyObject *)NULL,
                                 (*pp_stack) - n, na,
                                 (*pp_stack) - 2 * nk, nk,
                                 defaults, ndefaults,
                                 PyFunction_GET_CLOSURE(func));
    }

    /* Quick path: copy the positional arguments straight into the new
       frame's local slots and run it. */
    {
        PyFrameObject *frame;
        PyObject *result = NULL;
        PyThreadState *tstate = PyThreadState_GET();
        PyObject **locals_base, **args_base;
        int idx;

        PCALL(PCALL_FASTER_FUNCTION);
        assert(globals != NULL);
        /* XXX A specialized PyFrame_New() that takes no locals and skips
           the sanity checks on builtins might be worthwhile here.
        */
        assert(tstate != NULL);
        frame = PyFrame_New(tstate, code, globals, NULL);
        if (frame == NULL)
            return NULL;

        locals_base = frame->f_localsplus;
        args_base = (*pp_stack) - n;

        for (idx = 0; idx < n; idx++) {
            PyObject *arg = args_base[idx];
            Py_INCREF(arg);
            locals_base[idx] = arg;
        }

        result = PyEval_EvalFrameEx(frame, 0);

        /* Recursion depth is raised for the duration of the frame's
           release and restored right after. */
        ++tstate->recursion_depth;
        Py_DECREF(frame);
        --tstate->recursion_depth;
        return result;
    }
}
/* Build a frame for code object `co`, bind the call's arguments into the
 * frame's local slots, and evaluate it.
 *
 *   globals, locals   namespaces handed to PyFrame_New(); globals must not
 *                     be NULL (SystemError is set and NULL returned).
 *   args, argcount    positional argument values.
 *   kws, kwcount      keyword arguments laid out as kwcount (name, value)
 *                     pairs, i.e. 2*kwcount pointers.
 *   defs, defcount    default values for the trailing positional parameters.
 *   closure           indexed with PyTuple_GET_ITEM once per entry of
 *                     co->co_freevars; only read when co_freevars is
 *                     non-empty.
 *
 * Returns the result of PyEval_EvalFrameEx(), or a new generator object when
 * co has CO_GENERATOR set, or NULL when frame creation or argument binding
 * fails (retval is still NULL on every `goto fail`).
 */
PyObject *
PyEval_EvalCodeEx(PyCodeObject *co, PyObject *globals, PyObject *locals,
           PyObject **args, int argcount, PyObject **kws, int kwcount,
           PyObject **defs, int defcount, PyObject *closure)
{
    register PyFrameObject *f;
    register PyObject *retval = NULL;
    register PyObject **fastlocals, **freevars;
    PyThreadState *tstate = PyThreadState_GET();
    PyObject *x, *u;

    if (globals == NULL) {
        PyErr_SetString(PyExc_SystemError,
                        "PyEval_EvalCodeEx: NULL globals");
        return NULL;
    }

    assert(tstate != NULL);
    assert(globals != NULL);
    f = PyFrame_New(tstate, co, globals, locals);
    if (f == NULL)
        return NULL;

    /* NOTE(review): SETLOCAL/GETLOCAL are macros defined elsewhere;
       presumably they index fastlocals by name -- confirm before renaming. */
    fastlocals = f->f_localsplus;
    /* The cell/free variable slots start right after the co_nlocals locals. */
    freevars = f->f_localsplus + co->co_nlocals;

    if (co->co_argcount > 0 ||
        co->co_flags & (CO_VARARGS | CO_VARKEYWORDS)) {
        int i;
        int n = argcount;
        PyObject *kwdict = NULL;

				/* The function accepts excess keyword arguments:
				   create their dict and store it in the slot after
				   the positional parameters (and after the excess
				   positional tuple, when CO_VARARGS is set too). */
        if (co->co_flags & CO_VARKEYWORDS) {
            kwdict = PyDict_New();
            if (kwdict == NULL)
                goto fail;
            i = co->co_argcount;
            if (co->co_flags & CO_VARARGS)
                i++;
            SETLOCAL(i, kwdict);
        }
        /* More positional arguments than parameters is an error unless
           the function takes excess positional arguments. */
        if (argcount > co->co_argcount) {
            if (!(co->co_flags & CO_VARARGS)) {
                PyErr_Format(PyExc_TypeError,
                    "%.200s() takes %s %d "
                    "argument%s (%d given)",
                    PyString_AsString(co->co_name),
                    defcount ? "at most" : "exactly",
                    co->co_argcount,
                    co->co_argcount == 1 ? "" : "s",
                    argcount + kwcount);
                goto fail;
            }
            n = co->co_argcount;
        }
        /* Bind the first n positional arguments to the first n locals. */
        for (i = 0; i < n; i++) {
            x = args[i];
            Py_INCREF(x);
            SETLOCAL(i, x);
        }

				/* Excess positional arguments: pack args[n..argcount)
				   into a tuple stored in slot co_argcount. */
        if (co->co_flags & CO_VARARGS) {
            u = PyTuple_New(argcount - n);
            if (u == NULL)
                goto fail;
            SETLOCAL(co->co_argcount, u);
            for (i = n; i < argcount; i++) {
                x = args[i];
                Py_INCREF(x);
                PyTuple_SET_ITEM(u, i-n, x);
            }
        }

				/* Bind keyword arguments: each one either matches a
				   parameter name or goes into kwdict. */
        for (i = 0; i < kwcount; i++) {
            PyObject **co_varnames;
            PyObject *keyword = kws[2*i];
            PyObject *value = kws[2*i + 1];
            int j;
            if (keyword == NULL || !(PyString_Check(keyword)
#ifdef Py_USING_UNICODE
                                     || PyUnicode_Check(keyword)
#endif
                        )) {
                PyErr_Format(PyExc_TypeError,
                    "%.200s() keywords must be strings",
                    PyString_AsString(co->co_name));
                goto fail;
            }
            /* Speed hack: do raw pointer compares. As names are
               normally interned this should almost always hit. */
            co_varnames = ((PyTupleObject *)(co->co_varnames))->ob_item;
            for (j = 0; j < co->co_argcount; j++) {
                PyObject *nm = co_varnames[j];
                if (nm == keyword)
                    goto kw_found;
            }
            /* Slow fallback, just in case */
            for (j = 0; j < co->co_argcount; j++) {
                PyObject *nm = co_varnames[j];
                int cmp = PyObject_RichCompareBool(
                    keyword, nm, Py_EQ);
                if (cmp > 0)
                    goto kw_found;
                else if (cmp < 0)
                    goto fail;
            }
            /* No parameter has this name: without a kwdict it is an
               unexpected keyword, otherwise it is stored in kwdict. */
            if (kwdict == NULL) {
                PyObject *kwd_str = kwd_as_string(keyword);
                if (kwd_str) {
                    PyErr_Format(PyExc_TypeError,
                                 "%.200s() got an unexpected "
                                 "keyword argument '%.400s'",
                                 PyString_AsString(co->co_name),
                                 PyString_AsString(kwd_str));
                    Py_DECREF(kwd_str);
                }
                goto fail;
            }
            /* NOTE(review): the return value of PyDict_SetItem() is not
               checked here, so a failed insertion is not reported. */
            PyDict_SetItem(kwdict, keyword, value);
            continue;
          kw_found:
            /* Slot j is already filled: the parameter received a value
               both positionally and by keyword (or by keyword twice). */
            if (GETLOCAL(j) != NULL) {
                PyObject *kwd_str = kwd_as_string(keyword);
                if (kwd_str) {
                    PyErr_Format(PyExc_TypeError,
                                 "%.200s() got multiple "
                                 "values for keyword "
                                 "argument '%.400s'",
                                 PyString_AsString(co->co_name),
                                 PyString_AsString(kwd_str));
                    Py_DECREF(kwd_str);
                }
                goto fail;
            }
            Py_INCREF(value);
            SETLOCAL(j, value);
        }
        /* Fewer positional arguments than parameters: the parameters
           without defaults (indices below m) must have been filled by
           keyword; the rest fall back to their default values. */
        if (argcount < co->co_argcount) {
            int m = co->co_argcount - defcount;
            for (i = argcount; i < m; i++) {
                if (GETLOCAL(i) == NULL) {
                    /* Count the filled parameter slots for the message. */
                    int j, given = 0;
                    for (j = 0; j < co->co_argcount; j++)
                        if (GETLOCAL(j))
                            given++;
                    PyErr_Format(PyExc_TypeError,
                        "%.200s() takes %s %d "
                        "argument%s (%d given)",
                        PyString_AsString(co->co_name),
                        ((co->co_flags & CO_VARARGS) ||
                         defcount) ? "at least"
                                   : "exactly",
                        m, m == 1 ? "" : "s", given);
                    goto fail;
                }
            }
            /* Skip the defaults whose parameters were already supplied
               positionally, then fill every slot that is still empty. */
            if (n > m)
                i = n - m;
            else
                i = 0;
            for (; i < defcount; i++) {
                if (GETLOCAL(m+i) == NULL) {
                    PyObject *def = defs[i];
                    Py_INCREF(def);
                    SETLOCAL(m+i, def);
                }
            }
        }
    }
    else if (argcount > 0 || kwcount > 0) {
        PyErr_Format(PyExc_TypeError,
                     "%.200s() takes no arguments (%d given)",
                     PyString_AsString(co->co_name),
                     argcount + kwcount);
        goto fail;
    }
    /* Allocate and initialize storage for cell vars, and copy free
       vars into frame.  This isn't too efficient right now. */
    if (PyTuple_GET_SIZE(co->co_cellvars)) {
        int i, j, nargs, found;
        char *cellname, *argname;
        PyObject *c;

        /* nargs counts every argument slot, including the excess
           positional tuple and the excess keyword dict when present. */
        nargs = co->co_argcount;
        if (co->co_flags & CO_VARARGS)
            nargs++;
        if (co->co_flags & CO_VARKEYWORDS)
            nargs++;

        /* Initialize each cell var, taking into account
           cell vars that are initialized from arguments.

           Should arrange for the compiler to put cellvars
           that are arguments at the beginning of the cellvars
           list so that we can march over it more efficiently?
        */
        for (i = 0; i < PyTuple_GET_SIZE(co->co_cellvars); ++i) {
            cellname = PyString_AS_STRING(
                PyTuple_GET_ITEM(co->co_cellvars, i));
            found = 0;
            for (j = 0; j < nargs; j++) {
                argname = PyString_AS_STRING(
                    PyTuple_GET_ITEM(co->co_varnames, j));
                if (strcmp(cellname, argname) == 0) {
                    /* The cell var is also an argument: the new cell
                       is created from the argument's current value. */
                    c = PyCell_New(GETLOCAL(j));
                    if (c == NULL)
                        goto fail;
                    GETLOCAL(co->co_nlocals + i) = c;
                    found = 1;
                    break;
                }
            }
            if (found == 0) {
                c = PyCell_New(NULL);
                if (c == NULL)
                    goto fail;
                SETLOCAL(co->co_nlocals + i, c);
            }
        }
    }
    /* Free variables: copy the closure's items into the slots that follow
       the cell variables. */
    if (PyTuple_GET_SIZE(co->co_freevars)) {
        int i;
        for (i = 0; i < PyTuple_GET_SIZE(co->co_freevars); ++i) {
            PyObject *o = PyTuple_GET_ITEM(closure, i);
            Py_INCREF(o);
            freevars[PyTuple_GET_SIZE(co->co_cellvars) + i] = o;
        }
    }

    if (co->co_flags & CO_GENERATOR) {
        /* Don't need to keep the reference to f_back, it will be set
         * when the generator is resumed. */
        Py_XDECREF(f->f_back);
        f->f_back = NULL;

        PCALL(PCALL_GENERATOR);

        /* Create a new generator that owns the ready to run frame
         * and return that as the value. */
        return PyGen_New(f);
    }

    retval = PyEval_EvalFrameEx(f,0);

fail: /* Jump here from prelude on failure */

    /* decref'ing the frame can cause __del__ methods to get invoked,
       which can call back into Python.  While we're done with the
       current Python frame (f), the associated C stack is still in use,
       so recursion_depth must be boosted for the duration.
    */
    assert(tstate != NULL);
    ++tstate->recursion_depth;
    Py_DECREF(f);
    --tstate->recursion_depth;
    return retval;
}