-
Notifications
You must be signed in to change notification settings - Fork 766
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[Change]修改目前 Paddle docs 中 COPY-FROM 的逻辑 #5941
Changes from 1 commit
b176a47
438396c
337c5f2
03fbb79
fc7774f
0b881c6
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -5,4 +5,6 @@ | |
.idea | ||
build | ||
.vscode | ||
venv/ | ||
venv/ | ||
|
||
*.pyc |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -87,19 +87,28 @@ def find_codeblock_needed(cf_info): | |
if cf_info['src_api'] in api_name_2_id_map: | ||
api_info = api_info_dict[api_name_2_id_map[cf_info['src_api']]] | ||
if 'docstring' in api_info: | ||
codeblocks = extract_code_blocks_from_docstr(api_info['docstring']) | ||
codeblocks = extract_code_blocks_from_docstr( | ||
api_info['docstring'], google_style=False | ||
) | ||
if not codeblocks: | ||
logger.warning('found none codeblocks for %s', str(cf_info)) | ||
logger.warning( | ||
'and the docstring is: %s', api_info['docstring'] | ||
) | ||
return None | ||
cb_name = cf_info['cb_name'] | ||
return ( | ||
codeblocks[0] | ||
if cb_name is None | ||
else find_codeblock_needed_by_name(cb_name, codeblocks) | ||
) | ||
|
||
# we use `cb_name` first, if not exist, then use the first codeblock without name, or codeblocks[0]. | ||
cb = None | ||
if cb_name is not None: | ||
cb = find_codeblock_needed_by_name(cb_name, codeblocks) | ||
else: | ||
for _cb in codeblocks: | ||
if _cb.get('name', None) is None: | ||
cb = _cb | ||
break | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. 目前的逻辑是:
我有点担忧的是,这是否会造成混乱,其实如果只有 1、2 或者只有 1、3 我觉得倒没什么,因为逻辑是说得通的,但 1、2、3 都有就有些奇怪了,没有文档很难知道这里面发生了什么 我比较建议,不要直接返回全局第一个,默认就是 Example 下的第一个,Example 外的,强制使用 name 来索引,也即这里的逻辑删掉 3,只保留 1、2,这样仍然可以和现有逻辑保持兼容,而且不会增加额外的心智负担 这样的话可以这样写: example_codeblocks = [codeblock for codeblock in codeblocks if codeblock.get("in_examples")]
return (
example_codeblocks[0]
if cb_name is None
else find_codeblock_needed_by_name(cb_name, codeblocks)
) There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. 跟我一开始想的一样!!!😆 其实,后面规定 就这么办!🎉🎉🎉 |
||
|
||
return cb if cb is not None else codeblocks[0] | ||
else: | ||
logger.warning('%s not in api_name_2_id_map', cf_info['src_api']) | ||
return None | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -1025,13 +1025,14 @@ def filter_out_object_of_api_info_dict(): | |
del api_info_dict[id_api]['object'] | ||
|
||
|
||
def extract_code_blocks_from_docstr(docstr): | ||
def extract_code_blocks_from_docstr(docstr, google_style=True): | ||
""" | ||
extract code-blocks from the given docstring. | ||
DON'T include the multiline-string definition in code-blocks. | ||
The *Examples* section must be the last. | ||
Args: | ||
docstr(str): docstring | ||
google_style(bool): if not use google_style, the code blocks will be extracted from all the parts of docstring. | ||
Return: | ||
code_blocks: A list of code-blocks, indent removed. | ||
element {'name': the code-block's name, 'id': sequence id. | ||
|
@@ -1040,9 +1041,32 @@ def extract_code_blocks_from_docstr(docstr): | |
code_blocks = [] | ||
|
||
mo = re.search(r"Examples?:", docstr) | ||
if mo is None: | ||
return code_blocks | ||
ds_list = docstr[mo.start() :].replace("\t", ' ').split("\n") | ||
if google_style: | ||
if mo is None: | ||
return code_blocks | ||
|
||
ds_list = docstr[mo.start() :].replace("\t", ' ').split("\n") | ||
|
||
else: | ||
if mo is None: | ||
ds_list = docstr.replace("\t", ' ').split("\n") | ||
|
||
else: | ||
# because we use codeblocks[0] as the default (when we can NOT convert `COPY-FROM` in other parts of the docstring), | |
# we move the `Examples` part before the other parts, so that its codeblocks are extracted first. | |
_ds_exmaple_part = docstr[mo.start() :] | ||
_ds_other_part = docstr[: mo.start()] | ||
|
||
ds_list = ( | ||
( | ||
_ds_exmaple_part | ||
+ ("\n" if not _ds_exmaple_part.endswith("\n") else "") | ||
+ _ds_other_part | ||
) | ||
.replace("\t", ' ') | ||
.split("\n") | ||
) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. 只是把 Examples 提前了?看起来是一个 hack,目标应该只是提取全部代码块,能否不用 hack 的方式实现呢? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
这两个问题其实可以算是一个问题,我一起回复吧~ 简单说,是为了兼顾目前 因为,目前文档里面使用 也就是说,目前都是默认使用 原有代码也是这样的处理方式: return (
codeblocks[0]
if cb_name is None
else find_codeblock_needed_by_name(cb_name, codeblocks)
) 理想的方式是:
如此,就可以进行匹配,其中 但是,现在的情况是:
至于
在 docstring 中会提取到两段代码,两段代码都没有名称,后续匹配的时候,就会把第一段代码转换到 所以,如果能够规范 @SigureMo 帮忙看看有没有什么好方法 ~ 🤔 |
||
|
||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. 这里的逻辑是不是过于复杂了 example_start = float("inf")
for lineno, line in enumerate(docstr.splitlines()):
if re.match("^Examples?:", line.lstrip()):
example_start = lineno
if not google_style or lineno >= example_start:
docstr_list.append(line) 类似这样的思路,整体代码会简洁很多 这里不需要 replace 掉 tabs 了,我在 PaddlePaddle/Paddle#54796 会清理掉剩余全部 tabs 不建议按照原来的代码风格用缩写,原来的代码有点……一言难尽:joy:,改这个文件也是辛苦你了 There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. 这。。。有bug。。。example_start 不对~ 😂 改了一下,也加了单测~ 请评审!:) |
||
lastlineindex = len(ds_list) - 1 | ||
|
||
cb_start_pat = re.compile(r"code-block::\s*python") | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,44 @@ | ||
.. _cn_api_profiler_record_event: | ||
|
||
RecordEvent | ||
--------------------- | ||
|
||
.. py:class:: paddle.profiler.RecordEvent(name: str, event_type: TracerEventType=TracerEventType.UserDefined) | ||
|
||
用于用户自定义打点,记录某一段代码运行的时间。 | ||
|
||
|
||
参数 | ||
::::::::: | ||
|
||
- **name** (str) - 记录打点的名字。 | ||
- **event_type** (TracerEventType,可选) - 可选参数,默认值为 TracerEventType.UserDefined。该参数预留为内部使用,最好不要指定该参数。 | ||
|
||
代码示例 | ||
:::::::::: | ||
|
||
COPY-FROM: paddle.profiler.RecordEvent:code-example1 | ||
|
||
.. note:: | ||
RecordEvent 只有在 :ref:`性能分析器 <cn_api_profiler_profiler>` 处于 RECORD 状态才会生效。 | ||
|
||
方法 | ||
:::::::::::: | ||
begin() | ||
''''''''' | ||
|
||
记录开始的时间。 | ||
|
||
**代码示例** | ||
|
||
COPY-FROM: paddle.profiler.RecordEvent.begin:code-example2 | ||
|
||
|
||
end() | ||
''''''''' | ||
|
||
记录结束的时间。 | ||
|
||
**代码示例** | ||
|
||
COPY-FROM: paddle.profiler.RecordEvent.end:code-example3 |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,81 @@ | ||
.. _cn_api_profiler_record_event: | ||
|
||
RecordEvent | ||
--------------------- | ||
|
||
.. py:class:: paddle.profiler.RecordEvent(name: str, event_type: TracerEventType=TracerEventType.UserDefined) | ||
|
||
用于用户自定义打点,记录某一段代码运行的时间。 | ||
|
||
|
||
参数 | ||
::::::::: | ||
|
||
- **name** (str) - 记录打点的名字。 | ||
- **event_type** (TracerEventType,可选) - 可选参数,默认值为 TracerEventType.UserDefined。该参数预留为内部使用,最好不要指定该参数。 | ||
|
||
代码示例 | ||
:::::::::: | ||
|
||
|
||
.. code-block:: python | ||
:name: code-example1 | ||
|
||
import paddle | ||
import paddle.profiler as profiler | ||
# method1: using context manager | ||
with profiler.RecordEvent("record_add"): | ||
data1 = paddle.randn(shape=[3]) | ||
data2 = paddle.randn(shape=[3]) | ||
result = data1 + data2 | ||
# method2: call begin() and end() | ||
record_event = profiler.RecordEvent("record_add") | ||
record_event.begin() | ||
data1 = paddle.randn(shape=[3]) | ||
data2 = paddle.randn(shape=[3]) | ||
result = data1 + data2 | ||
record_event.end() | ||
.. note:: | ||
RecordEvent 只有在 :ref:`性能分析器 <cn_api_profiler_profiler>` 处于 RECORD 状态才会生效。 | ||
|
||
方法 | ||
:::::::::::: | ||
begin() | ||
''''''''' | ||
|
||
记录开始的时间。 | ||
|
||
**代码示例** | ||
|
||
|
||
.. code-block:: python | ||
:name: code-example2 | ||
|
||
import paddle | ||
import paddle.profiler as profiler | ||
record_event = profiler.RecordEvent("record_sub") | ||
record_event.begin() | ||
data1 = paddle.randn(shape=[3]) | ||
data2 = paddle.randn(shape=[3]) | ||
result = data1 - data2 | ||
record_event.end() | ||
|
||
end() | ||
''''''''' | ||
|
||
记录结束的时间。 | ||
|
||
**代码示例** | ||
|
||
|
||
.. code-block:: python | ||
:name: code-example3 | ||
|
||
import paddle | ||
import paddle.profiler as profiler | ||
record_event = profiler.RecordEvent("record_mul") | ||
record_event.begin() | ||
data1 = paddle.randn(shape=[3]) | ||
data2 = paddle.randn(shape=[3]) | ||
result = data1 * data2 | ||
record_event.end() |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,27 @@ | ||
{ | ||
"83047248": { | ||
"all_names": [ | ||
"paddle.profiler.RecordEvent", | ||
"paddle.profiler.utils.RecordEvent" | ||
], | ||
"id": 83047248, | ||
"type": "type", | ||
"docstring": "Interface for recording a time range by user defined.\n\nArgs:\n name (str): Name of the record event.\n event_type (TracerEventType, optional): Optional, default value is\n `TracerEventType.PythonUserDefined`. It is reserved for internal\n purpose, and it is better not to specify this parameter.\n\nExamples:\n .. code-block:: python\n :name: code-example1\n\n import paddle\n import paddle.profiler as profiler\n # method1: using context manager\n with profiler.RecordEvent(\"record_add\"):\n data1 = paddle.randn(shape=[3])\n data2 = paddle.randn(shape=[3])\n result = data1 + data2\n # method2: call begin() and end()\n record_event = profiler.RecordEvent(\"record_add\")\n record_event.begin()\n data1 = paddle.randn(shape=[3])\n data2 = paddle.randn(shape=[3])\n result = data1 + data2\n record_event.end()\n\n**Note**:\n RecordEvent will take effect only when :ref:`Profiler <api_paddle_profiler_Profiler>` is on and at the state of `RECORD`." | ||
}, | ||
"139946288563392": { | ||
"all_names": [ | ||
"paddle.profiler.RecordEvent.begin" | ||
], | ||
"id": 139946288563392, | ||
"type": "function", | ||
"docstring": "Record the time of beginning.\n\nExamples:\n\n .. code-block:: python\n :name: code-example2\n\n import paddle\n import paddle.profiler as profiler\n record_event = profiler.RecordEvent(\"record_sub\")\n record_event.begin()\n data1 = paddle.randn(shape=[3])\n data2 = paddle.randn(shape=[3])\n result = data1 - data2\n record_event.end()" | ||
}, | ||
"139946288563536": { | ||
"all_names": [ | ||
"paddle.profiler.RecordEvent.end" | ||
], | ||
"id": 139946288563536, | ||
"type": "function", | ||
"docstring": "Record the time of ending.\n\nExamples:\n\n .. code-block:: python\n :name: code-example3\n\n import paddle\n import paddle.profiler as profiler\n record_event = profiler.RecordEvent(\"record_mul\")\n record_event.begin()\n data1 = paddle.randn(shape=[3])\n data2 = paddle.randn(shape=[3])\n result = data1 * data2\n record_event.end()" | ||
} | ||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,42 @@ | ||
.. _cn_api_paddle_tensor_gather: | ||
|
||
gather | ||
------------------------------- | ||
|
||
.. py:function:: paddle.gather(x, index, axis=None, name=None) | ||
|
||
根据索引 index 获取输入 ``x`` 的指定 ``axis`` 维度的条目,并将它们拼接在一起。 | |
|
||
.. code-block:: python | ||
|
||
Given: | ||
|
||
X = [[1, 2], | ||
[3, 4], | ||
[5, 6]] | ||
|
||
Index = [1, 2] | ||
|
||
axis = 0 | ||
|
||
Then: | ||
|
||
Out = [[3, 4], | ||
[5, 6]] | ||
|
||
参数 | ||
:::::::::::: | ||
- **x** (Tensor) - 输入 Tensor,秩 ``rank >= 1``,支持的数据类型包括 int32、int64、float32、float64 和 uint8 (CPU)、float16(GPU) 。 | ||
- **index** (Tensor) - 索引 Tensor,秩 ``rank = 0`` 或者 ``rank = 1``,数据类型为 int32 或 int64。 | |
- **axis** (Tensor) - 指定 index 获取输入的维度,``axis`` 的类型可以是 int 或者 Tensor,当 ``axis`` 为 Tensor 的时候其数据类型为 int32 或者 int64。默认值为 None,当 ``axis`` 为 None 的时候其值为 0。 | |
- **name** (str,可选) - 具体用法请参见 :ref:`api_guide_Name`,一般无需设置,默认值为 None。 | ||
|
||
返回 | ||
:::::::::::: | ||
Tensor,当 index 为一维 Tensor 时,返回和输入 Tensor 的形状相同的 Tensor。当 index 为零维 Tensor 时,返回 Tensor 相对于输入 Tensor 会降维, axis 指向的维度会被降维。 | ||
|
||
|
||
代码示例 | ||
:::::::::::: | ||
|
||
COPY-FROM: paddle.gather |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,50 @@ | ||
.. _cn_api_paddle_tensor_gather: | ||
|
||
gather | ||
------------------------------- | ||
|
||
.. py:function:: paddle.gather(x, index, axis=None, name=None) | ||
|
||
根据索引 index 获取输入 ``x`` 的指定 ``axis`` 维度的条目,并将它们拼接在一起。 | |
|
||
.. code-block:: python | ||
|
||
Given: | ||
|
||
X = [[1, 2], | ||
[3, 4], | ||
[5, 6]] | ||
|
||
Index = [1, 2] | ||
|
||
axis = 0 | ||
|
||
Then: | ||
|
||
Out = [[3, 4], | ||
[5, 6]] | ||
|
||
参数 | ||
:::::::::::: | ||
- **x** (Tensor) - 输入 Tensor,秩 ``rank >= 1``,支持的数据类型包括 int32、int64、float32、float64 和 uint8 (CPU)、float16(GPU) 。 | ||
- **index** (Tensor) - 索引 Tensor,秩 ``rank = 0`` 或者 ``rank = 1``,数据类型为 int32 或 int64。 | |
- **axis** (Tensor) - 指定 index 获取输入的维度,``axis`` 的类型可以是 int 或者 Tensor,当 ``axis`` 为 Tensor 的时候其数据类型为 int32 或者 int64。默认值为 None,当 ``axis`` 为 None 的时候其值为 0。 | |
- **name** (str,可选) - 具体用法请参见 :ref:`api_guide_Name`,一般无需设置,默认值为 None。 | ||
|
||
返回 | ||
:::::::::::: | ||
Tensor,当 index 为一维 Tensor 时,返回和输入 Tensor 的形状相同的 Tensor。当 index 为零维 Tensor 时,返回 Tensor 相对于输入 Tensor 会降维, axis 指向的维度会被降维。 | ||
|
||
|
||
代码示例 | ||
:::::::::::: | ||
|
||
|
||
.. code-block:: python | ||
|
||
import paddle | ||
|
||
input = paddle.to_tensor([[1,2],[3,4],[5,6]]) | ||
index = paddle.to_tensor([0,1]) | ||
output = paddle.gather(input, index, axis=0) | ||
# expected output: [[1,2],[3,4]] |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,15 @@ | ||
{ | ||
"139687086729536": { | ||
"all_names": [ | ||
"paddle.fluid.Variable.gather", | ||
"paddle.fluid.framework.Variable.gather", | ||
"paddle.gather", | ||
"paddle.static.Variable.gather", | ||
"paddle.tensor.manipulation.gather", | ||
"paddle.Tensor.gather" | ||
], | ||
"id": 139687086729536, | ||
"type": "function", | ||
"docstring": "Output is obtained by gathering entries of ``axis``\nof ``x`` indexed by ``index`` and concatenate them together.\n\n.. code-block:: python\n\n\n Given:\n\n x = [[1, 2],\n [3, 4],\n [5, 6]]\n\n index = [1, 2]\n axis=[0]\n\n Then:\n\n out = [[3, 4],\n [5, 6]]\n\nArgs:\n x (Tensor): The source input tensor with rank>=1. Supported data type is\n int32, int64, float32, float64 and uint8 (only for CPU),\n float16 (only for GPU).\n index (Tensor): The index input tensor with rank=1. Data type is int32 or int64.\n axis (Tensor|int, optional): The axis of input to be gathered, it's can be int or a Tensor with data type is int32 or int64. The default value is None, if None, the ``axis`` is 0.\n name (str, optional): The default value is None. Normally there is no need for user to set this property.\n For more information, please refer to :ref:`api_guide_Name` .\n\nReturns:\n output (Tensor): The output is a tensor with the same rank as ``x``.\n\nExamples:\n\n .. code-block:: python\n\n import paddle\n\n input = paddle.to_tensor([[1,2],[3,4],[5,6]])\n index = paddle.to_tensor([0,1])\n output = paddle.gather(input, index, axis=0)\n # expected output: [[1,2],[3,4]]" | ||
} | ||
} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
为什么要返回第一个没有 name 的呢?这是有必要的吗?