diff --git a/.gitignore b/.gitignore index d570088..ab8d2d8 100644 --- a/.gitignore +++ b/.gitignore @@ -1,2 +1,2 @@ node_modules/ - +*.sublime-* diff --git a/Gemfile.lock b/Gemfile.lock index 8ee4226..567b987 100644 --- a/Gemfile.lock +++ b/Gemfile.lock @@ -16,4 +16,4 @@ DEPENDENCIES review-peg (= 0.2.2) BUNDLED WITH - 1.10.4 + 2.2.7 diff --git a/README.md b/README.md index be006a6..f34ebfe 100644 --- a/README.md +++ b/README.md @@ -1,3 +1,49 @@ +# google-translated version of `Unity Graphics Programming` vol. 1-4 (by `IndieVisualLab`) + +- html: + - https://freder.github.io/UnityGraphicsProgrammingBook1/html-translated/index.html +- epub: + - [UnityGraphicsProgrammingBook1.epub](https://github.com/freder/UnityGraphicsProgrammingBook1/blob/translation/html-translated/UnityGraphicsProgrammingBook1.epub) + - [UnityGraphicsProgrammingBook2.epub](https://github.com/freder/UnityGraphicsProgrammingBook1/blob/translation/html-translated/UnityGraphicsProgrammingBook2.epub) + - [UnityGraphicsProgrammingBook3.epub](https://github.com/freder/UnityGraphicsProgrammingBook1/blob/translation/html-translated/UnityGraphicsProgrammingBook3.epub) + - [UnityGraphicsProgrammingBook4.epub](https://github.com/freder/UnityGraphicsProgrammingBook1/blob/translation/html-translated/UnityGraphicsProgrammingBook4.epub) +- pdf: + - [Unity Graphics Programming Vol. 1.pdf](https://github.com/freder/UnityGraphicsProgrammingBook1/blob/translation/html-translated/Unity%20Graphics%20Programming%20Vol.%201.pdf) + - [Unity Graphics Programming Vol. 2.pdf](https://github.com/freder/UnityGraphicsProgrammingBook1/blob/translation/html-translated/Unity%20Graphics%20Programming%20Vol.%202.pdf) + - [Unity Graphics Programming Vol. 3.pdf](https://github.com/freder/UnityGraphicsProgrammingBook1/blob/translation/html-translated/Unity%20Graphics%20Programming%20Vol.%203.pdf) + - [Unity Graphics Programming Vol. 
4.pdf](https://github.com/freder/UnityGraphicsProgrammingBook1/blob/translation/html-translated/Unity%20Graphics%20Programming%20Vol.%204.pdf) + +# build + +coudn't get `npm run pdf` to work, so I'm rolling my own conversion, using `pandoc` and `calibre`. + +``` +# install dependencies +chmod +x setup.sh ; ./setup.sh + +# convert to html +REVIEW_PREFIX='/usr/local/lib/ruby/gems/2.7.0/bin/' npx grunt html \ + +# creates articles/index.html +node process.js + +# collect generated html files and images +mkdir html ; \ + mv articles/*.html html/ ; \ + cp -R articles/images html/images +``` + +- saved google-translated (using chrome extension) files +- "baked" translation (as saved files will still get translated on the fly, as you are scrolling down the page it seems): + - example: `node ../../bake-translation.js "http://localhost:5000/Preface.html" "Preface.html"` +- [removed a bunch of google translate artifacts](https://github.com/freder/UnityGraphicsProgrammingBook1/commit/9e0f9e753a5d68ee27f41a2ae44918e5911c4c68). +- convert to epub: `make-epubs.sh` +- convert to pdf, using `calibre` + +--- + +↓ original readme: + # Unity Graphics Programming vol.1  diff --git a/articles/MarchingCubes.html b/articles/MarchingCubes.html new file mode 100644 index 0000000..fcbbea3 --- /dev/null +++ b/articles/MarchingCubes.html @@ -0,0 +1,694 @@ + + + +
+ + + +マーチングキューブス法とは、ボリュームレンダリング法の一つで、スカラーデータで満たされた3次元ボクセルデータを、ポリゴンデータに変換するアルゴリズムです。William E. Lorensen と Harvey E. Cline によって1987年に最初の論文が発表されました。
+マーチングキューブス法は特許が取得されていましたが、2005年に特許が切れているので、現在は自由に使用できます。
+ +まず、ボリュームデータの空間を3次元グリッドで分割します。
+
++図7.1: 3次元ボリュームデータとグリッド分割 +
+次に分割したグリッドの1つを取り出してみましょう。グリッドの8つの角の値が閾値以上だったら 1 、閾値未満だったら 0 として、8頂点の境界を割り出します。
以下の図は、閾値を0.5とした場合の流れです。
++図7.2: 角の値に応じて境界を割り出す +
+その8つの角の組み合わせは256種類ありますが、回転や反転を駆使すると15種類に収まります。その15種類の組み合わせに対応した三角ポリゴンのパターンを割り当てます。
+
++図7.3: 角の組み合わせ +
+本章で解説するサンプルプロジェクトは、UnityGraphicsProgrammingのUnityプロジェクトhttps://github.com/IndieVisualLab/UnityGraphicsProgramming内にあるAssets/GPUMarchingCubesにあります。
+実装にあたり、Paul Bourke氏のPolygonising a scalar fieldのサイト*1を参考に、Unityに移植させて頂きました。
+[*1] Polygonising a scalar field http://paulbourke.net/geometry/polygonise/
今回はこのサンプルプロジェクトに沿って解説していきます。
+実装は大きくわけて3つあります。
+まずは、メッシュの初期化や描画登録をする GPUMarchingCubesDrawMesh クラスから作っていきます。
+ +前項で説明したとおり、マーチングキューブス法はグリッドの8つの角の組み合わせでポリゴンを生成するアルゴリズムです。リアルタイムにそれを行うには、動的にポリゴンを作る必要があります。
しかし、毎フレームCPU側(C#側)でメッシュの頂点配列を生成するのは非効率です。
そこで、GeometryShaderを使います。GeometryShaderは、大雑把に説明するとVertexShaderとFragmentShaderの間に位置するShaderで、VertexShaderで処理された頂点を増減させることができます。
例えば、1頂点の周囲に6つの頂点を追加して板ポリゴンを生成したりできます。
更に、Shader側(GPU側)で処理するのでとても高速です。
今回はGeometryShaderを使ってMarchingCubesのポリゴンを生成して表示してみます。
まず、 GPUMarchingCubesDrawMeshクラスで使う変数群を定義します。
+リスト7.1: 変数群の定義部分
+using UnityEngine;
+
+public class GPUMarchingCubesDrawMesh : MonoBehaviour {
+
+ #region public
+ public int segmentNum = 32; // グリッドの一辺の分割数
+
+ [Range(0,1)]
+ public float threashold = 0.5f; // メッシュ化するスカラー値のしきい値
+ public Material mat; // レンダリング用のマテリアル
+
+ public Color DiffuseColor = Color.green; // ディフューズカラー
+ public Color EmissionColor = Color.black; // 発光色
+ public float EmissionIntensity = 0; // 発光の強さ
+
+ [Range(0,1)]
+ public float metallic = 0; // メタリック感
+ [Range(0, 1)]
+ public float glossiness = 0.5f; // 光沢感
+ #endregion
+
+ #region private
+ int vertexMax = 0; // 頂点数
+ Mesh[] meshs = null; // Mesh配列
+ Material[] materials = null; // Meshごとのマテリアル配列
+ float renderScale = 1f / 32f; // 表示スケール
+ MarchingCubesDefines mcDefines = null; // MarchingCubes用定数配列群
+ #endregion
+
+}
+
+次にGeometryShaderに渡すためのメッシュを作成します。メッシュの頂点は、分割した3次元グリッド内に1個ずつ配置するようにします。例えば、一辺の分割数が64の場合、64*64*64=262,144個もの頂点が必要になります。
+しかし、Unity2017.1.1f1において、1つのメッシュの頂点数は65,535個が上限となってます。その為、メッシュ1つにつき、頂点数を65,535個以内に収める形で分割します。
+リスト7.2: メッシュ作成部分
+void Initialize()
+{
+ vertexMax = segmentNum * segmentNum * segmentNum;
+
+ Debug.Log("VertexMax " + vertexMax);
+
+ // 1Cubeの大きさをsegmentNumで分割してレンダリング時の大きさを決める
+ renderScale = 1f / segmentNum;
+
+ CreateMesh();
+
+ // シェーダーで使うMarchingCubes用の定数配列の初期化
+ mcDefines = new MarchingCubesDefines();
+}
+
+void CreateMesh()
+{
+ // Meshの頂点数は65535が上限なので、Meshを分割する
+ int vertNum = 65535;
+ int meshNum = Mathf.CeilToInt((float)vertexMax / vertNum); // 分割するMeshの数
+ Debug.Log("meshNum " + meshNum );
+
+ meshs = new Mesh[meshNum];
+ materials = new Material[meshNum];
+
+ // Meshのバウンズ計算
+ Bounds bounds = new Bounds(
+ transform.position,
+ new Vector3(segmentNum, segmentNum, segmentNum) * renderScale
+ );
+
+ int id = 0;
+ for (int i = 0; i < meshNum; i++)
+ {
+ // 頂点作成
+ Vector3[] vertices = new Vector3[vertNum];
+ int[] indices = new int[vertNum];
+ for(int j = 0; j < vertNum; j++)
+ {
+ vertices[j].x = id % segmentNum;
+ vertices[j].y = (id / segmentNum) % segmentNum;
+ vertices[j].z = (id / (segmentNum * segmentNum)) % segmentNum;
+
+ indices[j] = j;
+ id++;
+ }
+
+ // Mesh作成
+ meshs[i] = new Mesh();
+ meshs[i].vertices = vertices;
+ // GeometryShaderでポリゴンを作るのでMeshTopologyはPointsで良い
+ meshs[i].SetIndices(indices, MeshTopology.Points, 0);
+ meshs[i].bounds = bounds;
+
+ materials[i] = new Material(mat);
+ }
+}
+
+MarchingCubesDefinces.cs というソースには、マーチングキューブス法のレンダリングで使う定数配列と、その定数配列をシェーダーに渡すためのComputeBufferが定義されています。ComputeBufferとは、シェーダーで使うデータを格納するバッファです。データはGPU側のメモリに置かれるのでシェーダーからのアクセスが早いです。
+実は、マーチングキューブス法のレンダリングで使う定数配列は、シェーダー側で定義することは可能です。しかし、何故シェーダーで使う定数配列を、C#側で初期化しているのかというと、シェーダーにはリテラル値(直書きした値)の個数が4096までしか登録出来ない制限があるためです。膨大な定数配列をシェーダー内に定義すると、あっという間にリテラル値の数の上限に到達してしまいます。
+そこで、ComputeBufferに格納して渡すことで、リテラル値ではなくなるので上限にひっかからなくなります。そのため、工程が少々増えてしまいますが、C#側でComputeBufferに定数配列を格納してシェーダーに渡すようにしています。
+リスト7.3: ComputeBufferの初期化部分
+void Initialize()
+{
+ vertexMax = segmentNum * segmentNum * segmentNum;
+
+ Debug.Log("VertexMax " + vertexMax);
+
+ // 1Cubeの大きさをsegmentNumで分割してレンダリング時の大きさを決める
+ renderScale = 1f / segmentNum;
+
+ CreateMesh();
+
+ // シェーダーで使うMarchingCubes用の定数配列の初期化
+ mcDefines = new MarchingCubesDefines();
+}
+
+先程のInitialize()関数の中で、MarchingCubesDefinesの初期化を行っています。
+ +次にレンダリング処理を呼び出す関数です。
今回は、複数のメッシュを一度にレンダリングするのと、Unityのライティングの影響を受けられるようにするため、Graphics.DrawMesh() を使います。public 変数で定義したDiffuseColor等の意味は、シェーダー側の解説で説明します。
+前項の、MarchingCubesDefinesクラスのComputeBuffer達をmaterial.SetBufferでシェーダーに渡しています。
+リスト7.4: レンダリング部分
+void RenderMesh()
+{
+ Vector3 halfSize = new Vector3(segmentNum, segmentNum, segmentNum)
+ * renderScale * 0.5f;
+ Matrix4x4 trs = Matrix4x4.TRS(
+ transform.position,
+ transform.rotation,
+ transform.localScale
+ );
+
+ for (int i = 0; i < meshs.Length; i++)
+ {
+ materials[i].SetPass(0);
+ materials[i].SetInt("_SegmentNum", segmentNum);
+ materials[i].SetFloat("_Scale", renderScale);
+ materials[i].SetFloat("_Threashold", threashold);
+ materials[i].SetFloat("_Metallic", metallic);
+ materials[i].SetFloat("_Glossiness", glossiness);
+ materials[i].SetFloat("_EmissionIntensity", EmissionIntensity);
+
+ materials[i].SetVector("_HalfSize", halfSize);
+ materials[i].SetColor("_DiffuseColor", DiffuseColor);
+ materials[i].SetColor("_EmissionColor", EmissionColor);
+ materials[i].SetMatrix("_Matrix", trs);
+
+ Graphics.DrawMesh(meshs[i], Matrix4x4.identity, materials[i], 0);
+ }
+}
+
+リスト7.5: 呼び出し部分
+// Use this for initialization
+void Start ()
+{
+ Initialize();
+}
+
+void Update()
+{
+ RenderMesh();
+}
+
+Start()でInitialize()を呼び出してメッシュを生成、Update()関数でRenderMesh()を呼び出してレンダリングします。
Update()でRenderMesh()を呼び出す理由は、Graphics.DrawMesh()が即座に描画するわけではなく、「レンダリング処理に一旦登録する」という感じのものだからです。
登録することで、Unityがライトやシャドウを適応してくれます。似たような関数にGraphics.DrawMeshNow()がありますが、こちらは即座に描画するのでUnityのライトやシャドウが適応されません。また、Update()ではなく、OnRenderObject()やOnPostRender()などで呼び出す必要があります。
今回のシェーダは、大きく分けて「実体のレンダリング部」と「影のレンダリング部」の2つに分かれます。さらに、それぞれの中で、頂点シェーダ、ジオメトリシェーダ、フラグメントシェーダの3つのシェーダ関数が実行されます。
+シェーダーのソースが長いので、実装全体はサンプルプロジェクトの方を見てもらうことにして、要所要所だけ解説します。解説するシェーダーのファイルは、GPUMarchingCubesRenderMesh.shaderです。
+ +シェーダーの上の方では、レンダリングで使う構造体の定義をしています。
+リスト7.6: 構造体の定義部分
+// メッシュから渡ってくる頂点データ
+struct appdata
+{
+ float4 vertex : POSITION; // 頂点座標
+};
+
+// 頂点シェーダからジオメトリシェーダに渡すデータ
+struct v2g
+{
+ float4 pos : SV_POSITION; // 頂点座標
+};
+
+// 実体レンダリング時のジオメトリシェーダからフラグメントシェーダに渡すデータ
+struct g2f_light
+{
+ float4 pos : SV_POSITION; // ローカル座標
+ float3 normal : NORMAL; // 法線
+ float4 worldPos : TEXCOORD0; // ワールド座標
+ half3 sh : TEXCOORD3; // SH
+};
+
+// 影のレンダリング時のジオメトリシェーダからフラグメントシェーダに渡すデータ
+struct g2f_shadow
+{
+ float4 pos : SV_POSITION; // 座標
+ float4 hpos : TEXCOORD1;
+};
+
+次に変数の定義をしています。
リスト7.7: 変数の定義部分
+int _SegmentNum; + +float _Scale; +float _Threashold; + +float4 _DiffuseColor; +float3 _HalfSize; +float4x4 _Matrix; + +float _EmissionIntensity; +half3 _EmissionColor; + +half _Glossiness; +half _Metallic; + +StructuredBuffer<float3> vertexOffset; +StructuredBuffer<int> cubeEdgeFlags; +StructuredBuffer<int2> edgeConnection; +StructuredBuffer<float3> edgeDirection; +StructuredBuffer<int> triangleConnectionTable; ++
ここで定義している各種変数の中身は、C#側のRenderMesh()関数の中で、material.Set○○関数で受け渡しています。MarchingCubesDefinesクラスのComputeBuffer達は、StructuredBuffer<○○>と型の呼び名が変わっています。
+ +ほとんどの処理はジオメトリシェーダの方で行うので、頂点シェーダは凄くシンプルです。単純にメッシュから渡される頂点データをそのままジオメトリシェーダに渡しているだけです。
+リスト7.8: 頂点シェーダの実装部分
+// メッシュから渡ってくる頂点データ
+struct appdata
+{
+ float4 vertex : POSITION; // 頂点座標
+};
+
+// 頂点シェーダからジオメトリシェーダに渡すデータ
+struct v2g
+{
+ float4 pos : SV_POSITION; // 座標
+};
+
+// 頂点シェーダ
+v2g vert(appdata v)
+{
+ v2g o = (v2g)0;
+ o.pos = v.vertex;
+ return o;
+}
+
+ちなみに、頂点シェーダは実体と影で共通です。
+ +長いので分割しながら説明します。
+リスト7.9: ジオメトリシェーダーの関数宣言部分
+// 実体のジオメトリシェーダ +[maxvertexcount(15)] // シェーダから出力する頂点の最大数の定義 +void geom_light(point v2g input[1], + inout TriangleStream<g2f_light> outStream) ++
まず、ジオメトリシェーダの宣言部です。
+[maxvertexcount(15)]はシェーダから出力する頂点の最大数の定義です。今回のマーチングキューブス法のアルゴリズムでは1グリッドにつき、三角ポリゴンが最大5つできるので、3*5で合計15個の頂点が出力されます。
そのため、maxvertexcountの()の中に15と記述します。
リスト7.10: グリッドの8つの角のスカラー値取得部分
+float cubeValue[8]; // グリッドの8つの角のスカラー値取得用の配列
+
+// グリッドの8つの角のスカラー値を取得
+for (i = 0; i < 8; i++) {
+ cubeValue[i] = Sample(
+ pos.x + vertexOffset[i].x,
+ pos.y + vertexOffset[i].y,
+ pos.z + vertexOffset[i].z
+ );
+}
+
+posは、メッシュを作成する時にグリッド空間に配置した頂点の座標が入っています。vertexOffsetは、名前の通りposに加えるオフセット座標の配列です。
+このループは、1頂点=1つのグリッドの8つの角の座標のボリュームデータ中のスカラー値を取得しています。vertexOffsetは、グリッドの角の順番を指しています。
+
++図7.4: グリッドの角の座標の順番 +
+リスト7.11: サンプリング関数部分
+// サンプリング関数
+float Sample(float x, float y, float z) {
+
+    // 座標がグリッド空間からはみ出していないか?
+ if ((x <= 1) ||
+ (y <= 1) ||
+ (z <= 1) ||
+ (x >= (_SegmentNum - 1)) ||
+ (y >= (_SegmentNum - 1)) ||
+ (z >= (_SegmentNum - 1))
+ )
+ return 0;
+
+ float3 size = float3(_SegmentNum, _SegmentNum, _SegmentNum);
+
+ float3 pos = float3(x, y, z) / size;
+
+ float3 spPos;
+ float result = 0;
+
+ // 3つの球の距離関数
+ for (int i = 0; i < 3; i++) {
+ float sp = -sphere(
+ pos - float3(0.5, 0.25 + 0.25 * i, 0.5),
+ 0.1 + (sin(_Time.y * 8.0 + i * 23.365) * 0.5 + 0.5) * 0.025) + 0.5;
+ result = smoothMax(result, sp, 14);
+ }
+
+ return result;
+}
+
+ボリュームデータから指定した座標のスカラー値を取ってくる関数です。今回は膨大な3Dボリュームデータではなく、距離関数を使ったシンプルなアルゴリズムでスカラー値を算出します。
+今回マーチングキューブス法で描画する3次元形状は、「距離関数」と言うものを使って定義します。
+ここでいう距離関数とは、ざっくり説明すると「距離の条件を満たす関数」です。
+例えば、球体の距離関数は、以下になります。
+リスト7.12: 球体の距離関数
+inline float sphere(float3 pos, float radius)
+{
+ return length(pos) - radius;
+}
+
+pos には、座標が入るのですが、球体の中心座標を原点(0,0,0)とした場合で考えます。radiusは半径です。
+length(pos)で長さを求めていますが、これは原点とposまでの距離で、それを半径radiusで引くので、半径以下の長さの場合、当たり前ですが負の値になります。
+つまり、座標posを渡して負の値が返ってきた場合は、「座標は球体の中にいる」という判定ができます。
+距離関数のメリットは、数行のシンプルな計算式で図形を表現できるので、プログラムが小さくしやすいところです。その他の距離関数についての情報は、Inigo Quilez氏のサイトでたくさん紹介されています。
+http://iquilezles.org/www/articles/distfunctions/distfunctions.htm
+リスト7.13: 3つの球の距離関数を合成したもの
+// 3つの球の距離関数
+for (int i = 0; i < 3; i++) {
+ float sp = -sphere(
+ pos - float3(0.5, 0.25 + 0.25 * i, 0.5),
+ 0.1 + (sin(_Time.y * 8.0 + i * 23.365) * 0.5 + 0.5) * 0.025) + 0.5;
+ result = smoothMax(result, sp, 14);
+}
+
+今回は、グリッドの1マスの8つの角(頂点)をposとして使っています。球体の中心からの距離を、そのままボリュームデータの濃度として扱います。
+後述しますが、閾値が0.5以上の時にポリゴン化するため、符号を反転しています。また、座標を微妙にずらして3つの球体との距離を求めています。
+リスト7.14: smoothMax関数
+float smoothMax(float d1, float d2, float k)
+{
+ float h = exp(k * d1) + exp(k * d2);
+ return log(h) / k;
+}
+
+smoothMaxは、距離関数の結果をいい感じにブレンドする関数です。これを使って3つの球体をメタボールのように融合させることが出来ます。
+リスト7.15: 閾値チェック
+// グリッドの8つの角の値が閾値を超えているかチェック
+for (i = 0; i < 8; i++) {
+ if (cubeValue[i] <= _Threashold) {
+ flagIndex |= (1 << i);
+ }
+}
+
+int edgeFlags = cubeEdgeFlags[flagIndex];
+
+// 空か完全に満たされている場合は何も描画しない
+if ((edgeFlags == 0) || (edgeFlags == 255)) {
+ return;
+}
+
+グリッドの角のスカラー値が閾値を越えていたら、flagIndexにビットを立てていきます。そのflagIndexをインデックスとして、cubeEdgeFlags配列からポリゴンを生成するための情報を取り出してedgeFlagsに格納しています。グリッドの全ての角が閾値未満か閾値以上の場合は、完全に中か外なのでポリゴンは生成しません。
+リスト7.16: ポリゴンの頂点座標計算
+float offset = 0.5;
+float3 vertex;
+for (i = 0; i < 12; i++) {
+ if ((edgeFlags & (1 << i)) != 0) {
+ // 角同士の閾値のオフセットを取得
+ offset = getOffset(
+ cubeValue[edgeConnection[i].x],
+ cubeValue[edgeConnection[i].y], _
+ Threashold
+ );
+
+ // オフセットを元に頂点の座標を補完
+ vertex = vertexOffset[edgeConnection[i].x]
+ + offset * edgeDirection[i];
+
+ edgeVertices[i].x = pos.x + vertex.x * _Scale;
+ edgeVertices[i].y = pos.y + vertex.y * _Scale;
+ edgeVertices[i].z = pos.z + vertex.z * _Scale;
+
+ // 法線計算(Sampleし直すため、スケールを掛ける前の頂点座標が必要)
+ edgeNormals[i] = getNormal(
+ defpos.x + vertex.x,
+ defpos.y + vertex.y,
+ defpos.z + vertex.z
+ );
+ }
+}
+
+ポリゴンの頂点座標を計算している箇所です。先程の、edgeFlagsのビットを見て、グリッドの辺上に置くポリゴンの頂点座標を計算しています。
+getOffsetは、グリッドの2つの角のスカラー値と閾値から、今の角から次の角までの割合(offset)を出しています。今の角の座標から、次の角の方向へoffset分ずらすことで、最終的になめらかなポリゴンになります。
+getNormalでは、サンプリングし直して勾配を出して法線を算出しています。
+リスト7.17: 頂点を連結してポリゴンを作る
+// 頂点を連結してポリゴンを作成
+int vindex = 0;
+int findex = 0;
+// 最大5つの三角形ができる
+for (i = 0; i < 5; i++) {
+ findex = flagIndex * 16 + 3 * i;
+ if (triangleConnectionTable[findex] < 0)
+ break;
+
+ // 三角形を作る
+ for (j = 0; j < 3; j++) {
+ vindex = triangleConnectionTable[findex + j];
+
+ // Transform行列を掛けてワールド座標に変換
+ float4 ppos = mul(_Matrix, float4(edgeVertices[vindex], 1));
+ o.pos = UnityObjectToClipPos(ppos);
+
+ float3 norm = UnityObjectToWorldNormal(
+ normalize(edgeNormals[vindex])
+ );
+ o.normal = normalize(mul(_Matrix, float4(norm,0)));
+
+ outStream.Append(o); // ストリップに頂点を追加
+ }
+ outStream.RestartStrip(); // 一旦区切って次のプリミティブストリップを開始
+}
+
+先程求めた頂点座標群を繋いでポリゴンを作っている箇所です。triangleConnectionTable配列に接続する頂点のインデックスが入っています。頂点座標にTransformの行列を掛けてワールド座標に変換し、UnityObjectToClipPos()でスクリーン座標に変換しています。
+また、UnityObjectToWorldNormal()で法線もワールド座標系に変換しています。これらの頂点と法線は、次のフラグメントシェーダでライティングに使います。
+TriangleStream.Append()やRestartStrip()は、ジオメトリシェーダ用の特殊な関数です。Append()は、現在のストリップに頂点データを追加します。RestartStrip()は、新しいストリップを作成します。TriangleStreamなので1つのストリップには3つまでAppendするイメージです。
+ +UnityのGI(グローバルイルミネーション)などのライティングを反映させるため、Generate code後のSurfaceShaderのライティング処理部分を移植します。
+リスト7.18: フラグメントシェーダの定義
+// 実体のフラグメントシェーダ +void frag_light(g2f_light IN, + out half4 outDiffuse : SV_Target0, + out half4 outSpecSmoothness : SV_Target1, + out half4 outNormal : SV_Target2, + out half4 outEmission : SV_Target3) ++
G-Bufferに出力するため出力(SV_Target)が4つあります。
+リスト7.19: SurfaceOutputStandard構造体の初期化
+#ifdef UNITY_COMPILER_HLSL + SurfaceOutputStandard o = (SurfaceOutputStandard)0; +#else + SurfaceOutputStandard o; +#endif + o.Albedo = _DiffuseColor.rgb; + o.Emission = _EmissionColor * _EmissionIntensity; + o.Metallic = _Metallic; + o.Smoothness = _Glossiness; + o.Alpha = 1.0; + o.Occlusion = 1.0; + o.Normal = normal; ++
あとで使うSurfaceOutputStandard構造体に、色や光沢感などのパラメータをセットします。
+リスト7.20: GI関係の処理
+// Setup lighting environment +UnityGI gi; +UNITY_INITIALIZE_OUTPUT(UnityGI, gi); +gi.indirect.diffuse = 0; +gi.indirect.specular = 0; +gi.light.color = 0; +gi.light.dir = half3(0, 1, 0); +gi.light.ndotl = LambertTerm(o.Normal, gi.light.dir); + +// Call GI (lightmaps/SH/reflections) lighting function +UnityGIInput giInput; +UNITY_INITIALIZE_OUTPUT(UnityGIInput, giInput); +giInput.light = gi.light; +giInput.worldPos = worldPos; +giInput.worldViewDir = worldViewDir; +giInput.atten = 1.0; + +giInput.ambient = IN.sh; + +giInput.probeHDR[0] = unity_SpecCube0_HDR; +giInput.probeHDR[1] = unity_SpecCube1_HDR; + +#if UNITY_SPECCUBE_BLENDING || UNITY_SPECCUBE_BOX_PROJECTION +// .w holds lerp value for blending +giInput.boxMin[0] = unity_SpecCube0_BoxMin; +#endif + +#if UNITY_SPECCUBE_BOX_PROJECTION +giInput.boxMax[0] = unity_SpecCube0_BoxMax; +giInput.probePosition[0] = unity_SpecCube0_ProbePosition; +giInput.boxMax[1] = unity_SpecCube1_BoxMax; +giInput.boxMin[1] = unity_SpecCube1_BoxMin; +giInput.probePosition[1] = unity_SpecCube1_ProbePosition; +#endif + +LightingStandard_GI(o, giInput, gi); ++
GI関係の処理です。UnityGIInputに初期値を入れて、LightingStandard_GI()で計算したGIの結果をUnityGIに書き込んでいます。
+リスト7.21: 光の反射具合の計算
+// call lighting function to output g-buffer +outEmission = LightingStandard_Deferred(o, worldViewDir, gi, + outDiffuse, + outSpecSmoothness, + outNormal); +outDiffuse.a = 1.0; + +#ifndef UNITY_HDR_ON +outEmission.rgb = exp2(-outEmission.rgb); +#endif ++
諸々の計算結果を LightingStandard_Deferred() に渡して光の反射具合を計算して、Emissionバッファに書き込みます。HDRの場合は、expで圧縮される部分を挟んでから書き込みます。
+ +実体のジオメトリシェーダとほとんど同じです。違いがある所だけ解説します。
+リスト7.22: 影のジオメトリシェーダ
+int vindex = 0;
+int findex = 0;
+for (i = 0; i < 5; i++) {
+ findex = flagIndex * 16 + 3 * i;
+ if (triangleConnectionTable[findex] < 0)
+ break;
+
+ for (j = 0; j < 3; j++) {
+ vindex = triangleConnectionTable[findex + j];
+
+ float4 ppos = mul(_Matrix, float4(edgeVertices[vindex], 1));
+
+ float3 norm;
+ norm = UnityObjectToWorldNormal(normalize(edgeNormals[vindex]));
+
+ float4 lpos1 = mul(unity_WorldToObject, ppos);
+ o.pos = UnityClipSpaceShadowCasterPos(lpos1,
+ normalize(
+ mul(_Matrix,
+ float4(norm, 0)
+ )
+ )
+ );
+ o.pos = UnityApplyLinearShadowBias(o.pos);
+ o.hpos = o.pos;
+
+ outStream.Append(o);
+ }
+ outStream.RestartStrip();
+}
+
+UnityClipSpaceShadowCasterPos()とUnityApplyLinearShadowBias()で頂点座標を影の投影先の座標に変換します。
+ +リスト7.23: 影のフラグメントシェーダ
+// 影のフラグメントシェーダ
+fixed4 frag_shadow(g2f_shadow i) : SV_Target
+{
+ return i.hpos.z / i.hpos.w;
+}
+
+短すぎて説明するところがないです。実は return 0; でも正常に影が描画されます。Unityが中でいい感じにやってくれているんでしょうか?
+ +実行するとこんな感じの絵が出てくるはずです。
+
++図7.5: うねうね +
+また、距離関数を組み合わせるといろいろな形が作れます。
+
++図7.6: かいわれーい +
+今回は簡略化のために距離関数を使いましたが、他にも3Dテクスチャにボリュームデータを書き込んだものを使ったり、いろいろな三次元データを可視化するのにマーチングキューブス法は使えると思います。
ゲーム用途では、地形を掘ったり盛ったりできるASTRONEER*2のようなゲームも作れるかもしれません。
みなさんもマーチングキューブス法でいろいろな表現を模索してみてください!
http://iquilezles.org/www/articles/distfunctions/distfunctions.htm
+[*2] ASTRONEER http://store.steampowered.com/app/361420/ASTRONEER/?l=japanese
プロシージャルモデリング(Procedural Modeling)とは、ルールを利用して3Dモデルを構築するテクニックのことです。モデリングというと、一般的にはモデリングソフトであるBlenderや3ds Maxなどを利用して、頂点や線分を動かしつつ目標とする形を得るように手で操作をしていくことを指しますが、それとは対象的に、ルールを記述し、自動化された一連の処理の結果、形を得るアプローチのことをプロシージャルモデリングと呼びます。
+プロシージャルモデリングは様々な分野で応用されていて、例えばゲームでは、地形の生成や植物の造形、都市の構築などで利用されている例があり、この技術を用いることで、プレイするごとにステージ構造が変わるなどといったコンテンツデザインが可能になります。
+また、建築やプロダクトデザインの分野でも、Rhinoceros*1というCADソフトのプラグインであるGrasshopper*2を使って、プロシージャルに形状をデザインする手法が活発に利用されています。
+[*1] http://www.rhino3d.co.jp/
[*2] http://www.grasshopper3d.com/
プロシージャルモデリングを使えば以下のようなことが可能になります。
+パラメトリックな構造とは、あるパラメータに応じて構造が持つ要素を変形させられる構造のことで、例えば球(Sphere)のモデルであれば、大きさを表す半径(radius)と、球の滑らかさを表す分割数(segments)といったパラメータが定義でき、それらの値を変化させることで望むサイズや滑らかさを持つ球を得ることができます。
+パラメトリックな構造を定義するプログラムを一度実装してしまえば、様々な場面で特定の構造を持つモデルを欲しい形で得ることができ、便利です。
+ +前述の通り、ゲームなどの分野においては、地形や樹木の生成にプロシージャルモデリングが利用される例はとても多く、一度モデルとして書き出されたものを組み込むのではなく、コンテンツ内でリアルタイムに生成されることもあります。リアルタイムなコンテンツにプロシージャルモデリングのテクニックを利用すると、例えば太陽に向かって生える木を任意の位置に生成したり、クリックした位置からビルが立ち並んでいくように街を構築したりするようなことが実現できます。
+また、様々なパターンのモデルをコンテンツに組み込むとデータサイズが膨らんでしまいますが、プロシージャルモデリングを利用してモデルのバリエーションを増やせば、データサイズを抑えることができます。
+プロシージャルモデリングのテクニックを学び、プログラムによってモデルを構築していくことを極めていけば、モデリングツールそのものを自分で開発することも可能になるでしょう。
+ +Unityでは、モデルの形を表すジオメトリデータをMeshクラスによって管理します。
+モデルの形は3D空間に並べられた三角形から構成されていて、1つの三角形は3つの頂点により定義されます。モデルが持つ頂点と三角形データのMeshクラスでの管理方法について、Unityの公式ドキュメントで以下のように解説されています。
++Meshクラスでは、すべての頂点はひとつの配列に格納されていて、それぞれの三角形は頂点配列のインデックスにあたる3つの整数により指定されます。三角形はさらに1つの整数の配列として集められます。この整数は配列の最初から3つごとにグルーピングされるため、要素 0、1、2は最初の三角形を定義し、2つ目の三角形は3、4、5と続いていきます。*3
+
[*3] https://docs.unity3d.com/jp/540/Manual/AnatomyofaMesh.html
モデルには、それぞれの頂点に対応するように、テクスチャマッピングを行うために必要なテクスチャ上の座標を表すuv座標、ライティング時に光源の影響度を計算するために必要な法線ベクトル(normalとも呼ばれます)を含められます。
+ +本章ではhttps://github.com/IndieVisualLab/UnityGraphicsProgrammingリポジトリ内にあるAssets/ProceduralModeling以下をサンプルプログラムとして用意しています。
+C#スクリプトによるモデル生成が主な解説内容となるため、Assets/ProceduralModeling/Scripts以下にあるC#スクリプトを参照しつつ、解説を進めていきます。
+ +本章のサンプルコードはUnity5.0以上で動作することを確認しています。
+ +基本的なモデルであるQuadを例として、モデルをプログラムから構築する方法を解説していきます。Quadは4つの頂点からなる2枚の三角形を合わせた正方形モデルで、UnityではPrimitive Meshとしてデフォルトで提供されていますが、最も基本的な形状であるため、モデルの構造を理解するための例として役立ちます。
+
++図1.1: Quadモデルの構造 黒丸はモデルの頂点を表し、黒丸内の0〜3の数字は頂点のindexを示している 矢印は一枚の三角形を構築する頂点indexの指定順(右上は0,1,2の順番で指定された三角形、左下は2,3,0の順番で指定された三角形) +
+まずはMeshクラスのインスタンスを生成します。
+// Meshのインスタンスを生成 +var mesh = new Mesh(); ++
次にQuadの四隅に位置する4つの頂点を表すVector3配列を生成します。また、uv座標と法線のデータも4つの頂点それぞれに対応するように用意します。
+// Quadの横幅と縦幅がそれぞれsizeの長さになるように半分の長さを求める
+var hsize = size * 0.5f;
+
+// Quadの頂点データ
+var vertices = new Vector3[] {
+ new Vector3(-hsize, hsize, 0f), // 1つ目の頂点 Quadの左上の位置
+ new Vector3( hsize, hsize, 0f), // 2つ目の頂点 Quadの右上の位置
+ new Vector3( hsize, -hsize, 0f), // 3つ目の頂点 Quadの右下の位置
+ new Vector3(-hsize, -hsize, 0f) // 4つ目の頂点 Quadの左下の位置
+};
+
+// Quadのuv座標データ
+var uv = new Vector2[] {
+ new Vector2(0f, 0f), // 1つ目の頂点のuv座標
+ new Vector2(1f, 0f), // 2つ目の頂点のuv座標
+ new Vector2(1f, 1f), // 3つ目の頂点のuv座標
+ new Vector2(0f, 1f) // 4つ目の頂点のuv座標
+};
+
+// Quadの法線データ
+var normals = new Vector3[] {
+ new Vector3(0f, 0f, -1f), // 1つ目の頂点の法線
+ new Vector3(0f, 0f, -1f), // 2つ目の頂点の法線
+ new Vector3(0f, 0f, -1f), // 3つ目の頂点の法線
+ new Vector3(0f, 0f, -1f) // 4つ目の頂点の法線
+};
+
+次に、モデルの面を表す三角形データを生成します。三角形データは整数配列によって指定され、それぞれの整数は頂点配列のindexに対応しています。
+// Quadの面データ 頂点のindexを3つ並べて1つの面(三角形)として認識する
+var triangles = new int[] {
+ 0, 1, 2, // 1つ目の三角形
+ 2, 3, 0 // 2つ目の三角形
+};
+
+最後に生成したデータをMeshのインスタンスに設定していきます。
+mesh.vertices = vertices; +mesh.uv = uv; +mesh.normals = normals; +mesh.triangles = triangles; + +// Meshが占める境界領域を計算する(cullingに必要) +mesh.RecalculateBounds(); + +return mesh; ++
本章で利用するサンプルコードでは、ProceduralModelingBaseという基底クラスを利用しています。このクラスの継承クラスでは、モデルのパラメータ(例えば、Quadでは横幅と縦幅を表すsize)を変更するたびに新たなMeshインスタンスを生成し、MeshFilterに適用することで、変更結果をすぐさま確認することができます。(Editorスクリプトを利用してこの機能を実現しています。ProceduralModelingEditor.cs)
+また、ProceduralModelingMaterialというenum型のパラメータを変更することで、モデルのUV座標や法線方向を可視化することができます。
+
++図1.2: 左から、ProceduralModelingMaterial.Standard、ProceduralModelingMaterial.UV、ProceduralModelingMaterial.Normalが適用されたモデル +
+モデルの構造を理解できたところで、いくつかプリミティブな形状を作っていきましょう。
+ +PlaneはQuadをグリッド上に並べたような形をしています。
+
++図1.3: Planeモデル +
+グリッドの行数と列数を決め、それぞれのグリッドの交点に頂点を配置し、グリッドの各マスを埋めるようにQuadを構築し、それらをまとめることで1つのPlaneモデルを生成します。
+サンプルプログラムPlane.csでは、Planeの縦に並べる頂点の数heightSegments、横に並べる頂点の数widthSegmentsと、縦の長さheight、横の長さwidthのパラメータを用意しています。それぞれのパラメータは次の図のようにPlaneの形状に影響します。
+
++図1.4: Planeパラメータ +
+まずはグリッドの交点に配置する頂点データを生成していきます。
+var vertices = new List<Vector3>();
+var uv = new List<Vector2>();
+var normals = new List<Vector3>();
+
+// 頂点のグリッド上での位置の割合(0.0 ~ 1.0)を算出するための行列数の逆数
+var winv = 1f / (widthSegments - 1);
+var hinv = 1f / (heightSegments - 1);
+
+for(int y = 0; y < heightSegments; y++) {
+ // 行の位置の割合(0.0 ~ 1.0)
+ var ry = y * hinv;
+
+ for(int x = 0; x < widthSegments; x++) {
+ // 列の位置の割合(0.0 ~ 1.0)
+ var rx = x * winv;
+
+ vertices.Add(new Vector3(
+ (rx - 0.5f) * width,
+ 0f,
+ (0.5f - ry) * height
+ ));
+ uv.Add(new Vector2(rx, ry));
+ normals.Add(new Vector3(0f, 1f, 0f));
+ }
+}
+
+次に三角形データですが、各三角形に設定する頂点indexは行と列を辿るループの中で、下記のように参照します。
+var triangles = new List<int>();
+
+for(int y = 0; y < heightSegments - 1; y++) {
+ for(int x = 0; x < widthSegments - 1; x++) {
+ int index = y * widthSegments + x;
+ var a = index;
+ var b = index + 1;
+ var c = index + 1 + widthSegments;
+ var d = index + widthSegments;
+
+ triangles.Add(a);
+ triangles.Add(b);
+ triangles.Add(c);
+
+ triangles.Add(c);
+ triangles.Add(d);
+ triangles.Add(a);
+ }
+}
+
+Planeの各頂点の高さ(y座標)の値は0に設定していましたが、この高さを操作することで、単なる水平な面だけではなく、凸凹した地形や小高い山のような形を得ることができます。
+ParametricPlaneBaseクラスはPlaneクラスを継承しており、Meshを生成するBuild関数をoverrideしています。まずは元のPlaneモデルを生成し、各頂点のuv座標をインプットにして高さを求めるDepth(float u, float v)関数を、全ての頂点について呼び出し、高さを設定し直すことで柔軟に形を変形します。
+このParametricPlaneBaseクラスを継承したクラスを実装することで、頂点によって高さが変化するPlaneモデルを生成できます。
+ +protected override Mesh Build() {
+ // 元のPlaneモデルを生成
+ var mesh = base.Build();
+
+ // Planeモデルが持つ頂点の高さを再設定する
+ var vertices = mesh.vertices;
+
+ // 頂点のグリッド上での位置の割合(0.0 ~ 1.0)を算出するための行列数の逆数
+ var winv = 1f / (widthSegments - 1);
+ var hinv = 1f / (heightSegments - 1);
+
+ for(int y = 0; y < heightSegments; y++) {
+ // 行の位置の割合(0.0 ~ 1.0)
+ var ry = y * hinv;
+ for(int x = 0; x < widthSegments; x++) {
+ // 列の位置の割合(0.0 ~ 1.0)
+ var rx = x * winv;
+
+ int index = y * widthSegments + x;
+ vertices[index].y = Depth(rx, ry);
+ }
+ }
+
+ // 頂点位置の再設定
+ mesh.vertices = vertices;
+ mesh.RecalculateBounds();
+
+ // 法線方向を自動算出
+ mesh.RecalculateNormals();
+
+ return mesh;
+}
+
+サンプルシーンParametricPlane.sceneでは、このParametricPlaneBaseを継承したクラス(MountainPlane、TerrainPlaneクラス)を利用したGameObjectが配置してあります。それぞれのパラメータを変えながら、形が変化していく様子を確認してみてください。
+
++図1.5: ParametricPlane.scene 左がMountainPlaneクラス、右がTerrainPlaneクラスによって生成されたモデル +
+Cylinderは円筒型のモデルで、次の図のような形をしています。
+
++図1.6: Cylinderの構造 +
+円筒型の円のなめらかさはsegments、縦の長さと太さはそれぞれheightとradiusパラメータで制御することができます。上図の例のように、segmentsに7を指定するとCylinderは正7角形を縦に引き伸ばしたような形になり、segmentsの数値を大きくするほど円形に近づいていきます。
+ +Cylinderの頂点は、筒の端に位置する円の周りに沿って均等に並べる必要があります。
+円周に沿って均等に並ぶ頂点を配置するには、三角関数(Mathf.Sin, Mathf.Cos)を利用します。ここでは三角関数の詳細については割愛しますが、これらの関数を利用すると角度を元に円周上の位置を得ることができます。
+
++図1.7: 三角関数から円周上の点の位置を得る +
+この図のように角度θ(シータ)から半径radiusの円上に位置する点は、(x, y) = (Mathf.Cos(θ) * radius, Mathf.Sin(θ) * radius)で取得することができます。
+これを元に、半径radiusの円周上に均等に並べられたsegments個の頂点位置を得るには以下のような処理を行います。
+for (int i = 0; i < segments; i++) {
+ // 0.0 ~ 1.0
+ float ratio = (float)i / (segments - 1);
+
+ // [0.0 ~ 1.0]を[0.0 ~ 2π]に変換
+ float rad = ratio * PI2;
+
+ // 円周上の位置を得る
+ float cos = Mathf.Cos(rad), sin = Mathf.Sin(rad);
+ float x = cos * radius, y = sin * radius;
+}
+
+Cylinderのモデリングでは、円筒の端に位置する円周に沿って均等に頂点を配置し、それらの頂点をつなぎ合わせて側面を形作ります。側面の1つ1つはQuadを構築するのと同じように、上端と下端から対応する頂点を2つずつ取り出して三角形を向かい合わせて配置し、1つの側面、つまり四角形を構築します。Cylinderの側面は、Quadが円形に沿って配置されているものだとイメージできます。
+
++図1.8: Cylinderの側面のモデリング 黒丸は端に位置する円周に沿って均等に配置された頂点 頂点内のa〜dはCylinder.csプログラム内で三角形を構築する際に頂点に割り振られるindex変数 +
+まずは側面を構築していきますが、Cylinderクラスでは上端と下端に位置する円周に並べられた頂点のデータを生成するための関数GenerateCapを用意しています。
+var vertices = new List<Vector3>();
+var normals = new List<Vector3>();
+var uvs = new List<Vector2>();
+var triangles = new List<int>();
+
+// 上端の高さと、下端の高さ
+float top = height * 0.5f, bottom = -height * 0.5f;
+
+// 側面を構成する頂点データを生成
+GenerateCap(segments + 1, top, bottom, radius, vertices, uvs, normals, true);
+
+// 側面の三角形を構築する際、円上の頂点を参照するために、
+// indexが円を一周するための除数
+var len = (segments + 1) * 2;
+
+// 上端と下端をつなぎ合わせて側面を構築
+for (int i = 0; i < segments + 1; i++) {
+ int idx = i * 2;
+ int a = idx, b = idx + 1, c = (idx + 2) % len, d = (idx + 3) % len;
+ triangles.Add(a);
+ triangles.Add(c);
+ triangles.Add(b);
+
+ triangles.Add(d);
+ triangles.Add(b);
+ triangles.Add(c);
+}
+
+GenerateCap関数では、List型で渡された変数に頂点や法線データを設定します。
+void GenerateCap(
+ int segments,
+ float top,
+ float bottom,
+ float radius,
+ List<Vector3> vertices,
+ List<Vector2> uvs,
+ List<Vector3> normals,
+ bool side
+) {
+ for (int i = 0; i < segments; i++) {
+ // 0.0 ~ 1.0
+ float ratio = (float)i / (segments - 1);
+
+ // 0.0 ~ 2π
+ float rad = ratio * PI2;
+
+ // 円周に沿って上端と下端に均等に頂点を配置する
+ float cos = Mathf.Cos(rad), sin = Mathf.Sin(rad);
+ float x = cos * radius, z = sin * radius;
+ Vector3 tp = new Vector3(x, top, z), bp = new Vector3(x, bottom, z);
+
+ // 上端
+ vertices.Add(tp);
+ uvs.Add(new Vector2(ratio, 1f));
+
+ // 下端
+ vertices.Add(bp);
+ uvs.Add(new Vector2(ratio, 0f));
+
+ if(side) {
+ // 側面の外側を向く法線
+ var normal = new Vector3(cos, 0f, sin);
+ normals.Add(normal);
+ normals.Add(normal);
+ } else {
+ normals.Add(new Vector3(0f, 1f, 0f)); // 蓋の上を向く法線
+ normals.Add(new Vector3(0f, -1f, 0f)); // 蓋の下を向く法線
+ }
+ }
+}
+
+Cylinderクラスでは、上端と下端を閉じたモデルにするかどうかをopenEndedフラグで設定することができます。上端と下端を閉じる場合は、円形の「蓋」を形作り、端に栓をします。
+蓋の面を構成する頂点は、側面を構成している頂点を利用せずに、側面と同じ位置に別途新しく頂点を生成します。これは、側面と蓋の部分とで法線を分け、自然なライティングを施すためです。(側面の頂点データを構築する場合はGenerateCapの引数のside変数にtrueを、蓋を構築する場合はfalseを指定し、適切な法線方向が設定されるようにしています。)
+もし、側面と蓋の部分で同じ頂点を共有してしまうと、側面と蓋面で同じ法線を参照することになってしまうので、ライティングが不自然になってしまいます。
+
++図1.9: Cylinderの側面と蓋の頂点を共有した場合(左:BadCylinder.cs)と、サンプルプログラムのように別の頂点を用意した場合(右:Cylinder.cs) 左はライティングが不自然になっている +
+円形の蓋をモデリングするには、(GenerateCap関数から生成される)円周上に均等に並べられた頂点と、円の真ん中に位置する頂点を用意し、真ん中の頂点から円周に沿った頂点をつなぎ合わせて、均等に分けられたピザのように三角形を構築することで円形の蓋を形作ります。
+
++図1.10: Cylinderの蓋のモデリング segmentsパラメータが6の場合の例 +
+// 上端と下端の蓋を生成
+if(openEnded) {
+ // 蓋のモデルのための頂点は、ライティング時に異なった法線を利用するために、側面とは共有せずに新しく追加する
+ GenerateCap(
+ segments + 1,
+ top,
+ bottom,
+ radius,
+ vertices,
+ uvs,
+ normals,
+ false
+ );
+
+ // 上端の蓋の真ん中の頂点
+ vertices.Add(new Vector3(0f, top, 0f));
+ uvs.Add(new Vector2(0.5f, 1f));
+ normals.Add(new Vector3(0f, 1f, 0f));
+
+ // 下端の蓋の真ん中の頂点
+ vertices.Add(new Vector3(0f, bottom, 0f)); // bottom
+ uvs.Add(new Vector2(0.5f, 0f));
+ normals.Add(new Vector3(0f, -1f, 0f));
+
+ var it = vertices.Count - 2;
+ var ib = vertices.Count - 1;
+
+ // 側面の分の頂点indexを参照しないようにするためのoffset
+ var offset = len;
+
+ // 上端の蓋の面
+ for (int i = 0; i < len; i += 2) {
+ triangles.Add(it);
+ triangles.Add((i + 2) % len + offset);
+ triangles.Add(i + offset);
+ }
+
+ // 下端の蓋の面
+ for (int i = 1; i < len; i += 2) {
+ triangles.Add(ib);
+ triangles.Add(i + offset);
+ triangles.Add((i + 2) % len + offset);
+ }
+}
+
+Tubularは筒型のモデルで、次の図のような形をしています。
+
++図1.11: Tubularモデル +
+Cylinderモデルはまっすぐに伸びる円筒形状ですが、Tubularは曲線に沿ったねじれのない筒型をしています。後述する樹木モデルの例では、一本の枝をTubularで表現し、その組み合わせで一本の木を構築する手法を採用しているのですが、滑らかに曲がる筒型が必要な場面でTubularは活躍します。
+ +筒型モデルの構造は次の図のようになっています。
+
++図1.12: 筒型の構造 Tubularが沿う曲線を分割する点を球で、側面を構成する節を六角形で可視化している +
+曲線を分割し、分割点によって区切られた節ごとに側面を構築していき、それらを組み合わせることで1つのTubularモデルを生成します。
+1つ1つの節の側面はCylinderの側面と同じように、側面の上端と下端の頂点を円形に沿って均等に配置し、それらをつなぎ合わせて構築するため、Cylinderを曲線に沿って連結したものがTubular型だと考えることができます。
+ +サンプルプログラムでは、曲線を表す基底クラスCurveBaseを用意しています。3次元空間上の曲線の描き方については、様々なアルゴリズムが考案されており、用途に応じて使いやすい手法を選択する必要があります。サンプルプログラムでは、CurveBaseクラスを継承したクラスCatmullRomCurveを利用しています。
+ここでは詳細は割愛しますが、CatmullRomCurveは渡された制御点全てを通るように点と点の間を補間しつつ曲線を形作るという特徴があり、曲線に経由させたい点を指定できるため、使い勝手の良さに定評があります。
+曲線を表すCurveBaseクラスでは、曲線上の点の位置と傾き(tangentベクトル)を得るためにGetPointAt(float)・GetTangentAt(float)関数を用意しており、引数に[0.0 ~ 1.0]の値を指定することで、始点(0.0)から終点(1.0)の間にある点の位置と傾きを取得できます。
+ +曲線に沿ったねじれのない筒型を作るには、曲線に沿ってなめらかに変化する3つの直交するベクトル「接線(tangent)ベクトル、法線(normal)ベクトル、従法線(binormal)ベクトル」の配列が必要となります。接線ベクトルは、曲線上の一点における傾きを表す単位ベクトルのことで、法線ベクトルと従法線ベクトルはお互いに直交するベクトルとして求めます。
+これらの直交するベクトルによって、曲線上のある一点において「曲線に直交する円周上の座標」を得ることができます。
+
++図1.13: 法線(normal)と従法線(binormal)から、円周上の座標を指す単位ベクトル(v)を求める この単位ベクトル(v)に半径radiusを乗算することで、曲線に直交する半径radiusの円周上の座標を得ることができる +
+この曲線上のある一点における3つの直交するベクトルの組のことをFrenet frame(フレネフレーム)と呼びます。
+
++図1.14: Tubularを構成するFrenet frame配列の可視化 枠が1つのFrenet frameを表し、3つの矢印は接線(tangent)ベクトル、法線(normal)ベクトル、従法線(binormal)ベクトルを示している +
+Tubularのモデリングは、このFrenet frameから得られた法線と従法線を元に節ごとの頂点データを求め、それらをつなぎ合わせていくという手順で行います。
+サンプルプログラムでは、CurveBaseクラスがこのFrenet frame配列を生成するための関数ComputeFrenetFramesを持っています。
+ +Tubularクラスは曲線を表すCatmullRomCurveクラスを持ち、このCatmullRomCurveが描く曲線に沿って筒型を形成します。
+CatmullRomCurveクラスは4つ以上の制御点が必要で、制御点を操作すると曲線の形状が変化し、それに伴ってTubularモデルの形状も変化していきます。
+var vertices = new List<Vector3>();
+var normals = new List<Vector3>();
+var tangents = new List<Vector4>();
+var uvs = new List<Vector2>();
+var triangles = new List<int>();
+
+// 曲線からFrenet frameを取得
+var frames = curve.ComputeFrenetFrames(tubularSegments, closed);
+
+// Tubularの頂点データを生成
+for(int i = 0; i < tubularSegments; i++) {
+ GenerateSegment(curve, frames, vertices, normals, tangents, i);
+}
+// 閉じた筒型を生成する場合は曲線の始点に最後の頂点を配置し、閉じない場合は曲線の終点に配置する
+GenerateSegment(
+ curve,
+ frames,
+ vertices,
+ normals,
+ tangents,
+ (!closed) ? tubularSegments : 0
+);
+
+// 曲線の始点から終点に向かってuv座標を設定していく
+for (int i = 0; i <= tubularSegments; i++) {
+ for (int j = 0; j <= radialSegments; j++) {
+ float u = 1f * j / radialSegments;
+ float v = 1f * i / tubularSegments;
+ uvs.Add(new Vector2(u, v));
+ }
+}
+
+// 側面を構築
+for (int j = 1; j <= tubularSegments; j++) {
+ for (int i = 1; i <= radialSegments; i++) {
+ int a = (radialSegments + 1) * (j - 1) + (i - 1);
+ int b = (radialSegments + 1) * j + (i - 1);
+ int c = (radialSegments + 1) * j + i;
+ int d = (radialSegments + 1) * (j - 1) + i;
+
+ triangles.Add(a); triangles.Add(d); triangles.Add(b);
+ triangles.Add(b); triangles.Add(d); triangles.Add(c);
+ }
+}
+
+var mesh = new Mesh();
+mesh.vertices = vertices.ToArray();
+mesh.normals = normals.ToArray();
+mesh.tangents = tangents.ToArray();
+mesh.uv = uvs.ToArray();
+mesh.triangles = triangles.ToArray();
+
+関数GenerateSegmentは先述したFrenet frameから取り出した法線と従法線を元に、指定された節の頂点データを計算し、List型で渡された変数に設定します。
+void GenerateSegment(
+ CurveBase curve,
+ List<FrenetFrame> frames,
+ List<Vector3> vertices,
+ List<Vector3> normals,
+ List<Vector4> tangents,
+ int index
+) {
+ // 0.0 ~ 1.0
+ var u = 1f * index / tubularSegments;
+
+ var p = curve.GetPointAt(u);
+ var fr = frames[index];
+
+ var N = fr.Normal;
+ var B = fr.Binormal;
+
+ for(int j = 0; j <= radialSegments; j++) {
+ // 0.0 ~ 2π
+ float rad = 1f * j / radialSegments * PI2;
+
+ // 円周に沿って均等に頂点を配置する
+ float cos = Mathf.Cos(rad), sin = Mathf.Sin(rad);
+ var v = (cos * N + sin * B).normalized;
+ vertices.Add(p + radius * v);
+ normals.Add(v);
+
+ var tangent = fr.Tangent;
+ tangents.Add(new Vector4(tangent.x, tangent.y, tangent.z, 0f));
+ }
+}
+
+この節では、これまで説明したProceduralModelingのテクニックを使って、より複雑なモデルを生成する手法について紹介します。
+ +植物のモデリングは、ProceduralModelingのテクニックの応用例としてよく取り上げられています。Unity内でも樹木をEditor内でモデリングするためのTree API*4が用意されていますし、Speed Tree*5という植物のモデリング専用のソフトが存在します。
+[*4] https://docs.unity3d.com/ja/540/Manual/tree-FirstTree.html
[*5] http://www.speedtree.com/
この節では、植物の中でも比較的モデリング手法が単純な樹木のモデリングについて取り上げます。
+ +植物の構造を記述・表現できるアルゴリズムとしてL-Systemがあります。L-Systemは植物学者であるAristid Lindenmayerによって1968年に提唱されたもので、L-SystemのLは彼の名前から来ています。
+L-Systemを用いると、植物の形状に見られる自己相似性を表現することができます。
+自己相似性とは、物体の細部の形を拡大してみると、大きなスケールで見たその物体の形と一致することで、例えば樹木の枝分かれを観察すると、幹に近い部分の枝の分かれ方と、先端に近い部分の枝の分かれ方に相似性があります。
+
++図1.15: それぞれの枝が30度ずつの変化で枝分かれした図形 根元の部分と枝先の部分で相似になっていることがわかるが、このようなシンプルな図形でも樹木のような形に見える(サンプルプログラム LSystem.scene) +
+L-Systemは、要素を記号で表し、記号を置き換える規則を定め、記号に対して規則を繰り返し適用していくことで、記号の列を複雑に発展させていくメカニズムを提供します。
+例えば簡単な例をあげると、
+を
+に従って書き換えていくと、
+a -> ab -> aba -> abaab -> abaababa -> ...
+という風にステップを経るごとに複雑な結果を生み出します。
+このL-Systemをグラフィック生成に利用した例がサンプルプログラムのLSystemクラスです。
+LSystemクラスでは、以下の操作
+を用意しており、
+を
+に従って、決められた回数だけ規則の適用を繰り返しています。
+その結果、サンプルのLSystem.sceneに示すような、自己相似性を持つ図を描くことができます。このL-Systemの持つ「状態を再帰的に書き換えていく」という性質が自己相似性を生み出すのです。自己相似性はFractal(フラクタル)とも呼ばれ、1つの研究分野にもなっています。
+ +実際にL-Systemを樹木のモデルを生成するプログラムに応用した例として、ProceduralTreeというクラスを用意しました。
+ProceduralTreeでは、前項で解説したLSystemクラスと同様に「枝を進めては分岐し、さらに枝を進める」というルーチンを再帰的に呼び出すことで木の形を生成していきます。
+前項のLSystemクラスでは、枝の分岐に関しては「一定角度、左と右の二方向に分岐する」という単純なルールでしたが、ProceduralTreeでは乱数を用い、分岐する数や分岐方向にランダム性を持たせ、枝が複雑に分岐するようなルールを設定しています。
+
++図1.16: ProceduralTree.scene +
+TreeDataクラスは枝の分岐具合を定めるパラメータや、木のサイズ感やモデルのメッシュの細かさを決めるパラメータを内包したクラスです。このクラスのインスタンスのパラメータを調整することで、木の形をデザインすることができます。
+ +TreeDataクラス内のいくつかのパラメータを用いて枝の分かれ具合を調整します。
+ +1つの枝から分岐する枝の数はbranchesMin・branchesMaxパラメータで調整します。branchesMinが分岐する枝の最小数、branchesMaxが分岐する枝の最大数を表しており、branchesMinからbranchesMaxの間の数をランダムに選び、分岐する数を決めます。
+ +分岐する枝が生える方向はgrowthAngleMin・growthAngleMaxパラメータで調整します。growthAngleMinは分岐する方向の最小角度、growthAngleMaxが最大角度を表しており、growthAngleMinからgrowthAngleMaxの間の数をランダムに選び、分岐する方向を決めます。
+それぞれの枝は伸びる方向を表すtangentベクトルと、それと直交するベクトルとしてnormalベクトルとbinormalベクトルを持ちます。
+growthAngleMin・growthAngleMaxパラメータからランダムに得られた値は、分岐点から伸びる方向のtangentベクトルに対して、normalベクトルの方向とbinormalベクトルの方向に回転が加えられます。
+分岐点から伸びる方向tangentベクトルに対してランダムな回転を加えることで、分岐先の枝が生える方向を変化させ、枝分かれを複雑に変化させます。
+
++図1.17: 分岐点から伸びる方向に対してかけられるランダムな回転 分岐点でのTの矢印は伸びる方向(tangentベクトル)、Nの矢印は法線(normalベクトル)、Bの矢印は従法線(binormalベクトル)を表し、伸びる方向に対して法線と従法線の方向にランダムな回転がかけられる +
+枝が生える方向にランダムにかけられる回転の角度が枝先にいくほど大きくなるようにgrowthAngleScaleパラメータを用意しています。このgrowthAngleScaleパラメータは、枝のインスタンスが持つ世代を表すgenerationパラメータが0に近づくほど、つまり枝先に近づくほど、回転する角度に強く影響し、回転の角度を大きくします。
+// 枝先ほど分岐する角度が大きくなる +var scale = Mathf.Lerp( + 1f, + data.growthAngleScale, + 1f - 1f * generation / generations +); + +// normal方向の回転 +var qn = Quaternion.AngleAxis(scale * data.GetRandomGrowthAngle(), normal); + +// binormal方向の回転 +var qb = Quaternion.AngleAxis(scale * data.GetRandomGrowthAngle(), binormal); + +// 枝先が向いているtangent方向にqn * qbの回転をかけつつ、枝先の位置を決める +this.to = from + (qn * qb) * tangent * length; ++
枝はTreeBranchクラスで表現されます。
+世代数(generations)と基本となる長さ(length)と太さ(radius)のパラメータに加えて、分岐パターンを設定するためのTreeDataを引数に指定してコンストラクタを呼び出すと、内部で再帰的にTreeBranchのインスタンスが生成されていきます。
+1つのTreeBranchから分岐したTreeBranchは、元のTreeBranch内にあるList<TreeBranch>型であるchildren変数に格納され、根元のTreeBranchから全ての枝に辿れるようにしています。
+ +一本の枝のモデルは、Tubular同様、一本の曲線を分割し、分割された節を1つのCylinderとしてモデル化し、それらをつなぎ合わせていくように構築していきます。
+TreeSegmentクラスは一本の曲線を分割する節(Segment)を表現するクラスです。
+public class TreeSegment {
+ public FrenetFrame Frame { get { return frame; } }
+ public Vector3 Position { get { return position; } }
+ public float Radius { get { return radius; } }
+
+ // TreeSegmentが向いている方向ベクトルtangent、
+ // それと直交するベクトルnormal、binormalを持つFrenetFrame
+ FrenetFrame frame;
+
+ // TreeSegmentの位置
+ Vector3 position;
+
+ // TreeSegmentの幅(半径)
+ float radius;
+
+ public TreeSegment(FrenetFrame frame, Vector3 position, float radius) {
+ this.frame = frame;
+ this.position = position;
+ this.radius = radius;
+ }
+}
+
+1つのTreeSegmentは節が向いている方向のベクトルと直交ベクトルがセットになったFrenetFrame、位置と幅を表す変数を持ち、Cylinderを構築する際の上端と下端に必要な情報を保持します。
+ +ProceduralTreeのモデル生成ロジックはTubularを応用したもので、一本の枝TreeBranchが持つTreeSegmentの配列からTubularモデルを生成し、それらを1つのモデルに集約することで全体の一本の木を形作る、というアプローチでモデリングしています。
+var root = new TreeBranch(
+ generations,
+ length,
+ radius,
+ data
+);
+
+var vertices = new List<Vector3>();
+var normals = new List<Vector3>();
+var tangents = new List<Vector4>();
+var uvs = new List<Vector2>();
+var triangles = new List<int>();
+
+// 木の全長を取得
+// 枝の長さを全長で割ることで、uv座標の高さ(uv.y)が
+// 根元から枝先に至るまで[0.0 ~ 1.0]で変化するように設定する
+float maxLength = TraverseMaxLength(root);
+
+// 再帰的に全ての枝を辿り、1つ1つの枝に対応するMeshを生成する
+Traverse(root, (branch) => {
+ var offset = vertices.Count;
+
+ var vOffset = branch.Offset / maxLength;
+ var vLength = branch.Length / maxLength;
+
+ // 一本の枝から頂点データを生成する
+ for(int i = 0, n = branch.Segments.Count; i < n; i++) {
+ var t = 1f * i / (n - 1);
+ var v = vOffset + vLength * t;
+
+ var segment = branch.Segments[i];
+ var N = segment.Frame.Normal;
+ var B = segment.Frame.Binormal;
+ for(int j = 0; j <= data.radialSegments; j++) {
+ // 0.0 ~ 2π
+ var u = 1f * j / data.radialSegments;
+ float rad = u * PI2;
+
+ float cos = Mathf.Cos(rad), sin = Mathf.Sin(rad);
+ var normal = (cos * N + sin * B).normalized;
+ vertices.Add(segment.Position + segment.Radius * normal);
+ normals.Add(normal);
+
+ var tangent = segment.Frame.Tangent;
+ tangents.Add(new Vector4(tangent.x, tangent.y, tangent.z, 0f));
+
+ uvs.Add(new Vector2(u, v));
+ }
+ }
+
+ // 一本の枝の三角形を構築する
+ for (int j = 1; j <= data.heightSegments; j++) {
+ for (int i = 1; i <= data.radialSegments; i++) {
+ int a = (data.radialSegments + 1) * (j - 1) + (i - 1);
+ int b = (data.radialSegments + 1) * j + (i - 1);
+ int c = (data.radialSegments + 1) * j + i;
+ int d = (data.radialSegments + 1) * (j - 1) + i;
+
+ a += offset;
+ b += offset;
+ c += offset;
+ d += offset;
+
+ triangles.Add(a); triangles.Add(d); triangles.Add(b);
+ triangles.Add(b); triangles.Add(d); triangles.Add(c);
+ }
+ }
+});
+
+var mesh = new Mesh();
+mesh.vertices = vertices.ToArray();
+mesh.normals = normals.ToArray();
+mesh.tangents = tangents.ToArray();
+mesh.uv = uvs.ToArray();
+mesh.triangles = triangles.ToArray();
+mesh.RecalculateBounds();
+
+植物のプロシージャルモデリングは樹木だけでも奥深く、日光の照射率が高くなるように枝分かれすることで自然な木のモデルを得るようにする、といった手法などが考案されています。
+こうした植物のモデリングに興味がある方はL-Systemを考案したAristid Lindenmayerにより執筆されたThe Algorithmic Beauty of Plants*6に様々な手法が紹介されていますので、参考にしてみてください。
+[*6] http://algorithmicbotany.org/papers/#abop
これまで紹介したプロシージャルモデリングの例から、「モデルをパラメータによって変化させながら動的に生成できる」というテクニックの利点を知ることができました。効率的に様々なバリエーションのモデルを作成できるため、コンテンツ開発の効率化のための技術という印象を受けるかもしれません。
+しかし、世の中にあるモデリングツールやスカルプトツールのように、プロシージャルモデリングのテクニックは「ユーザの入力に応じて、インタラクティブにモデルを生成する」という応用も可能です。
+応用例として、東京大学大学院情報工学科の五十嵐健夫氏により考案された、手書きスケッチによる輪郭線から立体モデルを生成する技術「Teddy」についてご紹介します。
+
++図1.18: 手書きスケッチによる3次元モデリングを行う技術「Teddy」のUnityアセット http://uniteddy.info/ja +
+2002年にプレイステーション2用のソフトとして発売された「ガラクタ名作劇場 ラクガキ王国」*7というゲームでは実際にこの技術が用いられ、「自分の描いた絵を3D化してゲーム内のキャラクターとして動かす」という応用が実現されています。
+[*7] https://ja.wikipedia.org/wiki/ラクガキ王国
この技術では、
+[*8] https://en.wikipedia.org/wiki/Delaunay_triangulation
という手順で3次元モデルを生成しています。アルゴリズムの詳細に関してはコンピュータグラフィクスを扱う国際会議SIGGRAPHにて発表された論文が公開されています。*9
+[*9] http://www-ui.is.s.u-tokyo.ac.jp/~takeo/papers/siggraph99.pdf
TeddyはUnityに移植されたバージョンがAsset Storeに公開されているので、誰でもコンテンツにこの技術を組み込むことができます。*10
+[*10] http://uniteddy.info/ja/
このようにプロシージャルモデリングのテクニックを用いれば、独自のモデリングツールを開発することができ、ユーザの創作によって発展していくようなコンテンツを作ることも可能になります。
+ +プロシージャルモデリングのテクニックを使えば、
+が実現できることを見てきました。
+Unity自体はゲームエンジンであるため、本章で紹介した例からはゲームや映像コンテンツ内での応用を想像されるでしょう。
+しかし、コンピュータグラフィックスの技術自体の応用範囲が広いように、モデルを生成する技術の応用範囲も広いものだと考えることができます。冒頭でも述べましたが、建築やプロダクトデザインの分野でもプロシージャルモデリングの手法が利用されていますし、3Dプリンタ技術などのデジタルファブリケーションの発展にともなって、デザインした形を実生活で利用できる機会が個人レベルでも増えてきています。
+このように、どのような分野でデザインした形を利用するかを広い視野で考えると、プロシージャルモデリングのテクニックを応用できる場面が様々なところから見つかるかもしれません。
+ +本書は主にUnityによるグラフィクスプログラミングに関する技術を解説する本です。グラフィクスプログラミングと一言で言っても幅広く、Shaderテクニックだけ取り上げても多くの書籍が出版されています。本書でも、執筆者たちの興味の赴くままに取り上げられた様々なトピックについての記事が掲載されていますが、ビジュアルとしての結果が見えやすく、自分のエフェクト作成に役立てやすい内容が多いはずです。また、各章で解説されているソースコードについてはhttps://github.com/IndieVisualLab/UnityGraphicsProgrammingにて公開していますので、手元で実行しながら本書を読み進めることができます。
+記事によって難易度は様々で、読者の知識量によっては、物足りなかったり、難しすぎる内容のものがあるかと思います。自分の知識量に応じて、気になったトピックの記事を読むのが良いでしょう。普段仕事でグラフィクスプログラミングを行っている人にとって、エフェクトの引き出しを増やすことにつながれば幸いですし、学生の方でビジュアルコーディングに興味があり、ProcessingやopenFrameworksなどは触ったことはあるが、まだまだ3DCGに高い敷居を感じている方にとっては、Unityを導入として3DCGでの表現力の高さや開発の取っ掛かりを知る機会になれば嬉しいです。
+IndieVisualLabは、会社の同僚(&元同僚)たちによって立ち上げられたサークルです。社内ではUnityを使って、一般的にメディアアートと呼ばれる部類の展示作品のコンテンツプログラミングをやっており、ゲーム系とはまた一味違ったUnityの活用をしています。本書の中にも節々に展示作品の中でUnityを活用する際に役立つ知識が散りばめられているかもしれません。
+ +本書についての感想や気になった点、その他要望(〇〇についての解説が読みたい等)がありましたら、ぜひWebフォーム(https://docs.google.com/forms/d/e/1FAIpQLSdxeansJvQGTWfZTBN_2RTuCK_kRqhA6QHTZKVXHCijQnC8zw/viewform)、またはメール(lab.indievisual@gmail.com)よりお知らせください。
+ + diff --git a/articles/RoomProjection.html b/articles/RoomProjection.html new file mode 100644 index 0000000..4589eff --- /dev/null +++ b/articles/RoomProjection.html @@ -0,0 +1,236 @@ + + + + + + + +本章では直方体の形をした部屋の壁面や床面など複数の面にプロジェクターで映像を投影しCG世界の中にいるような体験ができる映像投影方法を紹介します。また、そのバックグラウンドとしてCGにおけるカメラの処理とその応用例について解説します。サンプルプロジェクトは、UnityGraphicsProgrammingのUnityプロジェクト*1内のAssets/RoomProjectionにありますのでよかったらご覧ください。また、本内容は「数学セミナー 2016年12月号」*2に 寄稿した内容を元に大幅に加筆修正を行ったものになります。
+[*1] サンプルプロジェクト https://github.com/IndieVisualLab/UnityGraphicsProgramming
[*2] https://www.nippyo.co.jp/shop/magazine/7292.html
一般的なCGにおけるカメラ処理とは、透視投影変換を用いて見えている範囲の3Dモデルを2次元画像へ射影する処理を行います。透視投影変換は、各モデルの中心を原点に持つローカル座標系、CG世界の一意に決めた場所を原点にするワールド座標系、カメラを中心としたビュー座標系、クリッピング用のクリップ座標系(これはwも意味を持つ4次元の座標系で、3次元化したものをNDC (Normalized Device Coordinates,正規化デバイス座標系)と呼びます)、出力画面の2次元の位置を表すスクリーン座標系、という順番で頂点の座標を射影していきます。
+
++図9.1: 座標変換の流れ +
+また、これらの変換はそれぞれ1つの行列で表すことができるのであらかじめ行列同士を乗算しておくことで、いくつかの座標変換を行列とベクトルの乗算1回で済ませる方法もよく行われています。
+ +CGにおけるカメラでは、頭頂点をカメラの位置に、底面をカメラの向きに合わせた四角錐を視錐台と呼び、カメラの射影を表す3Dボリュームとして図示できます。
+
++図9.2: 視錐台 +
+2つのカメラの視錐台が頭頂点を共有し側面が接していれば、投影面が別々の方向を向いていても映像的には繋がり、かつ、頭頂点から見たときのパースペクティブが一致します。
+
++図9.3: 接する視錐台(わかりやすいように少し離して配置しています) +
+これは視錐台を無数の視線の集合とみなすことで「視線同士が連続している(=パースペクティブ上矛盾のない映像を投影することができる)」と考えることで理解できます。この考えを5つのカメラまで拡張し、5つの視錐台が頭頂点を共有しそれぞれ隣接する視錐台と接するような配置になるよう画角を調整することで、部屋の各面に対応した映像を生成することができます。理論上は天井も含めた6面も可能ですが今回はプロジェクタの設置スペースとして考え、天井を除く5面を想定しています。
+
++図9.4: 部屋に対応した5つの視錐台 +
+この頭頂点、つまり全てのカメラの位置に相当する場所から鑑賞することで、部屋のどの方向を見てもパースペクティブ上矛盾のない映像を鑑賞することができます。
+ +プロジェクション行列(以下Proj)はビュー座標系からクリップ座標系へ変換する行列です。
+として式で表すと以下のようになります。
+C = Proj * V ++
さらにCの各要素をC_{w}で除算することでNDCでの位置座標となります。
+NDC = (\frac{C_{x}}{C_{w}},\frac{C_{y}}{C_{w}},\frac{C_{z}}{C_{w}})
+
+なお、C_{w}=-V_{z}とな(るようにProjを作)ります。ビュー座標系の正面方向がZマイナス方向なのでマイナスがかかっています。NDCでは表示範囲を-1\leq x,y,z\leq 1とし、この変換でV_{z}に応じてV_{x,y}が拡大縮小することにより遠近法の表現が得られます。
+それでは、Projをどのように作ればよいか考えてみましょう。ビュー座標系におけるnearClipPlaneの右上の点の座標をN、farClipPlaneの右上の点の座標をFとしておきます。
+
++図9.5: N,F +
+まずはxに注目してみると、
+を考慮すると
+Proj[0,0] = \frac{N_{z}}{N_{x}}
+
+とすれば良さそうです。x,zの比率は変わらないのでProj[0,0] = \frac{F_{z}}{F_{x}}など視錐台の右端ならどのx,zでも構いません。
+同様に
+Proj[1,1] = \frac{N_{z}}{N_{y}}
+
+も求まります。
+zについては少し工夫が必要です。Proj * Vでzに関わる計算は以下のようになります。
+C_{z} = Proj[2,2]*V_{z} + Proj[2,3]*V_{w} (ただし、V_{w} = 1)
+
+NDC_{z} = \frac{C_{z}}{C_{w}}(ただし、C_{w} = -V_{z})
+
+ここで、N_{z} → -1, F_{z} → 1と変換したいので、a = Proj[2,2], b = Proj[2,3] と置いて
+-1 = \frac{1}{N_{z}} (aN_{z} +b),
+1 = \frac{1}{F_{z}} (aF_{z} +b)
+
+この連立方程式から解が得られます。
+Proj[2,2] = a = \frac{F_{z}+N_{z}}{F_{z}-N_{z}},
+Proj[2,3] = b = \frac{-2F_{z}N_{z}}{F_{z}-N_{z}}
+
+また、C_{w} = -V_{z}となるようにしたいので
+Proj[3,2] = -1 ++
とします。
+したがって求めるProjは以下の形になります。
+Proj = \left(
+\begin{array}{cccc}
+ \frac{N_{z}}{N_{x}} & 0 & 0 & 0\\
+ 0 & \frac{N_{z}}{N_{y}} & 0 & 0\\
+ 0 & 0 & \frac{F_{z}+N_{z}}{F_{z}-N_{z}} & \frac{-2F_{z}N_{z}}{F_{z}-N_{z}} \\
+ 0 & 0 & -1 & 0
+\end{array}
+\right)
+
+シェーダー内などでプロジェクション行列を扱ったことがある方の中にはもしかしたらここまでの内容に違和感を持つ方もいらっしゃるかもしれません。実はUnityのプロジェクション行列の扱いはややこしく、ここまでの内容はCamera.projectionMatrixについての解説になります。この値はプラットフォームによらずOpenGLに準拠しています*3。-1\leq NDC_{z}\leq 1や C_{w} = -V_{z}となるのもこのためです。
+[*3] https://docs.unity3d.com/ScriptReference/GL.GetGPUProjectionMatrix.html
しかしUnity内でシェーダーに渡す際にプラットフォームに依存した形に変換するため、Camera.projectionMatrixをそのまま透視投影変換に使っているとは限りません。とくにNDC_{z}の範囲や向き(つまりZバッファの扱い)は多様でひっかかりやすいポイントになっています*4。
+[*4] https://docs.unity3d.com/Manual/SL-PlatformDifferences.html
視錐台の底面つまり投影面の形はカメラのfov (fieldOfView,画角)とaspect (アスペクト比)に依存しています。Unityのカメラでは画角はInspectorで公開されているものの、アスペクト比は公開されていないのでコードから編集する必要があります。faceSize (部屋の面のサイズ)、distance (視点から面までの距離)から画角とアスペクト比を求めるコードは以下のようになります。
+リスト9.1: 画角とアスペクト比を求める
+camera.aspect = faceSize.x / faceSize.y; +camera.fieldOfView = 2f * Mathf.Atan2(faceSize.y * 0.5f, distance) + * Mathf.Rad2Deg; ++
Mathf.Atan2() でfovの半分の角度をradianで求め、2倍し、Camera.fieldOfViewに代入するためdegreeに直している点に注意して下さい。
+ +視点が部屋の中心にない場合も考慮してみましょう。視点に対して投影面が上下左右に平行移動することができれば、投影面に対して視点が移動したことと同じ効果が得られます。これは現実世界ではプロジェクターなどで映像の投影位置を調整するレンズシフトという機能に相当します。
+
++図9.6: レンズシフト +
+あらためてカメラが透視投影する仕組みに立ち返ってみるとレンズシフトはどの部分で行う処理になるでしょうか?プロジェクション行列でNDCに射影する際に、x,yをずらせば良さそうです。もう一度Projection行列を見てみましょう。
+Proj = \left(
+\begin{array}{cccc}
+ \frac{N_{z}}{N_{x}} & 0 & 0 & 0\\
+ 0 & \frac{N_{z}}{N_{y}} & 0 & 0\\
+ 0 & 0 & \frac{F_{z}+N_{z}}{F_{z}-N_{z}} & \frac{-2F_{z}N_{z}}{F_{z}-N_{z}} \\
+ 0 & 0 & -1 & 0
+\end{array}
+\right)
+
+C_{x},C_{y}がずれればいいので、行列の平行移動成分であるProj[0,3],Proj[1,3]になにか入れたくなりますが、あとでC_{w}で除算されることを考慮すると、Proj[0,2],Proj[1,2]に入れるのが正解です。
+Proj = \left(
+\begin{array}{cccc}
+ \frac{N_{z}}{N_{x}} & 0 & LensShift_{x} & 0\\
+ 0 & \frac{N_{z}}{N_{y}} & LensShift_{y} & 0\\
+ 0 & 0 & \frac{F_{z}+N_{z}}{F_{z}-N_{z}} & \frac{-2F_{z}N_{z}}{F_{z}-N_{z}} \\
+ 0 & 0 & -1 & 0
+\end{array}
+\right)
+
+LensShiftの単位はNDCですので投影面のサイズを-1〜1に正規化したものになります。コードにすると以下のようになります。
+リスト9.2: レンズシフトをプロジェクション行列に反映
+var shift = new Vector2( + positionOffset.x / faceSize.x, + positionOffset.y / faceSize.y +) * 2f; +var projectionMatrix = camera.projectionMatrix; +projectionMatrix[0,2] = shift.x; +projectionMatrix[1,2] = shift.y; +camera.projectionMatrix = projectionMatrix; ++
一度Camera.projectionMatrixにsetするとCamera.ResetProjectionMatrix()を呼ばない限りその後のCamera.fieldOfViewの変更が反映されなくなる点に注意が必要です。*5
+[*5] https://docs.unity3d.com/ScriptReference/Camera-projectionMatrix.html
直方体の部屋で、鑑賞者の視点位置をトラッキングできているものとします。前節の方法で視錐台の投影面のサイズと平行移動ができるので、視点位置を視錐台の頭頂点、壁面や床面を投影面としたときその形状に合うような視錐台を動的に求める事ができます。各カメラをこのような視錐台にすることによって各投影面用の映像を作ることができます。この映像を実際の部屋に投影すれば鑑賞者からはパースのあったCG世界が見えるようになります。
+
++図9.7: 部屋のシミュレーション(俯瞰視点) +
+
++図9.8: 部屋のシミュレーション(一人称視点) +
+本章ではプロジェクション行列を応用することで複数の投影面でパースを合わせる投影方法を紹介しました。目の前にディスプレイを置くのではなく視界の広い範囲を動的に反応する映像にしてしまう点で、昨今のHMD型と似て非なるアプローチのVRと言えるのではないかと思います。また、この方法では両眼視差や目のフォーカスを騙せるわけではないのでそのままでは立体視できずに「壁に投影された動く絵」に見えてしまう可能性があります。没入感を高めるためにはもう少し工夫する必要がありそうです。
+なお、同様の手法を立体視と組み合わせる「CAVE」*6という仕組みが知られています。
+[*6] https://en.wikipedia.org/wiki/Cave_automatic_virtual_environment
Unity で ComputeShader (以降必要に応じて"コンピュートシェーダ") を使う方法について、シンプルに解説します。コンピュートシェーダとは、GPU を使って単純処理を並列化し、大量の演算を高速に実行するために用いられます。また GPU に処理を委譲しますが、通常のレンダリングパイプラインとは異なることが特徴に挙げられます。CG においては、大量のパーティクルの動きを表現するためなどに良く用いられます。
+本章の以降に続く内容の一部にも、コンピュートシェーダが用いられたものがあり、それらを読み進める上で、コンピュートシェーダの知識が必要になります。
+ここではコンピュートシェーダを学習するにあたって、一番最初の足掛かりになるような内容について、2 つの簡単なサンプルを用いて解説しています。これらはコンピュートシェーダのすべての事について扱うものではありませんので、必要に応じて情報を補うようにしてください。
+Unity においては ComputeShader と呼称していますが、類似する技術に OpenCL, DirectCompute, CUDA などが挙げられます。基本概念は類似しており、特に DirectCompute(DirectX) と非常に近い関係にあります。もしアーキテクチャ周辺の概念や更なる詳細情報が必要になるときは、これらについても合わせて情報を集めるようにすると良いと思います。
+本章のサンプルは https://github.com/IndieVisualLab/UnityGraphicsProgramming の「SimpleComputeShader」です。
+ +
++図2.1: カーネル、スレッド、グループのイメージ +
+具体的な実装を解説するよりも前に、コンピュートシェーダで取り扱われる カーネル(Kernel)、スレッド(Thread)、グループ(Group) の概念を説明しておく必要があります。
+カーネル とは、GPU で実行される 1 つの処理を指し、コード上では 1 つの関数として扱われます(一般的なシステム用語における意味でのカーネルに相当)。
+スレッドとは、カーネルを実行する単位です。1 スレッドが、1 カーネルを実行します。コンピュートシェーダではカーネルを複数のスレッドで並行して同時に実行することができます。スレッドは (x, y, z) の3次元で指定します。
+例えば、(4, 1, 1) なら 4 * 1 * 1 = 4 つのスレッドが同時に実行されます。(2, 2, 1) なら、2 * 2 * 1 = 4 つのスレッドが同時に実行されます。同じ 4 つのスレッドが実行されますが、状況に応じて、後者のような 2 次元でスレッドを指定する方が効率が良いことがあります。これについては後に続いて解説します。ひとまずスレッド数は 3 次元で指定されるという認識が必要です。
+最後にグループとは、スレッドを実行する単位です。また、あるグループが実行するスレッドはグループスレッドと呼ばれます。例えば、あるグループが単位当たり、(4, 1, 1) スレッドを持つとします。このグループが 2 つあるとき、それぞれのグループが、(4, 1, 1) のスレッドを持ちます。
+グループもスレッドと同様に 3 次元で指定されます。例えば、(2, 1, 1) グループが、(4, 4, 1) スレッドで実行されるカーネルを実行するとき、グループ数は 2 * 1 * 1 = 2 です。この 2 つのグループは、それぞれ 4 * 4 * 1 = 16 スレッドを持つことになります。したがって、合計スレッド数は、2 * 16 = 32 となります。
+ +サンプル (1) 「SampleScene_Array」では、適当な計算をコンピュートシェーダで実行し、その結果を配列として取得する方法について扱います。サンプルには次のような操作が含まれます。
+サンプル (1) の実行結果は次の通りになります。デバッグ出力だけですから、ソースコードを読みながら動作を確認してください。
+
++図2.2: サンプル (1) の実行結果 +
+ここからサンプルを実例に解説を進めます。非常に短いので、コンピュートシェーダの実装については先に一通り目を通して頂くのが良いと思います。基本構成として、関数の定義、関数の実装、バッファがあり、必要に応じて変数があります。
+SimpleComputeShader_Array.compute
+#pragma kernel KernelFunction_A
+#pragma kernel KernelFunction_B
+
+RWStructuredBuffer<int> intBuffer;
+int intValue;
+
+[numthreads(4, 1, 1)]
+void KernelFunction_A(uint3 groupID : SV_GroupID,
+ uint3 groupThreadID : SV_GroupThreadID)
+{
+ intBuffer[groupThreadID.x] = groupThreadID.x * intValue;
+}
+
+[numthreads(4, 1, 1)]
+void KernelFunction_B(uint3 groupID : SV_GroupID,
+ uint3 groupThreadID : SV_GroupThreadID)
+{
+ intBuffer[groupThreadID.x] += 1;
+}
+
+特徴として、numthreads 属性と、SV_GroupID セマンティクスなどがありますが、これについては後述します。
+ +先に解説した通り、正確な定義はさておき、カーネルは GPU で実行される1つの処理を指し、コード上では 1 つの関数として扱われます。カーネルは 1 つのコンピュートシェーダに複数実装することができます。
+この例では、カーネルは KernelFunction_A ないし KernelFunction_B 関数がカーネルに相当します。また、カーネルとして扱う関数は #pragma kernel を使って定義します。これによってカーネルとそれ以外の関数と識別します。
定義された複数のカーネルのうち、任意の 1 つを識別するために、固有のインデックスがカーネルに与えられます。インデックスは #pragma kernel で定義された順に、上から 0, 1 … と与えられます。
+コンピュートシェーダで実行した結果を保存するバッファ領域を作っておきます。サンプルの変数 RWStructuredBuffer<int> intBuffer がこれに相当します。
またスクリプト (CPU) 側から任意の値を与えたい場合には、一般的な CPU プログラミングと同じように変数を用意します。この例では変数 intValue がこれに相当し、スクリプトから値を渡します。
numthreads 属性 (Attribute) は、カーネル (関数) を実行するスレッドの数を指定します。スレッド数の指定は、(x, y, z) で指定し、例えば (4, 1, 1) なら、 4 * 1 * 1 = 4 スレッドでカーネルを実行します。他に、(2, 2, 1) なら 2 * 2 * 1 = 4 スレッドでカーネルを実行します。共に 4 スレッドで実行されますが、この違いや使い分けについては後述します。
+ +カーネルに設定できる引数には制約があり、一般的な CPU プログラミングと比較して自由度は極めて低いです。
+引数に続く値をセマンティクスと呼び、この例では groupID : SV_GroupID と groupThreadID : SV_GroupThreadID を設定しています。セマンティクスは引数がどのような値であるかを示すための物であり、他の名前に変更することができません。
引数名 (変数名) は自由に定義することができますが、コンピュートシェーダを使うにあたって定義されるセマンティクスのいずれかを設定する必要があります。つまり、任意の型の引数を定義してカーネル内で参照する、といった実装はできず、カーネルで参照することができる引数は、定められた限定的なものから選択する、ということです。
+SV_GroupID は、カーネルを実行するスレッドが、どのグループで実行されているかを (x, y, z) で示します。SV_GroupThreadID は、カーネルを実行するスレッドが、グループ内の何番目のスレッドであるかを (x, y, z) で示します。
例えば (4, 4, 1) のグループで、(2, 2, 1) のスレッドを実行するとき、SV_GroupID は (0 ~ 3, 0 ~ 3, 0) の値を返します。SV_GroupThreadID は (0 ~ 1, 0 ~ 1, 0) の値を返します。
サンプルで設定されるセマンティクス以外にも SV_~ から始まるセマンティクスがあり、利用することができますが、ここでは説明を割愛します。一通りコンピュートシェーダの動きが分かった後に目を通すほうが良いと思います。
サンプルでは、用意したバッファに、順にスレッド番号を代入していく処理を行っています。groupThreadID は、あるグループで実行されるスレッド番号が与えられます。このカーネルは (4, 1, 1) スレッドで実行されますから、groupThreadID は (0 ~ 3, 0, 0) が与えられます。
SimpleComputeShader_Array.compute
+[numthreads(4, 1, 1)]
+void KernelFunction_A(uint3 groupID : SV_GroupID,
+ uint3 groupThreadID : SV_GroupThreadID)
+{
+ intBuffer[groupThreadID.x] = groupThreadID.x * intValue;
+}
+
+今回のサンプルはこのスレッドを、(1, 1, 1) のグループで実行します (後述するスクリプトから) 。すなわちグループを 1 つだけ実行し、そのグループには、4 * 1 * 1 のスレッドが含まれます。結果としてgroupThreadID.x には 0 ~ 3 の値が与えられることを確認してください。
※この例では groupID を利用していませんが、スレッドと同様に、3次元で指定されるグループ数が与えられます。代入してみるなどして、コンピュートシェーダの動きを確認するために使ってみてください。
実装したコンピュートシェーダをスクリプトから実行します。スクリプト側で必要になるものは概ね次の通りです。
+computeShader、kernelIndex_KernelFunction_A, B(int)、intComputeBuffer(ComputeBuffer)
SimpleComputeShader_Array.cs
+public ComputeShader computeShader;
+int kernelIndex_KernelFunction_A;
+int kernelIndex_KernelFunction_B;
+ComputeBuffer intComputeBuffer;
+
+void Start()
+{
+ this.kernelIndex_KernelFunction_A
+ = this.computeShader.FindKernel("KernelFunction_A");
+ this.kernelIndex_KernelFunction_B
+ = this.computeShader.FindKernel("KernelFunction_B");
+
+ this.intComputeBuffer = new ComputeBuffer(4, sizeof(int));
+ this.computeShader.SetBuffer
+ (this.kernelIndex_KernelFunction_A,
+ "intBuffer", this.intComputeBuffer);
+
+ this.computeShader.SetInt("intValue", 1);
+ …
+
+あるカーネルを実行するためには、そのカーネルを指定するためのインデックス情報が必要です。インデックスは #pragma kernel で定義された順に、上から 0, 1 … と与えられますが、スクリプト側から FindKernel 関数を使うのが良いでしょう。
SimpleComputeShader_Array.cs
+this.kernelIndex_KernelFunction_A
+ = this.computeShader.FindKernel("KernelFunction_A");
+
+this.kernelIndex_KernelFunction_B
+ = this.computeShader.FindKernel("KernelFunction_B");
+
+コンピュートシェーダ (GPU) による演算結果を CPU 側に保存するためのバッファ領域を用意します。Unity では ComputeBuffer として定義されています。
SimpleComputeShader_Array.cs
+this.intComputeBuffer = new ComputeBuffer(4, sizeof(int)); +this.computeShader.SetBuffer + (this.kernelIndex_KernelFunction_A, "intBuffer", this.intComputeBuffer); ++
ComputeBuffer を (1) 保存する領域のサイズ、(2) 保存するデータの単位当たりのサイズを指定して初期化します。ここでは int 型のサイズ 4 つ分の領域が用意されています。これはコンピュートシェーダの実行結果が int[4] として保存されるためです。必要に応じてサイズを変更します。
次いで、コンピュートシェーダに実装された、(1) どのカーネルが実行するときに、(2) どの GPU 上のバッファを使うのかを指定し、(3) CPU 上のどのバッファに相当するのか、を指定します。
+この例では、(1) KernelFunction_A が実行されるときに参照される、(2) intBuffer なるバッファ領域は、(3) intComputeBuffer に相当する、と指定されます。
SimpleComputeShader_Array.cs
+this.computeShader.SetInt("intValue", 1);
+
+処理したい内容によってはスクリプト (CPU) 側からコンピュートシェーダ (GPU) 側に値を渡し、参照したい場合があると思います。ほとんどの型の値は ComputeShader.Set~ を使って、コンピュートシェーダ内にある変数に設定することができます。このとき、引数に指定する変数名と、コンピュートシェーダ内に定義された変数名は一致する必要があります。この例では intValue に 1 を渡しています。
コンピュートシェーダに実装(定義)されたカーネルは、ComputeShader.Dispatch メソッドで実行します。指定したインデックスのカーネルを、指定したグループ数で実行します。グループ数は X * Y * Z で指定します。このサンプルでは 1 * 1 * 1 = 1 グループです。
SimpleComputeShader_Array.cs
+this.computeShader.Dispatch
+ (this.kernelIndex_KernelFunction_A, 1, 1, 1);
+
+int[] result = new int[4];
+
+this.intComputeBuffer.GetData(result);
+
+for (int i = 0; i < 4; i++)
+{
+ Debug.Log(result[i]);
+}
+
+コンピュートシェーダ (カーネル) の実行結果は、ComputeBuffer.GetData で取得します。
あらためてコンピュートシェーダ側の実装を確認します。このサンプルでは次のカーネルを 1 * 1 * 1 = 1グループで実行しています。スレッドは、4 * 1 * 1 = 4 スレッドです。また intValue にはスクリプトから 1 を与えています。
SimpleComputeShader_Array.compute
+[numthreads(4, 1, 1)]
+void KernelFunction_A(uint3 groupID : SV_GroupID,
+ uint3 groupThreadID : SV_GroupThreadID)
+{
+ intBuffer[groupThreadID.x] = groupThreadID.x * intValue;
+}
+
+groupThreadID(SV_GroupThreadID) は、今このカーネルが、グループ内の何番目のスレッドで実行されているかを示す値が入るので、この例では (0 ~ 3, 0, 0) が入ります。したがって、groupThreadID.x は 0 ~ 3 です。つまり、intBuffer[0] = 0 ~ intBuffer[3] = 3 までが並列して実行されることになります。
1 つのコンピュートシェーダに実装した異なるカーネルを実行するときは、別のカーネルのインデックスを指定します。この例では、KernelFunction_A を実行した後に KernelFunction_B を実行します。さらに KernelFunction_A で利用したバッファ領域を、KernelFunction_B でも使っています。
SimpleComputeShader_Array.cs
+this.computeShader.SetBuffer
+(this.kernelIndex_KernelFunction_B, "intBuffer", this.intComputeBuffer);
+
+this.computeShader.Dispatch(this.kernelIndex_KernelFunction_B, 1, 1, 1);
+
+this.intComputeBuffer.GetData(result);
+
+for (int i = 0; i < 4; i++)
+{
+ Debug.Log(result[i]);
+}
+
+KernelFunction_B は次のようなコードを実行します。このとき intBuffer は KernelFunction_A で使ったものを引き続き指定している点に注意してください。
SimpleComputeShader_Array.compute
+RWStructuredBuffer<int> intBuffer;
+
+[numthreads(4, 1, 1)]
+void KernelFunction_B
+(uint3 groupID : SV_GroupID, uint3 groupThreadID : SV_GroupThreadID)
+{
+ intBuffer[groupThreadID.x] += 1;
+}
+
+このサンプルでは、KernelFunction_A によって intBuffer に 0 ~ 3 が順に与えられています。したがって KernelFunction_B を実行した後は、値が 1 ~ 4 になることを確認します。
利用し終えた ComputeBuffer は、明示的に破棄する必要があります。
+SimpleComputeShader_Array.cs
+this.intComputeBuffer.Release(); ++
多次元のスレッドまたはグループを指定する意図について、このサンプルでは解説していません。例えば、 (4, 1, 1) スレッドと、(2, 2, 1) スレッドは、どちらも 4 スレッド実行されますが、この 2 つは使い分ける意味があります。これについては後に続くサンプル (2) で解説します。
+ +サンプル (2) 「SampleScene_Texture」では、コンピュートシェーダの算出結果をテクスチャにして取得します。サンプルには次のような操作が含まれます。
+サンプル (2) の実行結果は次の通りになります。横方向と縦方向にグラデーションするテクスチャを生成します。
+
++図2.3: サンプル (2) の実行結果 +
+全体の実装についてはサンプルを参照してください。このサンプルでは概ね次のようなコードをコンピュートシェーダで実行します。カーネルが多次元スレッドで実行される点に注目してください。(8, 8, 1) なので、1 グループあたり、8 * 8 * 1 = 64 スレッドで実行されます。また演算結果の保存先が RWTexture2D<float4> であることも大きな変更点です。
SimpleComputeShader_Texture.compute
+RWTexture2D<float4> textureBuffer;
+
+[numthreads(8, 8, 1)]
+void KernelFunction_A(uint3 dispatchThreadID : SV_DispatchThreadID)
+{
+ float width, height;
+ textureBuffer.GetDimensions(width, height);
+
+ textureBuffer[dispatchThreadID.xy]
+ = float4(dispatchThreadID.x / width,
+ dispatchThreadID.x / width,
+ dispatchThreadID.x / width,
+ 1);
+}
+
+サンプル (1) では SV_DispatchThreadID セマンティクスは使いませんでした。少々複雑ですが、「あるカーネルを実行するスレッドが、すべてのスレッドの中のどこに位置するか (x,y,z) 」を示しています。
SV_DispatchThreadID は、SV_GroupID * numthreads + SV_GroupThreadID で算出される値です。SV_GroupID はあるグループを (x, y, z) で示し、SV_GroupThreadID は、あるグループに含まれるスレッドを (x, y, z) で示します。
例えば、(2, 2, 1) グループで、(4, 1, 1) スレッドで実行される、カーネルを実行するとします。その内の 1 つのカーネルは、(0, 1, 0) 番目のグループに含まれる、(2, 0, 0) 番目のスレッドで実行されます。このとき SV_DispatchThreadID は、(0, 1, 0) * (4, 1, 1) + (2, 0, 0) = (0, 1, 0) + (2, 0, 0) = (2, 1, 0) になります。
今度は最大値を考えましょう。(2, 2, 1) グループで、(4, 1, 1) スレッドでカーネルが実行されるとき、(1, 1, 0) 番目のグループに含まれる、(3, 0, 0) 番目のスレッドが最後のスレッドです。このとき SV_DispatchThreadID は、(1, 1, 0) * (4, 1, 1) + (3, 0, 0) = (4, 1, 0) + (3, 0, 0) = (7, 1, 0) になります。
以降は時系列順に解説するのが困難ですので、サンプル全体に目を通しながら確認してください。
+サンプル (2) の dispatchThreadID.xy は、テクスチャ上にあるすべてのピクセルを示すように、グループとスレッドを設定しています。グループを設定するのはスクリプト側なので、スクリプトとコンピュートシェーダを横断して確認する必要があります。
SimpleComputeShader_Texture.compute
+textureBuffer[dispatchThreadID.xy] + = float4(dispatchThreadID.x / width, + dispatchThreadID.x / width, + dispatchThreadID.x / width, + 1); ++
このサンプルでは仮に 512x512 のテクスチャを用意していますが、dispatchThreadID.x が 0 ~ 511 を示すとき、dispatchThreadID.x / width は、0 ~ 0.998… を示します。つまり dispatchThreadID.xy の値( = ピクセル座標)が大きくなるにつれて、黒から白に塗りつぶしていくことになります。
テクスチャは、RGBA チャネルから構成され、それぞれ 0 ~ 1 で設定します。すべて 0 のとき、完全に黒くなり、すべて 1 のとき、完全に白くなります。
+以降がスクリプト側の実装の解説です。サンプル (1) では、コンピュートシェーダの計算結果を保存するために配列のバッファを用意しました。サンプル (2) では、その代わりにテクスチャを用意します。
+SimpleComputeShader_Texture.cs
+RenderTexture renderTexture_A;
+…
+void Start()
+{
+ this.renderTexture_A = new RenderTexture
+ (512, 512, 0, RenderTextureFormat.ARGB32);
+ this.renderTexture_A.enableRandomWrite = true;
+ this.renderTexture_A.Create();
+…
+
+解像度とフォーマットを指定して RenderTexture を初期化します。このとき RenderTexture.enableRandomWrite を有効にして、テクスチャへの書き込みを有効にしている点に注意します。
カーネルのインデックスが取得できるように、カーネルがどれくらいのスレッド数で実行できるかも取得できます(スレッドサイズ)。
+SimpleComputeShader_Texture.cs
+void Start()
+{
+…
+ uint threadSizeX, threadSizeY, threadSizeZ;
+
+ this.computeShader.GetKernelThreadGroupSizes
+ (this.kernelIndex_KernelFunction_A,
+ out threadSizeX, out threadSizeY, out threadSizeZ);
+…
+
Dispatch メソッドで処理を実行します。このとき、グループ数の指定方法に注意します。この例では、グループ数は「テクスチャの水平(垂直)方向の解像度 / 水平(垂直)方向のスレッド数」で算出しています。
水平方向について考えるとき、テクスチャの解像度は 512、スレッド数は 8 ですから、水平方向のグループ数は 512 / 8 = 64 になります。同様に垂直方向も 64 です。したがって、合計グループ数は 64 * 64 = 4096 になります。
+SimpleComputeShader_Texture.cs
+void Update()
+{
+ this.computeShader.Dispatch
+ (this.kernelIndex_KernelFunction_A,
+ this.renderTexture_A.width / this.kernelThreadSize_KernelFunction_A.x,
+ this.renderTexture_A.height / this.kernelThreadSize_KernelFunction_A.y,
+ this.kernelThreadSize_KernelFunction_A.z);
+
+ plane_A.GetComponent<Renderer>()
+ .material.mainTexture = this.renderTexture_A;
+
+言い換えれば、各グループは 8 * 8 * 1 = 64 (= スレッド数) ピクセルずつ処理することになります。グループは 4096 あるので、4096 * 64 = 262,144 ピクセル処理します。画像は、512 * 512 = 262,144 ピクセルなので、ちょうどすべてのピクセルを並列に処理できたことになります。
+ +もう一方のカーネルは、x ではなく、 y 座標を使って塗りつぶしていきます。このとき 0 に近い値、黒い色が下のほうに表れている点に注意します。テクスチャを操作するときは原点を考慮しなければならないこともあります。
+ +サンプル (2) のように、多次元の結果が必要な場合、あるいは多次元の演算が必要な場合には、多次元のスレッドやグループが有効に働きます。もしサンプル (2) を 1 次元のスレッドで処理しようとすると、縦方向のピクセル座標を任意に算出する必要があります。
+実際に実装しようとすると確認できますが、画像処理でいうところのストライド、例えば 512x512 の画像があるとき、その 513 番目のピクセルは、(0, 1) 座標になる、といった算出が必要になります。
+演算数は削減したほうが良いですし、高度な処理を行うにしたがって複雑さは増します。コンピュートシェーダを使った処理を設計するときは、上手く多次元を活用できないか検討するのが良いです。
+ +本章ではコンピュートシェーダについてサンプルを解説する形式で入門情報としましたが、これから先、学習を進める上で必要ないくつかの情報を補足します。
+ +
++図2.4: GPU アーキテクチャのイメージ +
+GPU のアーキテクチャ・構造についての基本的な知識があれば、コンピュートシェーダを使った処理の実装の際、それを最適化するために役に立つので、少しだけここで紹介します。
+GPU は多数の Streaming Multiprocessor(SM) が搭載されていて、それらが分担・並列化して与えられた処理を実行します。
+SM には更に小さな Streaming Processor(SP) が複数搭載されていて、SM に割り当てられた処理を SP が計算する、といった形式です。
+SM にはレジスタとシェアードメモリが搭載されていて、グローバルメモリ(DRAM上のメモリ)よりも高速に読み書きすることができます。レジスタは関数内でのみ参照されるローカル変数に使われ、シェアードメモリは同一 SM 内に所属するすべての SP から参照し書き込むことができます。
+つまり、各メモリの最大サイズやスコープを把握し、無駄なく高速にメモリを読み書きできる最適な実装を実現できるのが理想です。
+例えば最も考慮する必要があるであろうシェアードメモリは、記憶クラス修飾子 (storage-class modifiers) groupshared を使って定義します。ここでは入門なので具体的な導入例を割愛しますが、最適化に必要な技術・用語として覚えておいて、以降の学習に役立ててください。
SP に最も近い位置に置かれ、最も高速にアクセスできるメモリ領域です。4 byte 単位で構成され、カーネル(関数)スコープの変数が配置されます。スレッドごとに独立するため共有することができません。
+ +SM に置かれるメモリ領域で、L1 キャッシュと合わせて管理されています。同じ SM 内にある SP(= スレッド) で共有することができ、かつ十分に高速にアクセスすることができます。
+GPU 上ではなく DRAM 上のメモリ領域です。GPU 上のプロセッサからは離れた位置にあるため参照は低速です。一方で、容量が大きく、すべてのスレッドからデータの読み書きが可能です。
+ +GPU 上ではなく DRAM 上のメモリ領域で、レジスタに収まらないデータを格納します。GPU 上のプロセッサからは離れた位置にあるため参照は低速です。
+ +テクスチャデータ専用のメモリで、グローバルメモリをテクスチャ専用に扱います。
+ +読み込み専用のメモリで、カーネル(関数)の引数や定数を保存しておくためなどに使われます。専用のキャッシュを持っていて、グローバルメモリよりも高速に参照できます。
+ +総スレッド数が実際に処理したいデータ数よりも大きい場合は、無意味に実行される (あるいは処理されない) スレッドが生じることになり非効率です。総スレッド数は可能な限り処理したいデータ数と一致させるように設計します。
+ +執筆時時点での現行スペックの上限を紹介します。最新版でない可能性があることに十分に注意してください。ただし、これらのような制限を考慮しつつ実装することが求められます。
+スレッド数やグループ数の限界については、解説中に言及しませんでした。これはシェーダモデル(バージョン)によって変更されるためです。今後も並列できる数は増えていくものと思われます。
+またグループの限界は (x, y, z) でそれぞれ 65535 です。
+シェアードメモリの上限は、単位グループあたり 16 KB,あるスレッドが書き込めるシェアードメモリのサイズは、スレッドあたり 256 byte までと制限されています。
+ +本章でのその他の参考は以下の通りです。
+本章ではレンダリングパイプラインのステージの一つであるGeometry Shader(ジオメトリシェーダー)についての説明を主軸として、Geometry Shaderを用いた動的な草生成シェーダー(俗に言うGrass Shader)を解説しています。
+Geometry Shaderの説明についてはいくらかの専門的用語を用いていますが、とりあえずGeometry Shaderを使ってみたければサンプルコードを見て頂くのが手っ取り早いでしょう。
+本章のUnityプロジェクトは以下のGithubリポジトリにアップロードしてあります。
+https://github.com/IndieVisualLab/UnityGraphicsProgramming/
+ +Geometry Shaderとは、GPU上で動的にプリミティブ(メッシュを構成する基本形状)の変換・生成・削除などが可能なプログラマブルシェーダーの一つです。
+これまでプリミティブを変換するなど、動的にメッシュ形状を変化させようとすると、CPU上で処理を行うか、事前に頂点にメタ情報を持たせておきVertex Shaderで変換するなどの工夫が必要でした。しかし、Vertex Shaderでは隣接する頂点に関する情報を取得することが出来ず、処理中の頂点を元に新しく頂点を生成したり、また逆に削除したりする事が出来ないなどの強い制約がありました。また、だからといってCPUで処理を行うと、リアルタイム処理という観点からすると非現実的なほど膨大な時間を要することになります。この様に、リアルタイムにメッシュを形状変化させることに関しては、今までいくつかの問題を抱えていました。
+そこで、これらの問題を解決し、弱い制約の中で自由に変換処理を出来るようにするための機能として、DirectX10やOpenGL3.2にて標準搭載されたのがGeometry Shaderです。なお、OpenGLではPrimitive Shaderとも呼ばれることがあります。
+ +レンダリングパイプライン上ではVertex Shaderの次、Fragment Shaderやラスタライズ処理の前に位置しています。つまり、Fragment Shader内では、Geometry Shaderにて動的に生成した頂点とVertex Shaderに渡された元々の頂点とを区別せずに処理されます。
+ +通常Vertex Shaderへの入力情報は頂点単位となっており、その頂点についての変換処理を行います。ですが、Geometry Shaderへの入力情報はユーザによって定義された入力用プリミティブ単位となります。
+実際のプログラムは後述してありますが、Vertex Shaderにて処理をした頂点情報群が、入力用プリミティブ型に基いて分割して入力されることになります。例えば入力のプリミティブ型をtriangleとすれば3つの頂点情報が、lineとすれば2つの頂点情報が、pointとすれば1つの頂点情報が渡されます。これによってvertex shaderでは出来なかった、他の頂点情報を参照しながら処理を行なう事が可能となり、幅広い計算が出来るようになります。
+なお一つ注意が必要な点として、Vertex Shaderは頂点単位で処理が行われ、その処理する頂点についての情報が渡されますが、Geometry Shaderは入力用プリミティブ型とは関係なく、プリミティブアセンブリのトポロジによって決定されるプリミティブを単位として処理が行われます。つまり、図6.1のようにトポロジがTrianglesのQuadメッシュにGeometry Shaderを実行する場合、Geometry Shaderは三角形①と②について計2回実行されます。この時、入力用プリミティブ型をLineとした場合、入力に渡される情報は三角形①の時は頂点0,1,2のうちの二点の頂点、②の時は頂点0,2,3のうちの二点の頂点となります。
+
++図6.1: Quadメッシュ +
+Geometry Shaderの出力はユーザ定義の出力用プリミティブ型用の頂点情報群となります。Vertex Shaderでは1入力1出力となっていましたが、Geometry Shaderは複数の情報を出力する事になり、出力情報によって生成されるプリミティブは1つ以上でも問題ありません。
+例えば出力プリミティブ型をtriangleと定義した上で新しく計算によって求めた頂点を計9つ出力した場合は、3つの三角形がGeometry Shaderによって生成された事になります。この処理は前述の通りプリミティブ単位にて行われるため、元々1つだった三角形が3つに増えたとも考えられます。
+また、Geometry ShaderにはMaxVertexCountという、一回の処理で最大何点の頂点を出力するかを事前に設定しておく必要があります。例えばMaxVertexCountを9と設定した場合は、Geometry Shaderは0点 ~ 9点までの頂点数を出力することが出来るようになります。この数値は後述する『Geometry Shaderの制限』によって、一般的には1024が一応の最大値となります。
+なお、頂点情報を出力する上で気を付けなければならない点として、元々のメッシュの形状を維持した状態で新しく頂点を追加する場合は、Vertex Shaderから送られてきた頂点情報についてもGeometry Shaderにて出力する必要があります。Geometry ShaderはVertex Shaderの出力に追加していくという挙動ではなく、Geometry Shaderの出力がラスタライズ処理が行われ、Fragment Shaderに渡されます。逆説的に言えば、Geometry Shaderの出力を0にすることによって、動的に頂点数を減らすことも出来ます。
+ +Geometry Shaderには1回の出力に関して、最大出力頂点数と最大出力要素数という制限があります。最大出力頂点数は文字通り頂点数の限界値であり、GPUに依存した数値ではありますが1024などが一般的なので、1つの三角形から最大で1024点までしか頂点を増やすことが出来ます。最大出力要素数における要素とは座標や色などの頂点が持っている情報の事であり、一般的には(x, y, z, w)の位置要素と(r, g, b, a)の色要素の計8要素となります。この要素の最大出力数もGPUに依存しますが同じく1024が一般的なので、出力は最大でも128(1024/8)に制限される事になります。
+この二つの制限は両方を満たす必要があるため、頂点数的には1024点の出力が可能でも、要素数側の制約によって、実際のGeometry Shaderの出力は128点までが限界となります。ですので、例えばプリミティブ数が2のメッシュ(Quadメッシュなど)に対してGeometry Shaderを利用した場合は、最大でも256点(128点 * 2プリミティブ)の頂点数までしか頂点を扱うことは出来ません。
+この128点という数字が、前項のMaxVertexCountに設定できる数値の限界値となります。
+ +以下にシンプルな挙動のGeometry Shaderのプログラムが記載してあります。前項までの説明について実際のプログラムと照らし合わせながら改めて説明していきます。
+なお、Geometry Shader以外について、Unityでシェーダーを記述する際に必要なShaderLabのシンタックスなどに関する説明は本章では省略しますので、もし分からない部分がありましたら下記の公式ドキュメントを参照してみてください。
+https://docs.unity3d.com/ja/current/Manual/SL-Reference.html
+Shader "Custom/SimpleGeometryShader"
+{
+ Properties
+ {
+ _Height("Height", float) = 5.0
+ _TopColor("Top Color", Color) = (0.0, 0.0, 1.0, 1.0)
+ _BottomColor("Bottom Color", Color) = (1.0, 0.0, 0.0, 1.0)
+ }
+ SubShader
+ {
+ Tags { "RenderType" = "Opaque"}
+ LOD 100
+
+ Cull Off
+ Lighting Off
+
+ Pass
+ {
+ CGPROGRAM
+ #pragma target 5.0
+ #pragma vertex vert
+ #pragma geometry geom
+ #pragma fragment frag
+ #include "UnityCG.cginc"
+
+ uniform float _Height;
+ uniform float4 _TopColor, _BottomColor;
+
+ struct v2g
+ {
+ float4 pos : SV_POSITION;
+ };
+
+ struct g2f
+ {
+ float4 pos : SV_POSITION;
+ float4 col : COLOR;
+ };
+
+ v2g vert(appdata_full v)
+ {
+ v2g o;
+ o.pos = v.vertex;
+
+ return o;
+ }
+
+ [maxvertexcount(12)]
+ void geom(triangle v2g input[3],
+ inout TriangleStream<g2f> outStream)
+ {
+ float4 p0 = input[0].pos;
+ float4 p1 = input[1].pos;
+ float4 p2 = input[2].pos;
+
+ float4 c = float4(0.0f, 0.0f, -_Height, 1.0f)
+ + (p0 + p1 + p2) * 0.33333f;
+
+ g2f out0;
+ out0.pos = UnityObjectToClipPos(p0);
+ out0.col = _BottomColor;
+
+ g2f out1;
+ out1.pos = UnityObjectToClipPos(p1);
+ out1.col = _BottomColor;
+
+ g2f out2;
+ out2.pos = UnityObjectToClipPos(p2);
+ out2.col = _BottomColor;
+
+ g2f o;
+ o.pos = UnityObjectToClipPos(c);
+ o.col = _TopColor;
+
+ // bottom
+ outStream.Append(out0);
+ outStream.Append(out1);
+ outStream.Append(out2);
+ outStream.RestartStrip();
+
+ // sides
+ outStream.Append(out0);
+ outStream.Append(out1);
+ outStream.Append(o);
+ outStream.RestartStrip();
+
+ outStream.Append(out1);
+ outStream.Append(out2);
+ outStream.Append(o);
+ outStream.RestartStrip();
+
+ outStream.Append(out2);
+ outStream.Append(out0);
+ outStream.Append(o);
+ outStream.RestartStrip();
+ }
+
+ float4 frag(g2f i) : COLOR
+ {
+ return i.col;
+ }
+ ENDCG
+ }
+ }
+}
+
+このシェーダーでは、渡された三角形の中心座標を計算してさらに上方向に移動させ、渡されてきた三角形の各頂点と計算して求めた新しい座標を接続させています。つまり、平面的な三角形から簡単な三角錐を生成していることになります。
+なので、このシェーダーをQuadメッシュ(2つの三角形から構成されている)に適用すると、図6.2から図6.3のようになります。
+
++図6.2: この様な平たい板から +
+
++図6.3: 立体的な二つの三角錐が表示されるようになります +
+このシェーダーの中で、特にGeometry Shaderに関する部分だけを抜き出して説明していきます。
+#pragma target 5.0 +#pragma vertex vert + +// Geometry Shaderの利用を宣言 +#pragma geometry geom + +#pragma fragment frag +#include "UnityCG.cginc" ++
上記の宣言部分にて、geomという名前の関数がGeometry Shader用関数であることを宣言しています。これによってGeometry Shaderステージになった時にgeom関数が呼び出されるようになります。
[maxvertexcount(12)] +void geom(triangle v2g input[3], inout TriangleStream<g2f> outStream) ++
これがGeometry Shader用の関数宣言です。
+ +triangle v2g input[3] ++
ここが入力に関する部分です。
+今回は三角形を元に三角錐を生成したいので、入力はtriangleとしています。これにより、単位プリミティブである三角形の各頂点情報が入力されるようになり、三角形は3点の頂点から構成されますので、受け取っている仮引数は長さ3の配列となります。なので、もし入力をtriangleではなくpointにした場合は構成する頂点は1点のみなので、geom(point v2g input[1])の様に長さ1の配列で受け取ることになります。
inout TriangleStream<g2f> outStream ++
ここが出力に関する部分です。
+今回生成するメッシュのプリミティブは三角形としたいため、TriangleStream型で宣言しています。TriangleStream型は出力が三角形ストリップである事を意味しているため、出力した各頂点情報を元に三角形を生成してくれるようになります。他にもPointStream型やLineStream型などがありますので、目的に応じて出力のプリミティブ型を選択する必要があります。
また、[maxvertexcount(12)]の部分にて最大出力数を12に設定してあります。これは三角錐を構成する三角形の数は底辺の1つと側面の3つで計4つであり、一つの三角形に付き頂点数が3点必要なので、3 * 4で12点の頂点を出力することになるため12と設定してあります。
g2f out0; +out0.pos = UnityObjectToClipPos(p0); +out0.col = _BottomColor; + +g2f out1; +out1.pos = UnityObjectToClipPos(p1); +out1.col = _BottomColor; + +g2f out2; +out2.pos = UnityObjectToClipPos(p2); +out2.col = _BottomColor; + +g2f o; +o.pos = UnityObjectToClipPos(c); +o.col = _TopColor; + +// bottom +outStream.Append(out0); +outStream.Append(out1); +outStream.Append(out2); +outStream.RestartStrip(); + +// sides +outStream.Append(out0); +outStream.Append(out1); +outStream.Append(o); +outStream.RestartStrip(); + +outStream.Append(out1); +outStream.Append(out2); +outStream.Append(o); +outStream.RestartStrip(); + +outStream.Append(out2); +outStream.Append(out0); +outStream.Append(o); +outStream.RestartStrip(); ++
ここが実際の頂点を出力している処理の部分です。
+まず最初に出力用のg2f型の変数を宣言し、頂点座標と色情報を格納しています。この時Vertex Shaderと同じようにオブジェクト空間からカメラのクリップ空間への変換をしておく必要があります。
+その後に、メッシュを構成する頂点の順序を意識しながら、頂点情報を出力しています。outStream変数のAppend関数に出力用変数を渡すことで現在のストリームに追加されていき、RestartStrip関数を呼び出す事によって現在のプリミティブストリップを終了し、新しいストリームを開始しています。
これは、TriangleStreamは三角形ストリップなので、Append関数で頂点を追加していくほどそのストリームに追加されている全ての頂点を元に、接続された複数の三角形を生成していくことになります。なので、今回の様に三角形同士がAppendされた順序を元に接続されると困る時は、一旦RestartStripを呼び出して新しいストリームを開始する必要があります。もちろんAppend順を工夫することでRestartStrip関数の呼び出しを減らすことは可能です。
本項では、前項の『簡単なGeometry Shader』から少し発展させて、Geometry Shaderを使ってリアルタイムに草を生成するGrass Shaderについて説明します。
+以下は説明するGrass Shaderのプログラムです。
+Shader "Custom/Grass" {
+ Properties
+ {
+ // 草の高さ
+ _Height("Height", float) = 80
+ // 草の幅
+ _Width("Width", float) = 2.5
+
+ // 草の下部の高さ
+ _BottomHeight("Bottom Height", float) = 0.3
+ // 草の中間部の高さ
+ _MiddleHeight("Middle Height", float) = 0.4
+ // 草の上部の高さ
+ _TopHeight("Top Height", float) = 0.5
+
+ // 草の下部の幅
+ _BottomWidth("Bottom Width", float) = 0.5
+ // 草の中間部の幅
+ _MiddleWidth("Middle Width", float) = 0.4
+ // 草の上部の幅
+ _TopWidth("Top Width", float) = 0.2
+
+ // 草の下部の曲がり具合
+ _BottomBend("Bottom Bend", float) = 1.0
+ // 草の中間部の曲がり具合
+ _MiddleBend("Middle Bend", float) = 1.0
+ // 草の上部の曲がり具合
+ _TopBend("Top Bend", float) = 2.0
+
+ // 風の強さ
+ _WindPower("Wind Power", float) = 1.0
+
+ // 草の上部の色
+ _TopColor("Top Color", Color) = (1.0, 1.0, 1.0, 1.0)
+ // 草の下部の色
+ _BottomColor("Bottom Color", Color) = (0.0, 0.0, 0.0, 1.0)
+
+ // 草の高さにランダム性を与えるノイズテクスチャ
+ _HeightMap("Height Map", 2D) = "white"
+ // 草の向きにランダム性を与えるノイズテクスチャ
+ _RotationMap("Rotation Map", 2D) = "black"
+ // 風の強さにランダム性を与えるノイズテクスチャ
+ _WindMap("Wind Map", 2D) = "black"
+ }
+ SubShader
+ {
+ Tags{ "RenderType" = "Opaque" }
+
+ LOD 100
+ Cull Off
+
+ Pass
+ {
+ CGPROGRAM
+ #pragma target 5.0
+ #include "UnityCG.cginc"
+
+ #pragma vertex vert
+ #pragma geometry geom
+ #pragma fragment frag
+
+ float _Height, _Width;
+ float _BottomHeight, _MiddleHeight, _TopHeight;
+ float _BottomWidth, _MiddleWidth, _TopWidth;
+ float _BottomBend, _MiddleBend, _TopBend;
+
+ float _WindPower;
+ float4 _TopColor, _BottomColor;
+ sampler2D _HeightMap, _RotationMap, _WindMap;
+
+ struct v2g
+ {
+ float4 pos : SV_POSITION;
+ float3 nor : NORMAL;
+ float4 hei : TEXCOORD0;
+ float4 rot : TEXCOORD1;
+ float4 wind : TEXCOORD2;
+ };
+
+ struct g2f
+ {
+ float4 pos : SV_POSITION;
+ float4 color : COLOR;
+ };
+
+ v2g vert(appdata_full v)
+ {
+ v2g o;
+ float4 uv = float4(v.texcoord.xy, 0.0f, 0.0f);
+
+ o.pos = v.vertex;
+ o.nor = v.normal;
+ o.hei = tex2Dlod(_HeightMap, uv);
+ o.rot = tex2Dlod(_RotationMap, uv);
+ o.wind = tex2Dlod(_WindMap, uv);
+
+ return o;
+ }
+
+ [maxvertexcount(7)]
+ void geom(triangle v2g i[3], inout TriangleStream<g2f> stream)
+ {
+ float4 p0 = i[0].pos;
+ float4 p1 = i[1].pos;
+ float4 p2 = i[2].pos;
+
+ float3 n0 = i[0].nor;
+ float3 n1 = i[1].nor;
+ float3 n2 = i[2].nor;
+
+ float height = (i[0].hei.r + i[1].hei.r + i[2].hei.r) / 3.0f;
+ float rot = (i[0].rot.r + i[1].rot.r + i[2].rot.r) / 3.0f;
+ float wind = (i[0].wind.r + i[1].wind.r + i[2].wind.r) / 3.0f;
+
+ float4 center = ((p0 + p1 + p2) / 3.0f);
+ float4 normal = float4(((n0 + n1 + n2) / 3.0f).xyz, 1.0f);
+
+ float bottomHeight = height * _Height * _BottomHeight;
+ float middleHeight = height * _Height * _MiddleHeight;
+ float topHeight = height * _Height * _TopHeight;
+
+ float bottomWidth = _Width * _BottomWidth;
+ float middleWidth = _Width * _MiddleWidth;
+ float topWidth = _Width * _TopWidth;
+
+ rot = rot - 0.5f;
+ float4 dir = float4(normalize((p2 - p0) * rot).xyz, 1.0f);
+
+ g2f o[7];
+
+ // Bottom.
+ o[0].pos = center - dir * bottomWidth;
+ o[0].color = _BottomColor;
+
+ o[1].pos = center + dir * bottomWidth;
+ o[1].color = _BottomColor;
+
+ // Bottom to Middle.
+ o[2].pos = center - dir * middleWidth + normal * bottomHeight;
+ o[2].color = lerp(_BottomColor, _TopColor, 0.33333f);
+
+ o[3].pos = center + dir * middleWidth + normal * bottomHeight;
+ o[3].color = lerp(_BottomColor, _TopColor, 0.33333f);
+
+ // Middle to Top.
+ o[4].pos = o[3].pos - dir * topWidth + normal * middleHeight;
+ o[4].color = lerp(_BottomColor, _TopColor, 0.66666f);
+
+ o[5].pos = o[3].pos + dir * topWidth + normal * middleHeight;
+ o[5].color = lerp(_BottomColor, _TopColor, 0.66666f);
+
+ // Top.
+ o[6].pos = o[5].pos + dir * topWidth + normal * topHeight;
+ o[6].color = _TopColor;
+
+ // Bend.
+ dir = float4(1.0f, 0.0f, 0.0f, 1.0f);
+
+ o[2].pos += dir
+ * (_WindPower * wind * _BottomBend)
+ * sin(_Time);
+ o[3].pos += dir
+ * (_WindPower * wind * _BottomBend)
+ * sin(_Time);
+ o[4].pos += dir
+ * (_WindPower * wind * _MiddleBend)
+ * sin(_Time);
+ o[5].pos += dir
+ * (_WindPower * wind * _MiddleBend)
+ * sin(_Time);
+ o[6].pos += dir
+ * (_WindPower * wind * _TopBend)
+ * sin(_Time);
+
+ [unroll]
+ for (int i = 0; i < 7; i++) {
+ o[i].pos = UnityObjectToClipPos(o[i].pos);
+ stream.Append(o[i]);
+ }
+ }
+
+ float4 frag(g2f i) : COLOR
+ {
+ return i.color;
+ }
+ ENDCG
+ }
+ }
+}
+
+このシェーダーを縦横に複数並べたPlaneメッシュに適用すると、図6.4のようになります。
+
++図6.4: Grass Shaderの結果 +
+この中から草を生成する処理についての説明をします。
+ +今回は一つのプリミティブにつき1本の草を生成することにします。草の形状の生成については図6.5のように下部・中間部・上部に分けて頂点を合計7点生成し、上に行くほど斜めにしていくことで、草の斜め具合を簡易的に表現します。
+
++図6.5: 草の形の作り方 +
+詳細はコメントにて記載してありますが、一本の草の中の各部分(下部・中間部・上部)の横幅と高さをコントロールする係数、草全体の横幅と高さをコントロールする係数を主なパラメーターとして用意しています。また一本一本の草が同じ形になるのは見栄えが悪いので、ランダム性を持たせるためのノイズテクスチャを使います。
+ +float height = (i[0].hei.r + i[1].hei.r + i[2].hei.r) / 3.0f; +float rot = (i[0].rot.r + i[1].rot.r + i[2].rot.r) / 3.0f; +float wind = (i[0].wind.r + i[1].wind.r + i[2].wind.r) / 3.0f; + +float4 center = ((p0 + p1 + p2) / 3.0f); +float4 normal = float4(((n0 + n1 + n2) / 3.0f).xyz, 1.0f); ++
この部分では草の高さと向き、風の強弱の基準となる数値を計算しています。Geometry Shader内で計算しても良いのですが、頂点に対してメタ情報的に持たせた方がGeometry Shader上で計算を行なう上での初期値の様な扱いが出来るのでVertex Shaderで計算しています。
+float4 center = ((p0 + p1 + p2) / 3.0f); +float4 normal = float4(((n0 + n1 + n2) / 3.0f).xyz, 1.0f); ++
ここでは草の中心部分と、草を生やしていく方向を計算しています。ここの部分をノイズテクスチャなどで決定するようにすると、草が生える方向にランダム性を持たせることが出来ます。
+float bottomHeight = height * _Height * _BottomHeight; + +... + +o[6].pos += dir * (_WindPower * wind * _TopBend) * sin(_Time); ++
長いのでプログラムは略記してあります。この部分では下部・中間部・上部についての高さと幅をそれぞれ計算し、それを元に座標を求めています。
+[unroll]
+for (int i = 0; i < 7; i++) {
+ o[i].pos = UnityObjectToClipPos(o[i].pos);
+ stream.Append(o[i]);
+}
+
+この部分にて計算した7点の頂点をAppendしています。今回は三角形が繋がりながら生成されていっても問題ないため、RestartStripはしていません。
なお、forステートメントに対して[unroll]というアトリビュートを適用しています。これはコンパイル時に、ループの回数分ループ内の処理を展開するというアトリビュートで、メモリサイズが大きくなるというデメリットはあるのですが、高速に動作するという利点があります。
ここまでGeometry Shaderについての説明から、基本と応用のプログラムまでを説明してきました。CPU上で動くプログラムを書くのとは多少なりとも特徴が異なる所がありますが、基本的な部分を押さえさえすれば活用できるはずです。
+実は通説としてGeometry Shaderは遅いと言われているそうです。筆者自身はあまり感じたことはないのですが、利用範囲が大規模になると大変なのかもしれません。もしGeometry Shaderを大規模に使うということになりそうでしたら、ぜひ一度ベンチマークなどを取ってみてください。
+それでもGPU上で動的に且つ自由に新しいメッシュを作ったり、削除したり出来るというのはアイデアの幅をかなり広げることになると思います。個人的に最も重要なことは、どの技術を使ったのかではなく、それによって何を作り、表現するのかだと思っています。ぜひ本章にてGeometry Shaderという一つの道具を知り学んだ上で、なにか新しい可能性を感じてもらえたら幸いです。
+ +インスタレーション、サイネージ、Web(フロントエンド・バックエンド)、スマートフォンアプリなど、来た球はなるべく全部打つようにしています。
+ + +勢いと雰囲気だけで生きてるうちに、唐突にインタラクティブアーティスト・エンジニアになってしまって、とても大変なことになってしまった。周りの人たちに助けられながら、勉強させてもらいながら、なんとかやっています。
+ + +インタラクションエンジニア。インスタレーション、サイネージ、舞台演出、MV、コンサート映像、VJなどの映像表現領域で、リアルタイム、プロシージャルの特性を生かしたコンテンツの制作を行っている。sugi-choとmattatzとでAqueductというユニットを組んで数回活動したことがある。
+元ゲーム開発会社テクニカルアーティスト。アート・デザイン・音楽が好きで、インタラクティブアートに転向。趣味はサンプラー・シンセ・楽器・レコード・機材いじり。Twitterはじめました。
+ + +インタラクティブアーティスト・エンジニア兼、学生。大学で雪の物理シミュレーションを研究している傍らエンジニアリングに勤しんでいる。最近はTouchDesignerに浮気中。是非twitterでお話ししましょう。
+インタラクションエンジニア・雑魚系疾風のプログラマ・ゆるふわガチ勢・わりと何でも作る何でも屋さん。好きな学校の教室は図工室か図書室。
+ + +雰囲気でやるインタラクティブアーティスト・エンジニア。三度のメシよりインタラクティブコンテンツ好き。お芋が好きでカイワレは食べない。ジェネ系の動画をTwitterによく上げている。たまにVJをやる。
+インタラクティブエンジニア。Web制作、グラフィックデザインのお仕事も個人でやってます。制作のご依頼はtwitterまで。
+ + +元ゲーム開発者、現インタラクティブアーティスト・エンジニア。健康に気を使おうと朝ごはんを食べるようにしたらなぜか2kgほど痩せた。
+ + +Unityでインタラクティブアートを作る人間。フリーランス。hi@sugi.cc
+ + + diff --git a/articles/index.html b/articles/index.html new file mode 100644 index 0000000..5330255 --- /dev/null +++ b/articles/index.html @@ -0,0 +1,12 @@ +Preface.html本章ではサンプリング手法について解説していきます。今回取り上げるのは、ある確率分布の中から適当な値を複数サンプリングしてくるMCMC(マルコフ連鎖モンテカルロ法)というサンプリング方法です。
+ある確率分布からサンプリングしてくる方法として最も簡単な方法に棄却法という方法がありますが、3次元空間でのサンプリングでは棄却される領域が大きく実際の運用に耐えません。そこでMCMCを使うことで高次元においても効率よくサンプリングできるというのが、本章の内容です。
+MCMCに関する情報は、一方では書籍など体系だった情報は統計屋さん向けのものでプログラマにとっては冗長な割に実装までの手引が存在せず、他方ネットにある情報は10数行のサンプルコードが記載されているだけで理論的な背景へのケアがないため、理論と実装を手早く一気通貫に理解できるコンテンツが存在しないのが実情です。次節以降の具体的な解説はできるだけそういった内容になるように心がけました。
+MCMCの背景となる確率の解説は、厳密を期せばそれこそ本が一冊書けるほどの内容です。今回は安心して実装できる最小限の理論的背景の説明をモットーに、定義の厳密性は程々に、なるだけ直感的な表現を目指しました。数学については大学初年度程度、プログラムについては仕事で少しでも使ったことがある程度の方なら難なく読める内容かなと思います。
+ +本章ではUnityGraphicsProgrammingのUnityプロジェクトhttps://github.com/IndieVisualLab/UnityGraphicsProgramming内にあるAssets/ProceduralModeling以下をサンプルプログラムとして用意しています。
+ +MCMCの理論を理解するには、まずは確率についての基礎的な内容を抑えておく必要があります。ただし今回MCMCを理解するために押さえておくべき概念は少なく、以下の4つだけです。尤度も確率密度関数も必要なしです!
+順に見ていきましょう。
+ある事象が確率 P(X) で起こるときの、この実数Xを確率変数と呼びます。例えば「サイコロの5の目が出る確率は1/6である」という時に「5の目」が確率変数にあたり「1/6」が確率に当たります。先程の文を一般的に言い換えると「サイコロのXの目がでる確率はP(X)である」と言い換えることができます。
+ちなみにすこし定義らしい書き方をすると、確率変数Xは標本空間Ω(=起こる可能性のある全ての事象)から選ばれた元ω(=起こった一つの事象)について、実数であるXを返す写像 X = X(ω) と書くことができます。
+ +先程の確率変数の後半で若干ややこしい定義を付け加えたのは、確率変数Xが X = X(ω) という書き方で表されるという前提に立つと、確率過程の理解が簡単になるからです。確率過程とは、先程のXに時間の条件を付け加えたもので X = X(ω, t) と表すことができるもののこと。つまり確率過程は時間の条件を添えた確率変数の一種と考えることができます。
+ +確率分布は、確率変数 X と 確率 P(X) との対応関係を示すものです。よく縦軸に確率 P(X) 横軸に X を取ったグラフで表します。
+ +一つ一つの点は遷移しても全体の分布が不変であるような分布。分布 P とある遷移行列 π について、πP = P を満たす P を定常分布と呼びます。この定義だけではわかりにくいですが、以下の図を見れば明らかです。
+
++図8.1: stationaryDistribution +
+さて本節ではMCMCを構成する概念について触れていきます。
MCMCは最初に述べたように、ある確率分布の中から適当な値をサンプリングしてくる手法なのですが、より具体的には、与えられた分布が定常分布であるという条件の下でモンテカルロ法(Monte Carlo)とマルコフ連鎖(Markov chain)によってサンプリングする手法を指します。以下ではモンテカルロ法、マルコフ連鎖、定常分布、の順に解説をおこなっていきます。
モンテカルロ法とは、擬似乱数を使った数値計算やシミュレーションの総称です。
よくモンテカルロ法による数値計算の導入に使われる例に、以下のような円周率の計算があります。
float pi;
+float trial = 10000;
+float count = 0;
+
+for(int i=0; i<trial; i++){
+ float x = Random.value;
+ float y = Random.value;
+ if(x*x+y*y <= 1) count++;
+}
+
+pi = 4 * count / trial;
+
+要するに1 x 1の正方形の中で扇形の円の中に入った試行数と全体の試行数の比が面積比になるので、そこから円周率を出す事ができるというものです。簡単な例ですが、これもモンテカルロ法です。
+ +マルコフ連鎖は、マルコフ性を満たす確率過程のうち、状態が離散的に記述できるものを指します。
マルコフ性とは、ある確率過程の将来状態の確率分布が現在状態のみに依存し、過去の状態に依存しない性質のことです。
++図8.2: MarkovChain +
+上図のようにマルコフ連鎖では将来の状態は現在の状態のみに依存して、過去の状態には直接的には影響しません。
+ +MCMCでは擬似乱数を使ってある任意の分布から与えられた定常分布へと収束していく必要があります。というのも、与えられた分布に収束しないと毎回違う分布からサンプリングしてしまうし、定常分布でないと上手く連鎖的にサンプリングできません。任意の分布が与えられた分布へと収束するには、以下の二つの条件を満たす必要があります。
+
++図8.3: Irreducibility +
+
++図8.4: Aperiodicity +
+この2つの条件を満たしていればある任意の分布は与えられた定常分布に収束することができます。これをマルコフ過程のエルゴード性といいます。
+さて与えられた分布が先程のエルゴード性を満たす分布かどうかをいちいち調べるのは骨が折れることなので、多くの場合には条件を強めにとって「詳細釣り合い」という条件を満たす範囲で調べていきます。詳細釣り合いをみたすマルコフ連鎖の手法の一つがメトロポリス法と呼ばれるものです。
+メトロポリス法は以下の2ステップを踏むことでサンプリングを行います
+メトロポリス法のメリットは、確率分布の極大値に遷移しきった後も r の値が小さければ確率値の小さい方に遷移するので、極大値周辺で確率値に比例したサンプリングができることです。
+ちなみにメトロポリス法はメトロポリス・ヘイスティング法(MH法)の一種です。メトロポリス法は提案分布に左右対称な分布を使いますが、MH法ではこの限りではありません。
+ +では実際にコードの抜粋を見ながら、どのようにMCMCを実装するかを見ていきましょう。
+先ず3次元の確率分布を用意します。これを目標分布と呼びます。実際にサンプリングしたい分布なので「目標」分布です。
+void Prepare()
+{
+ var sn = new SimplexNoiseGenerator();
+ for (int x = 0; x < lEdge; x++)
+ for (int y = 0; y < lEdge; y++)
+ for (int z = 0; z < lEdge; z++)
+ {
+ var i = x + lEdge * y + lEdge * lEdge * z;
+ var val = sn.noise(x, y, z);
+ data[i] = new Vector4(x, y, z, val);
+ }
+}
+
+今回はシンプレックスノイズを目標分布として採用しました。
+次に実際にMCMCを走らせます。
+public IEnumerable<Vector3> Sequence(int nInit, int limit, float th)
+{
+ Reset();
+
+ for (var i = 0; i < nInit; i++)
+ Next(th);
+
+ for (var i = 0; i < limit; i++)
+ {
+ yield return _curr;
+ Next(th);
+ }
+}
+
+public void Reset()
+{
+ for (var i = 0; _currDensity <= 0f && i < limitResetLoopCount; i++)
+ {
+ _curr = new Vector3(
+ Scale.x * Random.value,
+ Scale.y * Random.value,
+ Scale.z * Random.value
+ );
+ _currDensity = Density(_curr);
+ }
+}
+
+コルーチンを使って処理を走らせます。MCMCは一つのマルコフ連鎖が終わると全く別のところから処理が始まるため、概念的には並列処理と考えることができます。今回はReset関数を使って、一連の処理が終わった後に別の処理を走らせるようにしています。この作業を行うことで、確率分布の極大値が多数存在する場合にも上手くサンプリングができるようになります。
+遷移を始めて最初の方は目標分布から離れた点である可能性が高いので、この区間はサンプリングを行わず捨ててしまいます(burn-in)。十分目標分布に近づいたらサンプリングと遷移のセットを一定回数行い、終わったらまた別の一連の処理に入ります。
+最後に遷移を決定する処理です。
3次元ですので、提案分布は以下のように三変量の標準正規分布を用います。
public static Vector3 GenerateRandomPointStandard()
+{
+ var x = RandomGenerator.rand_gaussian(0f, 1f);
+ var y = RandomGenerator.rand_gaussian(0f, 1f);
+ var z = RandomGenerator.rand_gaussian(0f, 1f);
+ return new Vector3(x, y, z);
+}
+
+public static float rand_gaussian(float mu, float sigma)
+{
+ float z = Mathf.Sqrt(-2.0f * Mathf.Log(Random.value))
+ * Mathf.Sin(2.0f * Mathf.PI * Random.value);
+ return mu + sigma * z;
+}
+
+メトロポリス法では左右対称な分布である必要があるので、平均値を0以外に設定することは無いですが、分散を1以外にする場合は、コレスキー分解を使って以下のように導出します。
+public static Vector3 GenerateRandomPoint(Matrix4x4 sigma)
+{
+ var c00 = sigma.m00 / Mathf.Sqrt(sigma.m00);
+ var c10 = sigma.m10 / Mathf.Sqrt(sigma.m00);
+ var c20 = sigma.m21 / Mathf.Sqrt(sigma.m00);
+ var c11 = Mathf.Sqrt(sigma.m11 - c10 * c10);
+ var c21 = (sigma.m21 - c20 * c10) / c11;
+ var c22 = Mathf.Sqrt(sigma.m22 - (c20 * c20 + c21 * c21));
+ var r1 = RandomGenerator.rand_gaussian(0f, 1f);
+ var r2 = RandomGenerator.rand_gaussian(0f, 1f);
+ var r3 = RandomGenerator.rand_gaussian(0f, 1f);
+ var x = c00 * r1;
+ var y = c10 * r1 + c11 * r2;
+ var z = c20 * r1 + c21 * r2 + c22 * r3;
+ return new Vector3(x, y, z);
+}
+
+遷移先の決定は、提案分布(上の一点である)nextと直前の点_currそれぞれの、目標分布上における確率の比を取り一様乱数より大きければ遷移、そうでなければ遷移しない、とします。
確率値は、遷移先の座標に対応する確率値を見つける処理が重いため(O(n^3)の処理量)、近似計算を行っています。今回は目標分布が連続的に変化する分布を用いているので、距離に反比例する加重平均を行うことで近似的に確率値を導出しています。
void Next(float threshold)
+{
+ Vector3 next =
+ GaussianDistributionCubic.GenerateRandomPointStandard()
+ + _curr;
+
+ var densityNext = Density(next);
+ bool flag1 =
+ _currDensity <= 0f ||
+ Mathf.Min(1f, densityNext / _currDensity) >= Random.value;
+ bool flag2 = densityNext > threshold;
+ if (flag1 && flag2)
+ {
+ _curr = next;
+ _currDensity = densityNext;
+ }
+}
+
+float Density(Vector3 pos)
+{
+ float weight = 0f;
+ for (int i = 0; i < weightReferenceloopCount; i++)
+ {
+ int id = (int)Mathf.Floor(Random.value * (Data.Length - 1));
+ Vector3 posi = Data[id];
+ float mag = Vector3.SqrMagnitude(pos - posi);
+ weight += Mathf.Exp(-mag) * Data[id].w;
+ }
+ return weight;
+}
+
+今回リポジトリに3次元の棄却法(円の例で示したような簡単なモンテカルロ法)のサンプルも入っているので比較してみるとよいでしょう。棄却法では棄却の基準値を強めに取るとほとんどサンプリングが上手くできないのに対して、MCMCでは同じようなサンプリング結果をよりスムーズに提示することができます。またMCMCではステップ毎のランダムウォークの幅を小さくすれば、一連の連鎖の中では近しい空間からサンプリングするため、植物や花の群生を簡単に再現することができます。
+この章では、ComputeShaderを使ったBoidsアルゴリズムを用いた群のシミュレーションの実装について解説いたします。鳥や魚、その他の陸上動物は時として群を作ります。この群の動きには規則性と複雑性が見られ、ある種の美しさを持っており人を惹きつけてきました。コンピュータグラフィックスにおいては、それらの個体の振る舞いを一つ一つ人の手で制御することは現実的でなく、Boidsと呼ばれる群を作るためのアルゴリズムが考案されました。このシミュレーションアルゴリズムは、いくつかのシンプルな規則で構成されており実装も容易ですが、単純な実装では、すべての個体との位置関係を調べる必要があり、個体数が増えると、その2乗に比例して計算量が増加してしまいます。多くの個体を制御したいという場合、CPUによる実装では非常に困難です。そこで、GPUによる強力な並列計算能力を利用します。Unityには、GPUによるこのような汎用的な計算(GPGPU)を行うため、ComputeShaderというシェーダプログラムが用意されています。GPUには共有メモリと呼ばれる特殊な記憶領域が組み込まれており、ComputeShaderを用いると、このメモリを有効に活用することができます。また、UnityにはGPUインスタンシングという高度なレンダリング機能があり、任意のメッシュを大量に描画することが可能です。これらのUnityのGPUの計算能力を生かした機能を使い、多数のBoidオブジェクトを制御し描画するプログラムを紹介いたします。
+ +Boidsと呼ばれる群のシミュレーションアルゴリズムは、Craig Reynoldsによって1986年に開発され、翌年1987年のACM SIGGRAPHに「Flocks, Herds, and Schools: A Distributed Behavioral Model」というタイトルの論文として発表されました。
+Reynoldsは、群れというものは、それぞれの個体が視覚や聴覚などの知覚によって、周囲の他の個体の位置や動く方向に基づいて自身の行動を修正することにより、結果として複雑な振る舞いを生み出している、ということに着目します。
+それぞれの個体は以下の3つのシンプルな行動規則に従います。
+ +ある一定の距離内にある個体と密集することを避けるように動く
+ +ある一定の距離内にある個体が向いている方向の平均に向かおうと動く
+ +ある一定の距離内にある個体の平均位置に動く
+
++図3.1: Boidsの基本的なルール +
+これらのルールに従って、個々の動きを制御することにより、群れの動きをプログラムすることができます。
+ +https://github.com/IndieVisualLab/UnityGraphicsProgramming
+本書のサンプルUnityプロジェクトにある、Assets/BoidsSimulationOnGPUフォルダ内のBoidsSimulationOnGPU.unityシーンデータを開いてください。
+ +本章で紹介するプログラムは、ComputeShader、GPUインスタンシングを使用しています。
+ComputeShaderは、以下のプラットフォームまたはAPIで動作します。
+GPUインスタンシングは以下のプラットフォームまたはAPIで利用可能です。
+本サンプルプログラムでは、Graphics.DrawMeshInstancedIndirectメソッドを使用しています。そのため、Unityのバージョンは5.6以降である必要があります。
+ +本サンプルプログラムは以下のコードで構成されます。
+スクリプトやマテリアルリソースなどはこのようにセットします
+
++図3.2: UnityEditor上での設定 +
+このコードでは、Boidsシミュレーションのパラメータや、GPU上での計算のために必要なバッファや計算命令を記述したComputeShaderの管理などを行います。
+GPUBoids.cs
+
+using UnityEngine;
+using System.Collections;
+using System.Collections.Generic;
+using System.Runtime.InteropServices;
+
+public class GPUBoids : MonoBehaviour
+{
+ // Boidデータの構造体
+ [System.Serializable]
+ struct BoidData
+ {
+ public Vector3 Velocity; // 速度
+ public Vector3 Position; // 位置
+ }
+ // スレッドグループのスレッドのサイズ
+ const int SIMULATION_BLOCK_SIZE = 256;
+
+ #region Boids Parameters
+ // 最大オブジェクト数
+ [Range(256, 32768)]
+ public int MaxObjectNum = 16384;
+
+ // 結合を適用する他の個体との半径
+ public float CohesionNeighborhoodRadius = 2.0f;
+ // 整列を適用する他の個体との半径
+ public float AlignmentNeighborhoodRadius = 2.0f;
+ // 分離を適用する他の個体との半径
+ public float SeparateNeighborhoodRadius = 1.0f;
+
+ // 速度の最大値
+ public float MaxSpeed = 5.0f;
+ // 操舵力の最大値
+ public float MaxSteerForce = 0.5f;
+
+ // 結合する力の重み
+ public float CohesionWeight = 1.0f;
+ // 整列する力の重み
+ public float AlignmentWeight = 1.0f;
+ // 分離する力の重み
+ public float SeparateWeight = 3.0f;
+
+ // 壁を避ける力の重み
+ public float AvoidWallWeight = 10.0f;
+
+ // 壁の中心座標
+ public Vector3 WallCenter = Vector3.zero;
+ // 壁のサイズ
+ public Vector3 WallSize = new Vector3(32.0f, 32.0f, 32.0f);
+ #endregion
+
+ #region Built-in Resources
+ // Boidsシミュレーションを行うComputeShaderの参照
+ public ComputeShader BoidsCS;
+ #endregion
+
+ #region Private Resources
+ // Boidの操舵力(Force)を格納したバッファ
+ ComputeBuffer _boidForceBuffer;
+ // Boidの基本データ(速度, 位置)を格納したバッファ
+ ComputeBuffer _boidDataBuffer;
+ #endregion
+
+ #region Accessors
+ // Boidの基本データを格納したバッファを取得
+ public ComputeBuffer GetBoidDataBuffer()
+ {
+ return this._boidDataBuffer != null ? this._boidDataBuffer : null;
+ }
+
+ // オブジェクト数を取得
+ public int GetMaxObjectNum()
+ {
+ return this.MaxObjectNum;
+ }
+
+ // シミュレーション領域の中心座標を返す
+ public Vector3 GetSimulationAreaCenter()
+ {
+ return this.WallCenter;
+ }
+
+ // シミュレーション領域のボックスのサイズを返す
+ public Vector3 GetSimulationAreaSize()
+ {
+ return this.WallSize;
+ }
+ #endregion
+
+ #region MonoBehaviour Functions
+ void Start()
+ {
+ // バッファを初期化
+ InitBuffer();
+ }
+
+ void Update()
+ {
+ // シミュレーション
+ Simulation();
+ }
+
+ void OnDestroy()
+ {
+ // バッファを破棄
+ ReleaseBuffer();
+ }
+
+ void OnDrawGizmos()
+ {
+ // デバッグとしてシミュレーション領域をワイヤーフレームで描画
+ Gizmos.color = Color.cyan;
+ Gizmos.DrawWireCube(WallCenter, WallSize);
+ }
+ #endregion
+
+ #region Private Functions
+ // バッファを初期化
+ void InitBuffer()
+ {
+ // バッファを初期化
+ _boidDataBuffer = new ComputeBuffer(MaxObjectNum,
+ Marshal.SizeOf(typeof(BoidData)));
+ _boidForceBuffer = new ComputeBuffer(MaxObjectNum,
+ Marshal.SizeOf(typeof(Vector3)));
+
+ // Boidデータ, Forceバッファを初期化
+ var forceArr = new Vector3[MaxObjectNum];
+ var boidDataArr = new BoidData[MaxObjectNum];
+ for (var i = 0; i < MaxObjectNum; i++)
+ {
+ forceArr[i] = Vector3.zero;
+ boidDataArr[i].Position = Random.insideUnitSphere * 1.0f;
+ boidDataArr[i].Velocity = Random.insideUnitSphere * 0.1f;
+ }
+ _boidForceBuffer.SetData(forceArr);
+ _boidDataBuffer.SetData(boidDataArr);
+ forceArr = null;
+ boidDataArr = null;
+ }
+
+ // シミュレーション
+ void Simulation()
+ {
+ ComputeShader cs = BoidsCS;
+ int id = -1;
+
+ // スレッドグループの数を求める
+ int threadGroupSize = Mathf.CeilToInt(MaxObjectNum
+ / SIMULATION_BLOCK_SIZE);
+
+ // 操舵力を計算
+ id = cs.FindKernel("ForceCS"); // カーネルIDを取得
+ cs.SetInt("_MaxBoidObjectNum", MaxObjectNum);
+ cs.SetFloat("_CohesionNeighborhoodRadius",
+ CohesionNeighborhoodRadius);
+ cs.SetFloat("_AlignmentNeighborhoodRadius",
+ AlignmentNeighborhoodRadius);
+ cs.SetFloat("_SeparateNeighborhoodRadius",
+ SeparateNeighborhoodRadius);
+ cs.SetFloat("_MaxSpeed", MaxSpeed);
+ cs.SetFloat("_MaxSteerForce", MaxSteerForce);
+ cs.SetFloat("_SeparateWeight", SeparateWeight);
+ cs.SetFloat("_CohesionWeight", CohesionWeight);
+ cs.SetFloat("_AlignmentWeight", AlignmentWeight);
+ cs.SetVector("_WallCenter", WallCenter);
+ cs.SetVector("_WallSize", WallSize);
+ cs.SetFloat("_AvoidWallWeight", AvoidWallWeight);
+ cs.SetBuffer(id, "_BoidDataBufferRead", _boidDataBuffer);
+ cs.SetBuffer(id, "_BoidForceBufferWrite", _boidForceBuffer);
+ cs.Dispatch(id, threadGroupSize, 1, 1); // ComputeShaderを実行
+
+ // 操舵力から、速度と位置を計算
+ id = cs.FindKernel("IntegrateCS"); // カーネルIDを取得
+ cs.SetFloat("_DeltaTime", Time.deltaTime);
+ cs.SetBuffer(id, "_BoidForceBufferRead", _boidForceBuffer);
+ cs.SetBuffer(id, "_BoidDataBufferWrite", _boidDataBuffer);
+ cs.Dispatch(id, threadGroupSize, 1, 1); // ComputeShaderを実行
+ }
+
+ // バッファを解放
+ void ReleaseBuffer()
+ {
+ if (_boidDataBuffer != null)
+ {
+ _boidDataBuffer.Release();
+ _boidDataBuffer = null;
+ }
+
+ if (_boidForceBuffer != null)
+ {
+ _boidForceBuffer.Release();
+ _boidForceBuffer = null;
+ }
+ }
+ #endregion
+}
+
+
+InitBuffer関数では、GPU上で計算を行う際に使用するバッファを宣言しています。GPU上で計算するためのデータを格納するバッファとして、ComputeBufferというクラスを使用します。ComputeBufferはComputeShaderのためにデータを格納するデータバッファです。C#スクリプトからGPU上のメモリバッファに対して読み込みや書き込みができるようになります。初期化時の引数には、バッファの要素の数と、要素1つのサイズ(バイト数)を渡します。Marshal.SizeOf()メソッドを使用することで、型のサイズ(バイト数)を取得することができます。ComputeBufferでは、SetData()を用いて、任意の構造体の配列の値をセットすることができます。
+ +Simulation関数では、ComputeShaderに必要なパラメータを渡し、計算命令を発行します。
+ComputeShaderに記述された、実際にGPUに計算をさせる関数はカーネルと呼ばれます。このカーネルの実行単位をスレッドと言い、GPUアーキテクチャに即した並列計算処理を行うために、任意の数まとめてグループとして扱い、それらはスレッドグループと呼ばれます。このスレッドの数とスレッドグループ数の積が、Boidオブジェクトの個体数と同じかそれを超えるように設定します。
+カーネルは、ComputeShaderスクリプト内で #pragma kernelディレクティブを用いて指定されます。これにはそれぞれIDが割り当てられており、C#スクリプトからはFindKernelメソッドを用いることで、このIDを取得することができます。
+SetFloatメソッド、SetVectorメソッド、SetBufferメソッドなどを使用し、シミュレーションに必要なパラメータやバッファをComputeShaderに渡します。バッファやテクスチャをセットするときにはカーネルIDが必要になります。
+Dispatchメソッドを実行することで、ComputeShaderに定義したカーネルをGPUで計算処理を行うように命令を発行します。引数には、カーネルIDとスレッドグループの数を指定します。
+ +GPUへの計算命令を記述します。カーネルは2つで、1つは操舵力を計算するもの、もう1つは、その力を適用させ速度や位置を更新するものです。
+Boids.compute
+
+// カーネル関数を指定
+#pragma kernel ForceCS // 操舵力を計算
+#pragma kernel IntegrateCS // 速度, 位置を計算
+
+// Boidデータの構造体
+struct BoidData
+{
+ float3 velocity; // 速度
+ float3 position; // 位置
+};
+
+// スレッドグループのスレッドのサイズ
+#define SIMULATION_BLOCK_SIZE 256
+
+// Boidデータのバッファ(読み取り用)
+StructuredBuffer<BoidData> _BoidDataBufferRead;
+// Boidデータのバッファ(読み取り, 書き込み用)
+RWStructuredBuffer<BoidData> _BoidDataBufferWrite;
+// Boidの操舵力のバッファ(読み取り用)
+StructuredBuffer<float3> _BoidForceBufferRead;
+// Boidの操舵力のバッファ(読み取り, 書き込み用)
+RWStructuredBuffer<float3> _BoidForceBufferWrite;
+
+int _MaxBoidObjectNum; // Boidオブジェクト数
+
+float _DeltaTime; // 前フレームから経過した時間
+
+float _SeparateNeighborhoodRadius; // 分離を適用する他の個体との距離
+float _AlignmentNeighborhoodRadius; // 整列を適用する他の個体との距離
+float _CohesionNeighborhoodRadius; // 結合を適用する他の個体との距離
+
+float _MaxSpeed; // 速度の最大値
+float _MaxSteerForce; // 操舵する力の最大値
+
+float _SeparateWeight; // 分離適用時の重み
+float _AlignmentWeight; // 整列適用時の重み
+float _CohesionWeight; // 結合適用時の重み
+
+float4 _WallCenter; // 壁の中心座標
+float4 _WallSize; // 壁のサイズ
+float _AvoidWallWeight; // 壁を避ける強さの重み
+
+
+// ベクトルの大きさを制限する
+float3 limit(float3 vec, float max)
+{
+ float length = sqrt(dot(vec, vec)); // 大きさ
+ return (length > max && length > 0) ? vec.xyz * (max / length) : vec.xyz;
+}
+
+// 壁に当たった時に逆向きの力を返す
+float3 avoidWall(float3 position)
+{
+ float3 wc = _WallCenter.xyz;
+ float3 ws = _WallSize.xyz;
+ float3 acc = float3(0, 0, 0);
+ // x
+ acc.x = (position.x < wc.x - ws.x * 0.5) ? acc.x + 1.0 : acc.x;
+ acc.x = (position.x > wc.x + ws.x * 0.5) ? acc.x - 1.0 : acc.x;
+
+ // y
+ acc.y = (position.y < wc.y - ws.y * 0.5) ? acc.y + 1.0 : acc.y;
+ acc.y = (position.y > wc.y + ws.y * 0.5) ? acc.y - 1.0 : acc.y;
+
+ // z
+ acc.z = (position.z < wc.z - ws.z * 0.5) ? acc.z + 1.0 : acc.z;
+ acc.z = (position.z > wc.z + ws.z * 0.5) ? acc.z - 1.0 : acc.z;
+
+ return acc;
+}
+
+// Shared memory for staging boid data within one thread group
+groupshared BoidData boid_data[SIMULATION_BLOCK_SIZE];
+
+// Kernel that computes the steering force for each boid
+[numthreads(SIMULATION_BLOCK_SIZE, 1, 1)]
+void ForceCS
+(
+ uint3 DTid : SV_DispatchThreadID, // globally unique thread ID
+ uint3 Gid : SV_GroupID, // ID of the thread group
+ uint3 GTid : SV_GroupThreadID, // thread ID within the group
+ uint GI : SV_GroupIndex // SV_GroupThreadID flattened to 1D (0-255)
+)
+{
+ const unsigned int P_ID = DTid.x; // ID of this boid
+ float3 P_position = _BoidDataBufferRead[P_ID].position; // own position
+ float3 P_velocity = _BoidDataBufferRead[P_ID].velocity; // own velocity
+
+ float3 force = float3(0, 0, 0); // steering force, initialized to zero
+
+ float3 sepPosSum = float3(0, 0, 0); // accumulator for separation
+ int sepCount = 0; // number of neighbors counted for separation
+
+ float3 aliVelSum = float3(0, 0, 0); // velocity accumulator for alignment
+ int aliCount = 0; // number of neighbors counted for alignment
+
+ float3 cohPosSum = float3(0, 0, 0); // position accumulator for cohesion
+ int cohCount = 0; // number of neighbors counted for cohesion
+
+ // Process SIMULATION_BLOCK_SIZE boids per pass (one pass per group tile)
+ [loop]
+ for (uint N_block_ID = 0; N_block_ID < (uint)_MaxBoidObjectNum;
+ N_block_ID += SIMULATION_BLOCK_SIZE)
+ {
+ // Stage SIMULATION_BLOCK_SIZE boids into shared memory
+ boid_data[GI] = _BoidDataBufferRead[N_block_ID + GI];
+
+ // Block every thread in the group until all shared-memory writes
+ // have completed and every thread in the group has reached this
+ // call, so boid_data is fully populated before it is read
+ GroupMemoryBarrierWithGroupSync();
+
+ // Interact with the staged boids
+ for (int N_tile_ID = 0; N_tile_ID < SIMULATION_BLOCK_SIZE;
+ N_tile_ID++)
+ {
+ // position of the other boid
+ float3 N_position = boid_data[N_tile_ID].position;
+ // velocity of the other boid
+ float3 N_velocity = boid_data[N_tile_ID].velocity;
+
+ // offset from the other boid to this one
+ float3 diff = P_position - N_position;
+ // distance between the two boids
+ float dist = sqrt(dot(diff, diff));
+
+ // --- Separation ---
+ if (dist > 0.0 && dist <= _SeparateNeighborhoodRadius)
+ {
+ // vector pointing from the other boid toward this one
+ float3 repulse = normalize(P_position - N_position);
+ // divide by distance (farther neighbors contribute less)
+ repulse /= dist;
+ sepPosSum += repulse; // accumulate
+ sepCount++; // count the neighbor
+ }
+
+ // --- Alignment ---
+ if (dist > 0.0 && dist <= _AlignmentNeighborhoodRadius)
+ {
+ aliVelSum += N_velocity; // accumulate
+ aliCount++; // count the neighbor
+ }
+
+ // --- Cohesion ---
+ if (dist > 0.0 && dist <= _CohesionNeighborhoodRadius)
+ {
+ cohPosSum += N_position; // accumulate
+ cohCount++; // count the neighbor
+ }
+ }
+ GroupMemoryBarrierWithGroupSync();
+ }
+
+ // Steering force (separation)
+ float3 sepSteer = (float3)0.0;
+ if (sepCount > 0)
+ {
+ sepSteer = sepPosSum / (float)sepCount; // average
+ sepSteer = normalize(sepSteer) * _MaxSpeed; // scale to max speed
+ sepSteer = sepSteer - P_velocity; // steering = desired - current
+ sepSteer = limit(sepSteer, _MaxSteerForce); // clamp the force
+ }
+
+ // Steering force (alignment)
+ float3 aliSteer = (float3)0.0;
+ if (aliCount > 0)
+ {
+ aliSteer = aliVelSum / (float)aliCount; // average neighbor velocity
+ aliSteer = normalize(aliSteer) * _MaxSpeed; // scale to max speed
+ aliSteer = aliSteer - P_velocity; // steering = desired - current
+ aliSteer = limit(aliSteer, _MaxSteerForce); // clamp the force
+ }
+ // Steering force (cohesion)
+ float3 cohSteer = (float3)0.0;
+ if (cohCount > 0)
+ {
+ // average neighbor position (local center of mass)
+ cohPosSum = cohPosSum / (float)cohCount;
+ cohSteer = cohPosSum - P_position; // vector toward the average position
+ cohSteer = normalize(cohSteer) * _MaxSpeed; // scale to max speed
+ cohSteer = cohSteer - P_velocity; // steering = desired - current
+ cohSteer = limit(cohSteer, _MaxSteerForce); // clamp the force
+ }
+ force += aliSteer * _AlignmentWeight; // add weighted alignment force
+ force += cohSteer * _CohesionWeight; // add weighted cohesion force
+ force += sepSteer * _SeparateWeight; // add weighted separation force
+
+ _BoidForceBufferWrite[P_ID] = force; // write the result
+}
+
+// Kernel that integrates velocity and position from the steering force
+[numthreads(SIMULATION_BLOCK_SIZE, 1, 1)]
+void IntegrateCS
+(
+ uint3 DTid : SV_DispatchThreadID // globally unique thread ID
+)
+{
+ const unsigned int P_ID = DTid.x; // index of this boid
+
+ BoidData b = _BoidDataBufferWrite[P_ID]; // read the current boid data
+ float3 force = _BoidForceBufferRead[P_ID]; // read the steering force
+
+ // Add a repelling force when the boid approaches the wall
+ force += avoidWall(b.position) * _AvoidWallWeight;
+
+ b.velocity += force * _DeltaTime; // apply the force to the velocity
+ b.velocity = limit(b.velocity, _MaxSpeed); // clamp the speed
+ b.position += b.velocity * _DeltaTime; // advance the position
+
+ _BoidDataBufferWrite[P_ID] = b; // write the result back
+}
+
+
+ForceCSカーネルでは、操舵力の計算を行います。
+ +groupshared という記憶域修飾子をつけられた変数は共有メモリ(shared memory)に書き込まれるようになります。共有メモリは多くのデータ量を書き込むことはできませんが、レジスタに近く配置されており非常に高速にアクセスができます。この共有メモリはスレッドグループ内で共有することができます。SIMULATION_BLOCK_SIZE分の他の個体の情報をまとめて共有メモリに書き込んでおいて、同一スレッドグループ内で高速に読みこむことができるようにすることで、他の個体との位置関係を考慮した計算を効率的に行っていきます。
+
++図3.3: GPUの基本的なアーキテクチャ +
+共有メモリに書き込まれたデータにアクセスする時は、GroupMemoryBarrierWithGroupSync()メソッドを記述し、スレッドグループ内のすべてのスレッドの処理の同期をとっておく必要があります。GroupMemoryBarrierWithGroupSync()は、スレッドグループ内のすべてのスレッドが、この呼び出しに到達するまで、グループ内のすべてのスレッドの実行をブロックします。これにより、スレッドグループ内のすべてのスレッドでboid_data配列の初期化が適切に終わっていることが保証されるようになります。
+ +指定した距離より近い個体があった場合、その個体の位置から自身の位置へ向かうベクトルを求め、正規化します。そのベクトルを、距離の値で割ることで、近ければより避けるように、遠ければ小さく避けるように重みをつけ他の個体と衝突しないようにする力として加算していきます。全ての個体との計算が終わったら、その値を用いて、現在の速度との関係から操舵力を求めます。
+ +指定した距離より近い個体があった場合、その個体の速度(Velocity)を足し合わせていき、同時にその個体数をカウントしていき、それらの値で、近い個体の速度(つまり向いている方向)の平均を求めます。全ての個体との計算が終わったら、その値を用いて、現在の速度との関係から操舵力を求めます。
+ +指定した距離より近い個体があった場合、その個体の位置を加算していき、同時にその個体数をカウントしていき、それらの値で、近い個体の位置の平均(重心)を求めます。さらに、そこへ向かうベクトルを求め、現在の速度との関係から操舵力を求めます。
+ +IntegrateCSカーネルでは、ForceCS()で求めた操舵力を元に、Boidの速度と位置を更新します。AvoidWallでは、指定したエリアの外に出ようとした場合、逆向きの力を与え領域の内部に留まるようにしています。
+ +このスクリプトでは、Boidsシミュレーションで得られた結果を、指定したメッシュで描画することを行います。
+BoidsRender.cs
+
+using System.Collections;
+using System.Collections.Generic;
+using UnityEngine;
+
+// Guarantees that a GPUBoids component is attached to the same GameObject
+[RequireComponent(typeof(GPUBoids))]
+public class BoidsRender : MonoBehaviour
+{
+ #region Paremeters
+ // Scale of each rendered boid instance
+ public Vector3 ObjectScale = new Vector3(0.1f, 0.2f, 0.5f);
+ #endregion
+
+ #region Script References
+ // Reference to the GPUBoids script (simulation data source)
+ public GPUBoids GPUBoidsScript;
+ #endregion
+
+ #region Built-in Resources
+ // Mesh drawn for each instance
+ public Mesh InstanceMesh;
+ // Material used for instanced rendering
+ public Material InstanceRenderMaterial;
+ #endregion
+
+ #region Private Variables
+ // GPU-instancing arguments (uploaded to a ComputeBuffer):
+ // index count per instance, instance count,
+ // start index location, base vertex location, start instance location
+ uint[] args = new uint[5] { 0, 0, 0, 0, 0 };
+ // Argument buffer for GPU instancing
+ ComputeBuffer argsBuffer;
+ #endregion
+
+ #region MonoBehaviour Functions
+ void Start ()
+ {
+ // Initialize the indirect-arguments buffer
+ argsBuffer = new ComputeBuffer(1, args.Length * sizeof(uint),
+ ComputeBufferType.IndirectArguments);
+ }
+
+ void Update ()
+ {
+ // Draw the instanced mesh every frame
+ RenderInstancedMesh();
+ }
+
+ void OnDisable()
+ {
+ // Release the argument buffer
+ if (argsBuffer != null)
+ argsBuffer.Release();
+ argsBuffer = null;
+ }
+ #endregion
+
+ #region Private Functions
+ void RenderInstancedMesh()
+ {
+ // Skip when the render material or the GPUBoids script is missing,
+ // or when GPU instancing is unsupported on this platform
+ if (InstanceRenderMaterial == null || GPUBoidsScript == null ||
+ !SystemInfo.supportsInstancing)
+ return;
+
+ // Index count of the specified mesh (submesh 0)
+ uint numIndices = (InstanceMesh != null) ?
+ (uint)InstanceMesh.GetIndexCount(0) : 0;
+ // Set the index count per instance
+ args[0] = numIndices;
+ // Set the instance count
+ args[1] = (uint)GPUBoidsScript.GetMaxObjectNum();
+ argsBuffer.SetData(args); // upload to the buffer
+
+ // Pass the boid data buffer to the material
+ InstanceRenderMaterial.SetBuffer("_BoidDataBuffer",
+ GPUBoidsScript.GetBoidDataBuffer());
+ // Pass the boid object scale
+ InstanceRenderMaterial.SetVector("_ObjectScale", ObjectScale);
+ // Define the bounding volume used for culling
+ var bounds = new Bounds
+ (
+ GPUBoidsScript.GetSimulationAreaCenter(), // center
+ GPUBoidsScript.GetSimulationAreaSize() // size
+ );
+ // Draw the mesh with GPU instancing
+ Graphics.DrawMeshInstancedIndirect
+ (
+ InstanceMesh, // mesh to instance
+ 0, // submesh index
+ InstanceRenderMaterial, // material used for drawing
+ bounds, // bounding volume
+ argsBuffer // argument buffer for GPU instancing
+ );
+ }
+ #endregion
+}
+
+
+大量の同一のMeshを描画したい時、一つ一つGameObjectを生成するのでは、ドローコールが上がり描画負荷が増大していきます。また、ComputeShaderでの計算結果をCPUメモリに転送するコストが高く、高速に処理を行いたい場合、GPUでの計算結果をそのまま描画用シェーダに渡し描画処理をさせることが必要です。UnityのGPUインスタンシングを使えば、不要なGameObjectの生成を行うことなく、大量の同一のMeshを少ないドローコールで高速に描画することができます。
+ +このスクリプトでは、Graphics.DrawMeshInstancedIndirectメソッドを用いてGPUインスタンシングによるメッシュ描画を行います。このメソッドでは、メッシュのインデックス数やインスタンス数をComputeBufferとして渡すことができます。GPUからすべてのインスタンスデータを読み込みたい場合に便利です。
+Start()では、このGPUインスタンシングのための引数バッファを初期化しています。初期化時のコンストラクタの3つ目の引数にはComputeBufferType.IndirectArgumentsを指定します.
+RenderInstancedMesh()では、GPUインスタンシングによるメッシュ描画を実行しています。描画のためのマテリアルInstanceRenderMaterialに、SetBufferメソッドで、Boidsシミュレーションによって得られたBoidのデータ(速度、位置の配列)を渡しています。
+Graphics.DrawMeshInstancedIndirectメソッドには、インスタンシングするメッシュ、submeshのインデックス、描画用マテリアル、境界データ、また、インスタンス数などのデータを格納したバッファを引数に渡します。
+このメソッドは通常Update()内で呼ばれるようにします。
+ +Graphics.DrawMeshInstancedIndirectメソッドに対応した描画用のシェーダです。
+BoidsRender.shader
+
+Shader "Hidden/GPUBoids/BoidsRender"
+{
+ Properties
+ {
+ _Color ("Color", Color) = (1,1,1,1)
+ _MainTex ("Albedo (RGB)", 2D) = "white" {}
+ _Glossiness ("Smoothness", Range(0,1)) = 0.5
+ _Metallic ("Metallic", Range(0,1)) = 0.0
+ }
+ SubShader
+ {
+ Tags { "RenderType"="Opaque" }
+ LOD 200
+
+ CGPROGRAM
+ #pragma surface surf Standard vertex:vert addshadow
+ #pragma instancing_options procedural:setup
+
+ struct Input
+ {
+ float2 uv_MainTex;
+ };
+ // Boid data structure (must match the compute-side layout)
+ struct BoidData
+ {
+ float3 velocity; // velocity
+ float3 position; // position
+ };
+
+ #ifdef UNITY_PROCEDURAL_INSTANCING_ENABLED
+ // Structured buffer holding the per-instance boid data
+ StructuredBuffer<BoidData> _BoidDataBuffer;
+ #endif
+
+ sampler2D _MainTex; // albedo texture
+
+ half _Glossiness; // smoothness
+ half _Metallic; // metallic
+ fixed4 _Color; // tint color
+
+ float3 _ObjectScale; // scale of each boid object
+
+ // Convert Euler angles (radians) to a rotation matrix
+ float4x4 eulerAnglesToRotationMatrix(float3 angles)
+ {
+ float ch = cos(angles.y); float sh = sin(angles.y); // heading
+ float ca = cos(angles.z); float sa = sin(angles.z); // attitude
+ float cb = cos(angles.x); float sb = sin(angles.x); // bank
+
+ // RyRxRz (Heading Bank Attitude)
+ return float4x4(
+ ch * ca + sh * sb * sa, -ch * sa + sh * sb * ca, sh * cb, 0,
+ cb * sa, cb * ca, -sb, 0,
+ -sh * ca + ch * sb * sa, sh * sa + ch * sb * ca, ch * cb, 0,
+ 0, 0, 0, 1
+ );
+ }
+
+ // Vertex shader: transform each vertex by this instance's boid data
+ void vert(inout appdata_full v)
+ {
+ #ifdef UNITY_PROCEDURAL_INSTANCING_ENABLED
+
+ // Fetch this instance's boid data by instance ID
+ BoidData boidData = _BoidDataBuffer[unity_InstanceID];
+
+ float3 pos = boidData.position.xyz; // boid position
+ float3 scl = _ObjectScale; // boid scale
+
+ // Object-to-world transformation matrix
+ float4x4 object2world = (float4x4)0;
+ // Place the scale values on the diagonal
+ object2world._11_22_33_44 = float4(scl.xyz, 1.0);
+ // Rotation around Y (yaw) derived from the velocity
+ float rotY =
+ atan2(boidData.velocity.x, boidData.velocity.z);
+ // Rotation around X (pitch) derived from the velocity
+ float rotX =
+ -asin(boidData.velocity.y / (length(boidData.velocity.xyz)
+ + 1e-8)); // guard against division by zero
+ // Build the rotation matrix from the Euler angles (radians)
+ float4x4 rotMatrix =
+ eulerAnglesToRotationMatrix(float3(rotX, rotY, 0));
+ // Apply the rotation to the matrix
+ object2world = mul(rotMatrix, object2world);
+ // Apply the translation to the matrix
+ object2world._14_24_34 += pos.xyz;
+
+ // Transform the vertex to world space
+ v.vertex = mul(object2world, v.vertex);
+ // Transform the normal (renormalized after the transform)
+ v.normal = normalize(mul(object2world, v.normal));
+ #endif
+ }
+
+ // Intentionally empty: the transform is applied in vert() instead of
+ // rewriting unity_ObjectToWorld here
+ void setup()
+ {
+ }
+
+ // Surface shader
+ void surf (Input IN, inout SurfaceOutputStandard o)
+ {
+ fixed4 c = tex2D (_MainTex, IN.uv_MainTex) * _Color;
+ o.Albedo = c.rgb;
+ o.Metallic = _Metallic;
+ o.Smoothness = _Glossiness;
+ }
+ ENDCG
+ }
+ FallBack "Diffuse"
+}
+
+
+#pragma surface surf Standard vertex:vert addshadowこの部分では、サーフェスシェーダとしてsurf()、ライティングモデルはStandard、カスタム頂点シェーダとしてvert()を指定するという処理を行っています。
+#pragma instancing_options ディレクティブで procedural:FunctionName と記述することによって、Graphics.DrawMeshInstancedIndirectメソッドを使うときのための追加のバリアントを生成するようにUnityに指示することができ、頂点シェーダステージの始めに、FunctionNameで指定した関数が呼ばれるようになります。公式のサンプル(https://docs.unity3d.com/ScriptReference/Graphics.DrawMeshInstancedIndirect.html)などを見ると、この関数内で、個々のインスタンスの位置や回転、スケールに基づき、unity_ObjectToWorld行列, unity_WorldToObject行列の書き換えを行っていますが、このサンプルプログラムでは、頂点シェーダ内でBoidsのデータを受け取り、頂点や法線の座標変換を行っています(良いのかわかりませんが…)。そのため、指定したsetup関数内では何も記述していません。
+ +頂点シェーダ(Vertex Shader)に、シェーダに渡されたメッシュの頂点に対して行う処理を記述します。
+unity_InstanceIDによってインスタンスごとに固有のIDを取得することができます。このIDをBoidデータのバッファとして宣言したStructuredBufferの配列のインデックスに指定することによって、インスタンスごとに固有のBoidデータを得ることができます。
+ +Boidの速度データから、進行方向を向くような回転の値を算出します。ここでは直感的に扱うために、回転はオイラー角で表現することにします。Boidを飛行体と捉えると、オブジェクトを基準とした座標の3軸の回転は、それぞれ、ピッチ、ヨー、ロールと呼ばれます。
+
++図3.4: 軸と回転の呼称 +
+まず、Z軸についての速度とX軸についての速度から、逆正接(アークタンジェント)を返すatan2メソッドを用いてヨー(水平面に対してどの方向を向いているか)を求めます。
+
++図3.5: 速度と角度(ヨー)の関係 +
+次に、速度の大きさと、Y軸についての速度の比率から、逆正弦(アークサイン)を返すasinメソッドを用いてピッチ(上下の傾き)を求めています。それぞれの軸についての速度の中でY軸の速度が小さい場合は、変化が少なく水平を保つように重みのついた回転量になるようになっています。
+
++図3.6: 速度と角度(ピッチ)の関係 +
+移動、回転、拡大縮小といった座標変換処理は、まとめて一つの行列で表現することができます。4x4の行列object2worldを定義します。
+ +まず、スケール値を代入します。XYZ軸それぞれに \rm S_x S_y S_z {} だけ拡大縮小を行う行列Sは以下のように表現されます。
+\rm
+S=
+\left(
+\begin{array}{cccc}
+\rm S_x & 0 & 0 & 0 \\
+0 & \rm S_y & 0 & 0 \\
+0 & 0 & \rm S_z & 0 \\
+0 & 0 & 0 & 1
+\end{array}
+\right)
+
+HLSLのfloat4x4型の変数は、._11_22_33_44のようなスィズルを用いて行列の特定の要素を指定できます。デフォルトであれば、成分は以下のように整列してます。
+表3.1:
+| 11 | 12 | 13 | 14 |
|---|---|---|---|
| 21 | 22 | 23 | 24 |
| 31 | 32 | 33 | 34 |
| 41 | 42 | 43 | 44 |
ここでは、11、22、33、にXYZそれぞれのスケールの値、44には1を代入します。
+ +次に、回転を適用します。XYZ軸それぞれについての回転 \rm R_x R_y R_z {} を行列で表現すると、
+\rm
+R_x(\phi)=
+\left(
+\begin{array}{cccc}
+1 & 0 & 0 & 0 \\
+0 & \rm cos(\phi) & \rm -sin(\phi) & 0 \\
+0 & \rm sin(\phi) & \rm cos(\phi) & 0 \\
+0 & 0 & 0 & 1
+\end{array}
+\right)
+
+\rm
+R_y(\theta)=
+\left(
+\begin{array}{cccc}
+\rm cos(\theta) & 0 & \rm sin(\theta) & 0 \\
+0 & 1 & 0 & 0 \\
+\rm -sin(\theta) & 0 & \rm cos(\theta) & 0 \\
+0 & 0 & 0 & 1
+\end{array}
+\right)
+
+\rm
+R_z(\psi)=
+\left(
+\begin{array}{cccc}
+\rm cos(\psi) & \rm -sin(\psi) & 0 & 0 \\
+\rm sin(\psi) & \rm cos(\psi) & 0 & 0 \\
+0 & 0 & 1 & 0 \\
+0 & 0 & 0 & 1
+\end{array}
+\right)
+
+これを一つに行列に合成します。このとき、合成する回転の軸の順によって回転時の挙動が変化しますが、この順に合成すると、Unityの標準の回転と同様のものになるはずです。
+
++図3.7: 回転行列の合成 +
+これによって求められた回転行列と、上のスケールを適用した行列との積を求めることによって、回転を適用します。
+ +次に、平行移動を適用します。それぞれの軸に、 \rm T_x T_y T_z {} 平行移動するとすると、行列は以下のように表現されます。
+\rm T=
+\left(
+\begin{array}{cccc}
+1 & 0 & 0 & \rm T_x \\
+0 & 1 & 0 & \rm T_y \\
+0 & 0 & 1 & \rm T_z \\
+0 & 0 & 0 & 1
+\end{array}
+\right)
+
+この平行移動は、14, 24, 34成分にXYZそれぞれの軸についての位置(Position)データを加算することで適用できます。
+これらの計算によって得られた行列を、頂点、法線に適用させることによって、Boidのトランスフォームデータを反映します。
+ +このように群れっぽい動きをするオブジェクトが描画されると思います。
+
++図3.8: 実行結果 +
+この章で紹介した実装は、最低限のBoidsのアルゴリズムを利用したものですが、パラメータの調整によっても、群は大きなまとまりになったり、幾つもの小群体が作られたりと、異なる特徴を持った動きを見せると思います。ここで示した基本的な行動規則の他にも、考慮すべきルールが存在します。例えば、これが魚の群だとして、それらを捕食する外敵が現れたとすると当然逃げるような動きをし、地形など障害物があるとすれば魚はぶつからないように避けるでしょう。視覚について考えると、動物の種によっては視野や精度も異なり、視界の外の他の個体は計算処理から除外するなどすると、より実際のものに近づいていくと思います。空を飛ぶのか、水の中を動くのか、陸上を移動するのかといった環境や、移動運動のための運動器官の特性によっても動きの特徴が変わってきます。個体差にも着眼すべきです。
+GPUによる並列処理は、CPUによる演算に比べれば多くの個体を計算できますが、基本的には他の個体との計算は総当たりで行っており、計算効率はあまり良いとは言えません。それには、個体をその位置によってグリッドやブロックで分割した領域に登録しておき、隣接した領域に存在する個体についてだけ計算処理を行うというように、近傍個体探索の効率化を図ることで計算コストを抑えることができます。
+このように改良の余地は多く残されており、適切な実装と行動のルールを適用することにより、いっそう美しく、迫力、密度と味わいのある群の動きが表現できるようになることと思います。できるようになりたいです。
+ +本章では、ComputeShaderを使った格子法による流体シミュレーションについて解説します。
+ +https://github.com/IndieVisualLab/UnityGraphicsProgramming/
+のAssets/StableFluidsに格納されています。
+ +本章では、格子法による流体シミュレーションと、それらを実現するにあたって必要となる、数式の計算方法や捉え方を解説していきます。まず格子法とは何でしょう。その意味を探る為に、一度流体力学での「流れ」の解析方法に少し迫ってみましょう。
+ +流体力学とは、自然現象である「流れ」を数式化して、計算可能なものとする事に特徴をおいています。この「流れ」、一体どうすれば数値化し解析することが出来るでしょうか。
端的に言ってしまいますと、「時間が一瞬進んだ時の流速」を導く事で数値化する事ができます。少し数学的に言うと、時間で微分した際の流速ベクトルの変化量の解析と言い換える事ができます。
ただ、この流れを解析する方法として、二つの手法が考えられます。
一つは、お風呂のお湯をイメージした際に、お風呂にはったお湯を格子状に分割し、その固定された各格子空間の流速ベクトルを測定する方法。
そしてもう一つは、お風呂にアヒルを浮かべ、アヒルの動き自体を解析する方法です。この二つの方法の内、前者を「オイラーの方法」、後者を「ラグランジュの方法」と呼びます。
さて、一旦コンピューターグラフィックスの方に話を戻しましょう。流体シミュレーションにも、「オイラーの方法」や「ラグランジュの方法」の様にいくつかのシミュレーション方法が存在しますが、大きく分けて、以下の3種類に大別する事ができます。
+漢字の意味合いから少し想像することができるかもしれませんが、格子法は「オイラーの方法」の様に、流れをシミュレーションする際に格子状の「場」を作り、時間で微分した際にその各格子がどういった速度になっているかをシミュレーションする手法をいいます。また粒子法は「ラグランジュの方法」の様に、その粒子の方に着目し、粒子自体の移流をシミュレーションする方法を言います。
格子法・粒子法と共に、お互いに得意不得意な範囲があります。
格子法は流体のシミュレーションにおいて、圧力・粘性・拡散等の計算は得意ですが、移流の計算が不得意です。
これとは逆に、粒子法は移流の計算が得意です。(これらの得意不得意は、オイラーの方法とラグランジュの方法の解析の仕方を思い浮かべると想像がつくかもしれません。)
これらを補う為に、FLIP法に代表される、格子法+粒子法と言った得意分野を補い合う手法も生まれています。
本稿ではSIGGRAPH 1999.で発表されたJos Stam氏の格子法における非圧縮性粘性流体シミュレーションの論文であるStable Fluidsを元に、流体シミュレーションの実装方法やシミュレーションにおける必要な数式の説明を行なっていきます。
+ +まずは、格子法におけるナビエ・ストークスの方程式について見ていきましょう。
+\dfrac {\partial \overrightarrow {u}} {\partial t}=-\left( \overrightarrow {u} \cdot \nabla \right) \overrightarrow {u} + \nu \nabla ^{2} \overrightarrow {u} + \overrightarrow{f}
+
+\dfrac {\partial \rho} {\partial t}=-\left( \overrightarrow {u} \cdot \nabla \right) \rho + \kappa \nabla ^{2} \rho + S
+
+\nabla \cdot \overrightarrow{u} = 0
+
+上記の内、一つ目の方程式は速度場、二つ目は密度場を表します。また、3つ目は「連続の式(質量保存則)」となります。これらの3つの式を一つずつ紐解いて見ましょう。
+ +まずは式としても短く、「非圧縮性」流体をシミュレーションする際の条件として働く「連続の式(質量保存則)」から紐解いて見ましょう。
流体をシミュレーションする際に、その対象が圧縮性か非圧縮性かを明確に区別する必要があります。例えば、気体等の密度が圧力によって変化する物が対象である場合は圧縮性流体となります。逆に、水などの密度がどの場所でも一定である物は、非圧縮性流体となります。
本章では非圧縮性流体のシミュレーションを取り扱いますので、速度場の各セルの発散は0に保つ必要があります。つまり、速度場の流入と流出を相殺させ、0になるように維持します。流入があれば流出させる為、流速は伝搬して行く事となります。この条件は連続の式(質量保存則)として、以下の方程式で表す事ができます。
\nabla \cdot \overrightarrow{u} = 0
+
+上記は「発散(ダイバージェンス)が0」であるという意味になります。まずは「発散(ダイバージェンス)」の数式を確認しておきましょう。
+ +\nabla \cdot \overrightarrow{u} = \nabla \cdot (u, v) = \dfrac{\partial u}{\partial x} + \dfrac{\partial v}{\partial y}
+
+\nabla(ナブラ演算子)はベクトル微分演算子といいます。例えばベクトル場が2次元と想定した場合に、図のように \left( \dfrac {\partial } {\partial x}_, \dfrac {\partial } {\partial y} \right) の偏微分を取る際の、偏微分の表記を簡略化した演算子として作用します。\nabla演算子は演算子ですので、それだけでは意味を持ちませんが、一緒に組み合わせる式が内積なのか、外積なのか、それとも単に\nabla fといった関数なのかで演算内容が変わってきます。
今回は偏微分の内積をとる「発散(ダイバージェンス)」について説明しておきましょう。まず、なぜこの式が「発散」という意味になるのかを見てみます。
発散を理解する為に、まずは下記のような格子空間の一つのセルを切り出して考えてみましょう。
+
++図4.1: ベクトル場から微分区間(Δx,Δy)のセルを抽出 +
+発散とは、ベクトル場の一つのセルにどれくらいのベクトルが流出、流入しているかを算出する事を言います。なお流出を+、流入を−とします。
+発散は上記のように、ベクトル場のセルを切り取った際の偏微分をみた際に、x方向の特定のポイントxと微量に進んだ\Delta xとの変化量、また、y方向の特定のポイントyと微量に進んだ\Delta yとの変化量の内積で求める事ができます。なぜ偏微分との内積で流出が求まるかは、上記の図を微分演算する事で証明できます。
+\frac{i(x + \Delta x, y)\Delta y - i(x,y)\Delta y + j(x, y + \Delta y)\Delta x - j(x,y)\Delta x }{\Delta x \Delta y}
+
+ = \frac{i(x+\Delta x, y) - i(x,y)}{\Delta x} + \frac{j(x, y+\Delta y) - j(x,y)}{\Delta y}
+
+上記の式から極限をとり、
+\lim_{\Delta x \to 0} \frac{i(x+\Delta x, y) - i(x,y)}{\Delta x} + \lim_{\Delta y \to 0} \frac{j(x,y+\Delta y) - j(x,y)}{\Delta y} = \dfrac {\partial i} {\partial x} + \dfrac {\partial j} {\partial y}
+
+とする事で、最終的に偏微分との内積の式と等式になる事がわかります。
+ +次に、格子法の本丸である速度場について説明していきます。その前に、速度場のナビエ・ストークス方程式を実装していくにあたって、先ほど確認した発散(divergence)に加え、勾配(gradient)とラプラシアン(Laplacian)について確認しておきましょう。
+ +\nabla f(x, y) = \left( \dfrac{\partial f}{\partial x}_,\dfrac{\partial f}{\partial y}\right)
+
+\nabla f (grad \ f)は勾配を求める式となります。意味としては、各偏微分方向に微小に進んだ座標を、関数fにてサンプリングし、求められた各偏微分方向の値を合成する事によって、最終的にどのベクトルを向くのかを意味しています。つまり、偏微分した際の値の大きい方向に向いたベクトルを算出する事ができます。
+ +\Delta f = \nabla^2 f = \nabla \cdot \nabla f = \frac{\partial^2 f}{\partial x^2} + \frac{\partial^2 f}{\partial y^2}
+
+ラプラシアンはナブラを上下反転させた記号で表されます。(デルタと同じですが、文脈から読み取り、間違えないようにしましょう。)
\nabla^2 f、もしくは\nabla \cdot \nabla fとも書き、二階偏微分として演算されます。
また、解体して考えると、関数の勾配をとって、発散を求めた形とも取れるでしょう。
意味合い的に考えると、ベクトル場の中で勾配方向に集中した箇所は流入が多い為、発散をとった場合−に、逆に勾配の低い箇所は湧き出しが多いので発散を取った時に+になる事が想像できます。
ラプラシアン演算子にはスカラーラプラシアンとベクトルラプラシアンがあり、ベクトル場に作用させる場合は、勾配・発散・回転(∇とベクトルの外積)を用いた、
\nabla^2 \overrightarrow{u} = \nabla \nabla \cdot \overrightarrow{u} - \nabla \times \nabla \times \overrightarrow{u}
+
+といった式で導くのですが、直交座標系の場合のみ、ベクトルの成分毎に勾配と発散を求め、合成する事で求める事ができます。
+\nabla^2 \overrightarrow{u} = \left(
+\dfrac{\partial ^2 u_x}{\partial x^2}+\dfrac{\partial ^2 u_x}{\partial y^2}+\dfrac{\partial ^2 u_x}{\partial z^2}_,
+\dfrac{\partial ^2 u_y}{\partial x^2}+\dfrac{\partial ^2 u_y}{\partial y^2}+\dfrac{\partial ^2 u_y}{\partial z^2}_,
+\dfrac{\partial ^2 u_z}{\partial x^2}+\dfrac{\partial ^2 u_z}{\partial y^2}+\dfrac{\partial ^2 u_z}{\partial z^2}
+\right)
+
+以上で、格子法でのナビエ・ストークス方程式を解くための必要な数式の確認は完了しました。ここから、速度場の方程式を各項ごとに見ていきましょう。
+ +\dfrac {\partial \overrightarrow {u}} {\partial t}=-\left( \overrightarrow {u} \cdot \nabla \right) \overrightarrow {u} + \nu \nabla ^{2} \overrightarrow {u} + \overrightarrow {f}
+
+上記の内、\overrightarrow {u}は流速、\nuは動粘性係数(kinematic viscosity)、\overrightarrow{f}は外力(force)になります。
左辺側は時間で偏微分をとった際の流速である事がわかります。右辺側は第一項を移流項、第二項を拡散粘性項、第三項を圧力項、第四項を外力項とします。
これらは、計算時には一括でできるものであっても、実装時にはステップに分けて実装して行く必要があります。
まず、ステップとして、外力を受けなければ、初期条件のまま変化を起こす事ができませんので、第四項の外力項から考えて見たいと思います。
これはシンプルに外部からのベクトルを加算する部分となります。つまり初期条件で速度場がベクトル量0の状態に対し、ベクトルの起点としてUIであったりなんらかのイベントから、RWTexture2Dの該当IDにベクトルを加算する部分となります。
コンピュートシェーダーの外力項のカーネルは、以下の様に実装しておきます。また、コンピュートシェーダーにて使用予定の各係数やバッファの定義も記述しておきます。
float visc; //kinematic viscosity
+float dt; //delta time
+float velocityCoef; //external force coefficient for the velocity field
+float densityCoef; //external source coefficient for the density field
+
+//xy = velocity, z = density; fluid solver output passed to the render shader
+RWTexture2D<float4> solver;
+//density field
+RWTexture2D<float> density;
+//velocity field
+RWTexture2D<float2> velocity;
+//xy = pre vel, z = pre dens. when project, x = p, y = div
+//previous-step buffer, also reused as scratch during the projection step
+RWTexture2D<float3> prev;
+//xy = velocity source, z = density source; external force input buffer
+Texture2D source;
+
+// Add the external force (from the source texture) into the velocity field
+[numthreads(THREAD_X, THREAD_Y, THREAD_Z)]
+void AddSourceVelocity(uint2 id : SV_DispatchThreadID)
+{
+ uint w, h;
+ velocity.GetDimensions(w, h);
+
+ if (id.x < w && id.y < h)
+ {
+ velocity[id] += source[id].xy * velocityCoef * dt;
+ prev[id] = float3(source[id].xy * velocityCoef * dt, prev[id].z);
+ }
+}
+
+次のステップとして、第二項の拡散粘性項を実装します。
+ +\nu \nabla ^{2} \overrightarrow {u}
+
+\nabla演算子や\Delta演算子の左右に値がある時には、「右の要素にのみ作用する」というルールがありますので、この場合、動粘性係数は一旦置いておいて、ベクトルラプラシアンの部分を先に考えます。
流速\overrightarrow{u}に対してベクトルラプラシアンで、ベクトルの各成分毎の勾配と発散をとり合成させ、流速を隣接へ拡散させています。そこに動粘性係数を乗算する事によって、拡散の勢いを調整します。
ここでは流速の各成分の勾配を取った上に拡散させていますので、隣接からの流入も隣接への流出も起こり、ステップ1で受けたベクトルが隣接へと影響していくという現象が分かるかと思います。
実装面においては、少し工夫が必要となります。数式通りに実装すると、粘性係数と微分時間・格子数を乗算させた拡散率が高くなってしまった場合に、振動が起こり、収束が取れず最後にはシミュレーション自体が発散してしまいます。
拡散をStableな状態にする為に、ここではガウス・ザイデル法やヤコビ法、SOR法等の反復法が用いられます。ここではガウス・ザイデル法でシミュレーションしてみましょう。
ガウス・ザイデル法とは、式を自セルに対する未知数からなる線形方程式に変換し、算出された値をすぐに次の反復時に使い、連鎖させることで近似の答えに収束させていく方法です。反復回数は多ければ多いほど正確な値へと収束していきますが、リアルタイムレンダリングにおけるグラフィックスで必要なのは、正確な結果ではなく、より良いフレームレートと見た目の美しさですので、イテレーション回数はマシンパフォーマンスや見た目を考慮し、調整しましょう。
#define GS_ITERATE 4
+
+// Viscous diffusion of the velocity field, solved with
+// Gauss-Seidel iteration to keep the simulation stable
+[numthreads(THREAD_X, THREAD_Y, THREAD_Z)]
+void DiffuseVelocity(uint2 id : SV_DispatchThreadID)
+{
+ uint w, h;
+ velocity.GetDimensions(w, h);
+
+ if (id.x < w && id.y < h)
+ {
+ // diffusion rate: viscosity * dt * grid cell count
+ float a = dt * visc * w * h;
+
+ [unroll]
+ for (int k = 0; k < GS_ITERATE; k++) {
+ velocity[id] = (prev[id].xy + a * (
+ velocity[int2(id.x - 1, id.y)] +
+ velocity[int2(id.x + 1, id.y)] +
+ velocity[int2(id.x, id.y - 1)] +
+ velocity[int2(id.x, id.y + 1)]
+ )) / (1 + 4 * a);
+ SetBoundaryVelocity(id, w, h);
+ }
+ }
+}
+
+上記のSetBoundaryVelocity関数は境界用のメソッドになります。詳しくはリポジトリをご参照下さい。
+ +\nabla \cdot \overrightarrow{u} = 0
+
+ここで一旦、項を進める前に質量保存側に立ち返りましょう。これまでの工程で、外力項で受けた力を速度場に拡散させましたが、現状、各セルの質量は保存されておらず、湧き出しっぱなしの場所と流入が多い場所とで、質量が保存されていない状態になっています。
上記の方程式の様に、質量は必ず保存させ各セルの発散を0に持っていかないといけませんから、ここで一旦質量を保存をしておきましょう。
なお、質量保存ステップをComputeShaderで行う際、隣接スレッドとの偏微分演算を行う為、場を確定しておかなければなりません。グループシェアードメモリ内で偏微分演算ができれば高速化が見込めたのですが、別のグループスレッドから偏微分を取った時に、やはり値が取得できず汚い結果となってしまった為、ここはバッファを確定しながら、3ステップに分け進めます。
速度場から発散算出 > Poisson方程式をガウス・ザイデル法で算出 > 速度場に減算させ質量保存
の3ステップにカーネルをわけ、場を確定しながら質量保存に持っていきます。なお、SetBound~系は境界に対するメソッドの呼び出しになります。
//Mass conservation, step 1.
+//Compute the divergence of the velocity field via central differences.
+//BUGFIX: -0.5 must scale the whole central-difference sum; previously it
+//scaled only the x term, so the divergence (and the pressure solve built
+//on it) was wrong whenever the flow varied vertically.
+[numthreads(THREAD_X, THREAD_Y, THREAD_Z)]
+void ProjectStep1(uint2 id : SV_DispatchThreadID)
+{
+ uint w, h;
+ velocity.GetDimensions(w, h);
+
+ if (id.x < w && id.y < h)
+ {
+ float2 uvd;
+ uvd = float2(1.0 / w, 1.0 / h);
+
+ // prev.x = initial pressure guess (0), prev.y = divergence
+ prev[id] = float3(0.0,
+ -0.5 *
+ ((uvd.x * (velocity[int2(id.x + 1, id.y)].x -
+ velocity[int2(id.x - 1, id.y)].x)) +
+ (uvd.y * (velocity[int2(id.x, id.y + 1)].y -
+ velocity[int2(id.x, id.y - 1)].y))),
+ prev[id].z);
+
+ SetBoundaryDivergence(id, w, h);
+ SetBoundaryDivPositive(id, w, h);
+ }
+}
+
+//Mass conservation, step 2.
+//Solve the Poisson equation for pressure from the divergence computed
+//in step 1, using Gauss-Seidel iteration.
+[numthreads(THREAD_X, THREAD_Y, THREAD_Z)]
+void ProjectStep2(uint2 id : SV_DispatchThreadID)
+{
+ uint w, h;
+
+ velocity.GetDimensions(w, h);
+
+ if (id.x < w && id.y < h)
+ {
+ for (int k = 0; k < GS_ITERATE; k++)
+ {
+ // prev.x = pressure, prev.y = divergence (from step 1)
+ prev[id] = float3(
+ (prev[id].y + prev[uint2(id.x - 1, id.y)].x +
+ prev[uint2(id.x + 1, id.y)].x +
+ prev[uint2(id.x, id.y - 1)].x +
+ prev[uint2(id.x, id.y + 1)].x) / 4,
+ prev[id].yz);
+ SetBoundaryDivPositive(id, w, h);
+ }
+ }
+}
+
+//Mass conservation, step 3.
+//Subtract the pressure gradient so that div(u) = 0.
+[numthreads(THREAD_X, THREAD_Y, THREAD_Z)]
+void ProjectStep3(uint2 id : SV_DispatchThreadID)
+{
+ uint w, h;
+
+ velocity.GetDimensions(w, h);
+
+ if (id.x < w && id.y < h)
+ {
+ float velX, velY;
+ float2 uvd;
+ uvd = float2(1.0 / w, 1.0 / h);
+
+ velX = velocity[id].x;
+ velY = velocity[id].y;
+
+ // subtract the gradient of the pressure stored in prev.x
+ velX -= 0.5 * (prev[uint2(id.x + 1, id.y)].x -
+ prev[uint2(id.x - 1, id.y)].x) / uvd.x;
+ velY -= 0.5 * (prev[uint2(id.x, id.y + 1)].x -
+ prev[uint2(id.x, id.y - 1)].x) / uvd.y;
+
+ velocity[id] = float2(velX, velY);
+ SetBoundaryVelocity(id, w, h);
+ }
+}
+
+これで速度場を質量保存がされた状態にできました。流出した箇所に流入がおき、流入が多い箇所からは流出がおきる為、流体らしい表現になりました。
+ +-\left( \overrightarrow {u} \cdot \nabla \right) \overrightarrow {u}
+
+移流項はラグランジュの方法的な手法が用いられるのですが、1ステップ前の速度場のバックトレースを行い、該当セルから速度ベクトルを引いた箇所の値を、現在いる場所に移動するといった作業を各セルに対して行います。バックトレースした際に、格子にぴったり収まる場所に遡れる訳ではありませんので、移流の際は近傍4セルとの線形補間を行い、正しい値を移流させます。
+// Advect the velocity field: back-trace each cell through the previous
+// velocity field and bilinearly interpolate the four surrounding cells.
+[numthreads(THREAD_X, THREAD_Y, THREAD_Z)]
+void AdvectVelocity(uint2 id : SV_DispatchThreadID)
+{
+ uint w, h;
+ density.GetDimensions(w, h);
+
+ if (id.x < w && id.y < h)
+ {
+ int ddx0, ddx1, ddy0, ddy1;
+ float x, y, s0, t0, s1, t1, dfdt;
+
+ dfdt = dt * (w + h) * 0.5;
+
+ //Back-trace point.
+ x = (float)id.x - dfdt * prev[id].x;
+ y = (float)id.y - dfdt * prev[id].y;
+ //Clamp the point into the simulation domain.
+ //BUGFIX: clamp() returns the clamped value; the original discarded
+ //the result, so out-of-range back-traces sampled outside the grid.
+ x = clamp(x, 0.5, w + 0.5);
+ y = clamp(y, 0.5, h + 0.5);
+ //Cells neighboring the back-traced point.
+ ddx0 = floor(x);
+ ddx1 = ddx0 + 1;
+ ddy0 = floor(y);
+ ddy1 = ddy0 + 1;
+ //Fractions for the bilinear interpolation.
+ s1 = x - ddx0;
+ s0 = 1.0 - s1;
+ t1 = y - ddy0;
+ t0 = 1.0 - t1;
+
+ //Bilinearly interpolate the previous-step values at the
+ //back-traced point and write into the current velocity field.
+ velocity[id] = s0 * (t0 * prev[int2(ddx0, ddy0)].xy +
+ t1 * prev[int2(ddx0, ddy1)].xy) +
+ s1 * (t0 * prev[int2(ddx1, ddy0)].xy +
+ t1 * prev[int2(ddx1, ddy1)].xy);
+ SetBoundaryVelocity(id, w, h);
+ }
+}
+
+次に密度場の方程式をみてみましょう。
+\dfrac {\partial \rho} {\partial t}=-\left( \overrightarrow {u} \cdot \nabla \right) \rho + \kappa \nabla ^{2} \rho + S
+
+上記の内、\overrightarrow {u}は流速、\kappaは拡散係数、ρは密度、Sは外圧になります。
密度場は必ずしも必要ではありませんが、速度場を求めた際の各ベクトルに対し、密度場で拡散させた画面上のピクセルを乗せる事で、溶けながら流れる様な、より流体らしい表現が可能になります。
尚、密度場の数式を見て気づいた方もいらっしゃるかと思いますが、速度場と全く同じフローになっており、違いはベクトルがスカラーになっている点と、動粘性係数\nuが拡散係数\kappaになっている点、質量保存則を用いない点の3点のみしかありません。
密度場は密度の変化の場ですので、非圧縮性である必要はなく、質量保存の必要がありません。また、動粘性係数と拡散係数は、係数としての使い所は同じになります。
ですので、先ほど速度場で用いたカーネルの質量保存則以外のカーネルを、次元を落として作ることによって、密度場を実装する事が可能です。紙面上密度場の解説はしませんが、リポジトリには密度場も実装しておりますので、そちらもご参照ください。
上記の速度場及び密度場、質量保存則を用いることによって流体をシミュレーションする事ができるのですが、シミュレーションのステップについて、最後に見ておきましょう。
+上記がStableFluidのシミュレーションステップになります。
+ +実行して、マウスでスクリーン上をドラッグすると、以下の様な流体シミュレーションを起こす事が可能です。
+
++図4.2: 実行例 +
+流体シミュレーションは、プリレンダリングと違い、Unityの様なリアルタイムゲームエンジンにとっては負荷の高い分野です。しかし、GPU演算能力の向上から、2次元であればある程度の解像度でも耐えうるFPSが出せる様になってきました。また、途中で出てきたGPUにとって負荷の高い演算部分、ガウス・ザイデル反復法を別の処理で実装してみたり、速度場自体をカールノイズで代用してみたり等の工夫をすれば、より軽い演算での流体表現も可能になる事でしょう。
+もしこの章をお読みいただいて、少しでも流体に興味を持たれた方は、ぜひ次章の「粒子法による流体シミュレーション」にもトライして見て下さい。格子法とはまた違った角度から流体に迫れますので、流体シミュレーションの奥深さや実装の面白さを体験できる事かと思います。
+ +こんにちは!すぎのひろのりです!では残念ながらありません。
+締め切りも近づいてきたある日、「すぎっちょ記事書いてる?」と問うたところ「あ!」とだけおっしゃり、どうやら完全に失念されておったようです。最近忙しそうなのですが、せっかくのこの機会、彼の実績を紹介したいのもあり、ここは簡単に代筆でお送りします。
+ +すぎっちょは制作物をGithubに積極的に公開しており、その中でも個人的に面白いなと思ったのがこちらです。
+https://github.com/sugi-cho/ProjectionSpray
+3Dモデルにスプレーを吹きかけるようにして色をつけることができます。
+ +
++図10.1: デモ画像1 +
+スプレーデバイスからスプレーが噴出し、ボディの表面に色が塗られます。
+
++図10.2: デモ画像2 +
+謎のフェチズムを感じます。
+
++図10.3: デモ画像3 +
+ステンシルのようなことも!
+
++図10.4: デモ画像4 +
+Unity!
+ +次回があれば、是非とも詳しい解説をお願いしたいものです。
+すぎっちょが、面接時、自分と、似たものを、感じる、と言っていた同僚の仲田さんのリポジトリも、Unityの便利で優れたコードが多数あげられておりおすすめです。
+ +失礼いたしました。
+(。・ˇ_ˇ・。)
+ + diff --git a/articles/takao.html b/articles/takao.html new file mode 100644 index 0000000..818cad9 --- /dev/null +++ b/articles/takao.html @@ -0,0 +1,703 @@ + + + + + + + +前章では、格子法による流体シミュレーションの作成方法について解説しました。本章では、もう一つの流体のシミュレーション方法である粒子法、特にSPH法を用いて流体の動きを表現していきます。多少噛み砕いて説明を行っているので、不十分な表現などありますがご了承ください。
+ +流体の動きの観測方法として、オイラー的視点とラグランジュ的視点というものが存在します。オイラー的視点とは、流体に等間隔で観測点を固定し、その観測点での流体の動きを解析するものです。一方、ラグランジュ的視点とは、流体の流れに沿って動く観測点を浮かべ、その観測点での流体の動きを観測するものとなります(図5.1参照)。基本的に、オイラー的視点を用いた流体シミュレーション手法のことを格子法、ラグランジュ的視点を用いた流体シミュレーション手法のことを粒子法と呼びます。
+
++図5.1: 左:オイラー的、右:ラグランジュ的 +
+オイラー的視点とラグランジュ的視点では、微分の演算の仕方が異なります。はじめに、オイラー的視点で表された物理量*1を以下に示してみます。
+[*1] 物理量とは、観測できる速度や質量などのことを指します。 端的には「単位が有るもの」と捉えて良いでしょう。
\phi = \phi (\overrightarrow{x}, t)
+
+これは、時刻tで位置\overrightarrow{x}にある物理量\phiという意味になります。この物理量の時間微分は、
+ \frac{\partial \phi}{\partial t}
+
+と表せます。もちろんこれは、物理量の位置が\overrightarrow{x}で固定されていますので、オイラー的視点での微分になります。
+[*2] 流れに沿った観測点の移動のことを、移流と呼びます。
一方、ラグランジュ的視点では、観測点を流れに沿って移動*2させますので、観測点自体も時間の関数となっています。そのため、初期状態で\overrightarrow{x}_0にあった観測点は、時刻tで
+ \overrightarrow{x}(\overrightarrow{x}_0, t)
+
+に存在します。 よって物理量の表記も
+ \phi = \phi (\overrightarrow{x}(\overrightarrow{x}_0, t), t)
+
+となります。微分の定義に従って、現在の物理量と\Delta t秒後の物理量の変化量を見てみると
+ \displaystyle \lim_{\Delta t \to 0} \frac{\phi(\overrightarrow{x}(\overrightarrow{x}_0, t + \Delta t), t + \Delta t) - \phi(\overrightarrow{x}(\overrightarrow{x}_0, t), t)}{\Delta t}
+
+ = \sum_i \frac{\partial \phi}{\partial x_i} \frac{\partial x_i}{\partial t} + \frac{\partial \phi}{\partial t}
+
+ = \left( \left( \begin{matrix}u_1\\u_2\\u_3\end{matrix} \right)
+ \cdot
+ \left( \begin{matrix} \frac{\partial}{\partial x_1}\\\frac{\partial}{\partial x_2}\\\frac{\partial}{\partial x_3} \end{matrix} \right)
+ + \frac{\partial}{\partial t}
+ \right) \phi\\
+
+ = (\frac{\partial}{\partial t} + \overrightarrow{u} \cdot {grad}) \phi
+
+となります。これが、観測点の移動を考慮した物理量の時間微分となります。しかしながら、この表記を用いていては式が複雑になりますので、
+ \dfrac{D}{Dt} := \frac{\partial}{\partial t} + \overrightarrow{u} \cdot {grad}
+
+という演算子を導入することで、短く表すことができます。これら、観測点の移動を考慮した一連の操作を、ラグランジュ微分と呼びます。一見複雑そうに見えますが、観測点が移動する粒子法では、ラグランジュ的視点で式を表した方が都合が良くなります。
+ +流体は、流体の速度が音速よりも十分に小さい場合、体積の変化が起きないとみなすことができます。これは流体の非圧縮条件と呼ばれ、以下の数式で表されます。
+ \nabla \cdot \overrightarrow{u} = 0
+
+これは、流体内で湧き出しや消失がないことを示しています。この式の導出には少し複雑な積分が入りますので、説明は割愛*3します。「流体は圧縮しない!」程度に捉えておいてください。
+[*3] "Fluid Simulation for Computer Graphics - Robert Bridson" で詳しく解説されています。
粒子法では、流体を小さな粒子によって分割し、ラグランジュ的視点で流体の動きを観測します。この粒子が、前節の観測点にあたります。 一口に「粒子法」といっても、現在では多くの手法が提案されており、有名なものとして
+などがあります。
+ +はじめに、粒子法におけるナビエ・ストークス方程式(以下NS方程式)は、以下のように記述されます。
+ \dfrac{D \overrightarrow{u}}{Dt} = -\dfrac{1}{\rho}\nabla p + \nu \nabla \cdot \nabla \overrightarrow{u} + \overrightarrow{g}
+ \label{eq:navier}
+
+前章の格子法で出てきたNS方程式とは少し形が異なりますね。移流項がまるまる抜けてしまっていますが、先程のオイラー微分とラグランジュ微分の関係を見てみると、うまくこの形に変形できることがわかります。粒子法では観測点を流れに沿って移動させますから、NS方程式計算時に移流項を考慮する必要がありません。移流の計算はNS方程式で算出した加速度をもとに粒子位置を直接更新することで済ませる事ができます。
+現実の流体は分子の集まりですので、ある種のパーティクルシステムであると言うことができます。しかし、コンピュータで実際の分子の数の計算を行うのは不可能ですので、計算可能な大きさに調節してあげる必要があります。図5.2に示されているそれぞれの粒(*4)は、計算可能な大きさで分割した流体の一部分を表していています。これらの粒は、それぞれ質量m、位置ベクトル\overrightarrow{x}、速度ベクトル\overrightarrow{u}、体積Vを持つと考えることができます。
+
++図5.2: 流体のパーティクル近似 +
+これらそれぞれの粒について、外から受けた力\overrightarrow{f}を計算し、運動方程式m \overrightarrow{a} = \overrightarrow{f}を解くことで加速度が算出され、次のタイムステップでどのように移動するかを決めることができます。
+[*4] 英語では'Blob'と呼ばれます
前述の通り、それぞれの粒子は周りから何らかの力を受けて動きますが、その「力」とは一体何でしょうか。簡単な例として、重力m \overrightarrow{g}があげられますが、それ以外に周りの粒子からも何らかの力を受けるはずです。これらの力について、以下に解説します。
+ +流体粒子にかかる力の1つ目は、圧力です。流体は必ず圧力の高い方から低い方に向かって流れます。もし圧力がどの方向からも同じだけかかっていたとすると、力は打ち消されて動きが止まってしまいますから、圧力のバランスが不均一である場合を考えます。前章で述べられたように、圧力のスカラー場の勾配を取ることで、自分の粒子位置から見て最も圧力上昇率の高い方向を算出することができます。粒子が力を受ける方向は、圧力の高い方から低い方ですので、マイナスを取って-\nabla pとなります。また、粒子は体積を持っていますから、粒子にかかる圧力は、-\nabla pに粒子の体積をかけて算出します*5。最終的に、- V \nabla pという結果が導出されます。
+[*5] 流体の非圧縮条件により、単に体積をかけるだけで粒子にかかる圧力の積分を表すことができます。
流体粒子にかかる力の2つ目は、粘性力です。粘性(ねばりけ)のある流体とは、はちみつや溶かしたチョコレートなどに代表される、変形しづらい流体のことを指します。粘性があるという言葉を粒子法の表現に当てはめてみると、粒子の速度は、周りの粒子速度の平均をとりやすいということになります。前章で述べられた通り、周囲の平均をとるという演算は、ラプラシアンを用いて行うことができます。
+粘性の度合いを動粘性係数\muを用いて表すと、\mu \nabla \cdot \nabla \overrightarrow{u}と表す事ができます。
+ +これらの力を運動方程式m \overrightarrow{a} = \overrightarrow{f}に当てはめて整理すると、
+ m \dfrac{D\overrightarrow{u}}{Dt} = - V \nabla p + V \mu \nabla \cdot \nabla \overrightarrow{u} + m\overrightarrow{g}
+
+ここで、mは\rho Vであることから、変形して(Vが打ち消されます)
+ \rho \dfrac{D\overrightarrow{u}}{Dt} = - \nabla p + \mu \nabla \cdot \nabla \overrightarrow{u} + \rho \overrightarrow{g}
+
+両辺\rhoで割り、
+ \dfrac{D\overrightarrow{u}}{Dt} = - \dfrac{1}{\rho}\nabla p + \dfrac{\mu}{\rho} \nabla \cdot \nabla \overrightarrow{u} + \overrightarrow{g}
+
+最後に、粘性項の係数\dfrac{\mu}{\rho}に\nuを導入して、
+ \dfrac{D\overrightarrow{u}}{Dt} = - \dfrac{1}{\rho}\nabla p + \nu \nabla \cdot \nabla \overrightarrow{u} + \overrightarrow{g}
+
+となり、はじめに挙げたNS方程式を導出することができました。
+ +粒子法では、粒子自体が流体の観測点を表現しているので、移流項の計算は単に粒子位置を移動させるだけで完了します。実際の時間微分の計算では、無限に小さい時間を用いますが、コンピューターでの計算では無限を表現できないため、十分小さい時間\Delta tを用いて微分を表現します。これを差分と言い、\Delta tを小さくすればするほど、正確な計算を行うことができます。
+加速度について、差分の表現を導入すると、
+ \overrightarrow{a} = \dfrac{D\overrightarrow{u}}{Dt} \equiv \frac{\Delta \overrightarrow{u}}{\Delta t}
+
+となります。よって速度の増分\Delta \overrightarrow{u}は、
+\Delta \overrightarrow{u} = \Delta t \overrightarrow{a}
+
+となり、また、位置の増分についても同様に、
+ \overrightarrow{u} = \frac{\partial \overrightarrow{x}}{\partial t} \equiv \frac{\Delta \overrightarrow{x}}{\Delta t}
+
+より、
+\Delta \overrightarrow{x} = \Delta t \overrightarrow{u}
+
+となります。
+この結果を利用することで、次のフレームでの速度ベクトルと位置ベクトルを算出できます。現在のフレームでの粒子速度が\overrightarrow{u}_nであるとすると、次のフレームでの粒子速度は\overrightarrow{u}_{n+1}で、
+\overrightarrow{u}_{n+1} = \overrightarrow{u}_n + \Delta \overrightarrow{u} = \overrightarrow{u}_n + \Delta t \overrightarrow{a}
+
+と表せます。
+現在のフレームでの粒子位置が\overrightarrow{x}_nであるとすると、次のフレームでの粒子位置は\overrightarrow{x}_{n+1}で、
+\overrightarrow{x}_{n+1} = \overrightarrow{x}_n + \Delta \overrightarrow{x} = \overrightarrow{x}_n + \Delta t \overrightarrow{u}
+
+と表せます。
+この手法は、前進オイラー法と呼ばれます。これを毎フレーム繰り返すことで、各時刻での粒子の移動を表現することができます。
+ +前節では、粒子法におけるNS方程式の導出方法について解説しました。もちろん、これらの微分方程式をコンピュータでそのまま解くことはできませんので、何らかの近似をしてあげる必要があります。その手法として、CG分野でよく用いられるSPH法について解説します。
+SPH法は、本来宇宙物理学における天体同士の衝突シミュレーションに用いられていた手法ですが、1996年にDesbrunら*6によってCGにおける流体シミュレーションにも応用されました。また、並列化も容易で、現在のGPUでは大量の粒子の計算をリアルタイムに行うことが可能です。コンピュータシミュレーションでは、連続的な物理量を離散化して計算を行う必要がありますが、この離散化を、重み関数と呼ばれる関数を用いて行う手法をSPH法と呼びます。
+[*6] Desbrun and Cani, Smoothed Particles: A new paradigm for animating highly deformable bodies, Eurographics Workshop on Computer Animation and Simulation (EGCAS), 1996.
SPH法では、粒子一つ一つが影響範囲を持っていて、他の粒子と距離が近いほどその粒子の影響を大きく受けるという動作をします。この影響範囲を図示すると図5.3のようになります。
+
++図5.3: 2次元の重み関数 +
+この関数を重み関数*7と呼びます。
+[*7] 通常この関数はカーネル関数とも呼ばれますが、ComputeShaderにおけるカーネル関数と区別するためこの呼び方にしています。
SPH法における物理量を\phiとすると、重み関数を用いて以下のように離散化されます。
+ \phi(\overrightarrow{x}) = \sum_{j \in N}m_j\frac{\phi_j}{\rho_j}W(\overrightarrow{x_j} - \overrightarrow{x}, h)
+
+N, m, \rho, hはそれぞれ、近傍粒子の集合、粒子の質量、粒子の密度、重み関数の影響半径です。また、関数Wが先ほど述べた重み関数になります。
+さらに、この物理量には、勾配とラプラシアンなどの偏微分演算が適用でき、勾配は、
+ \nabla \phi(\overrightarrow{x}) = \sum_{j \in N}m_j\frac{\phi_j}{\rho_j} \nabla W(\overrightarrow{x_j} - \overrightarrow{x}, h)
+
+ラプラシアンは、
+ \nabla^2 \phi(\overrightarrow{x}) = \sum_{j \in N}m_j\frac{\phi_j}{\rho_j} \nabla^2 W(\overrightarrow{x_j} - \overrightarrow{x}, h)
+
+と表せます。式からわかるように、物理量の勾配及びラプラシアンは、重み関数に対してのみ適用されるイメージになります。重み関数Wは、求めたい物理量によって異なるものを使用しますが、この理由の説明については割愛*8します。
+[*8] "CGのための物理シミュレーションの基礎 - 藤澤誠" で詳しく解説されています。
流体の粒子の密度は、先ほどの重み関数で離散化した物理量の式を利用して、
+ \rho(\overrightarrow{x}) = \sum_{j \in N}m_jW_{poly6}(\overrightarrow{x_j} - \overrightarrow{x}, h)
+
+と与えられます。ここで、利用する重み関数Wは、以下で与えられます。
+
++図5.4: Poly6重み関数 +
+粘性項の離散化も、密度の場合と同様に重み関数を利用して、
+ f_{i}^{visc} = \mu\nabla^2\overrightarrow{u}_i = \mu \sum_{j \in N}m_j\frac{\overrightarrow{u}_j - \overrightarrow{u}_i}{\rho_j} \nabla^2 W_{visc}(\overrightarrow{x_j} - \overrightarrow{x}, h)
+
+と表されます。ここで、重み関数のラプラシアン\nabla^2 W_{visc}は、以下で与えられます。
+
++図5.5: Viscosity重み関数のラプラシアン +
+同様に、圧力項を離散化していきます。
+ f_{i}^{press} = - \frac{1}{\rho_i} \nabla p_i = - \frac{1}{\rho_i} \sum_{j \in N}m_j\frac{p_j - p_i}{2\rho_j} \nabla W_{spiky}(\overrightarrow{x_j} - \overrightarrow{x}, h)
+
+ここで、重み関数の勾配\nabla W_{spiky}は以下で与えられます。
+
++図5.6: Spiky重み関数の勾配 +
+この時、粒子の圧力は事前に、Tait方程式と呼ばれる、
+ p = B\left\{\left(\frac{\rho}{\rho_0}\right)^\gamma - 1\right\}
+
+で算出されています。 ここで、Bは気体定数です。非圧縮性を保証するためには、本来ポアソン方程式を解かなければならないのですが、リアルタイム計算には向きません。その代わりにSPH法*9では、Tait方程式を用いて近似的に圧力を求めます。このように非圧縮性を近似的にしか確保しない点で、SPH法は格子法よりも圧力項の計算が苦手であるといわれます。
+[*9] Tait方程式を用いた圧力計算を行うSPH法を、特別にWCSPH法と呼びます。
サンプルはこちらのリポジトリ(https://github.com/IndieVisualLab/UnityGraphicsProgramming)のAssets/SPHFluid以下に掲載しています。今回の実装では、極力シンプルにSPHの手法を解説するために高速化や数値安定性は考慮していませんのでご了承ください。
+ +シミュレーションに使用する諸々のパラメータの説明については、コード内コメントに記載しています。
+リスト5.1: シミュレーションに使用するパラメータ(FluidBase.cs)
+1: NumParticleEnum particleNum = NumParticleEnum.NUM_8K; // 粒子数 + 2: float smoothlen = 0.012f; // 粒子半径 + 3: float pressureStiffness = 200.0f; // 圧力項係数 + 4: float restDensity = 1000.0f; // 静止密度 + 5: float particleMass = 0.0002f; // 粒子質量 + 6: float viscosity = 0.1f; // 粘性係数 + 7: float maxAllowableTimestep = 0.005f; // 時間刻み幅 + 8: float wallStiffness = 3000.0f; // ペナルティ法の壁の力 + 9: int iterations = 4; // イテレーション回数 +10: Vector2 gravity = new Vector2(0.0f, -0.5f); // 重力 +11: Vector2 range = new Vector2(1, 1); // シミュレーション空間 +12: bool simulate = true; // 実行 or 一時停止 +13: +14: int numParticles; // パーティクルの個数 +15: float timeStep; // 時間刻み幅 +16: float densityCoef; // Poly6カーネルの密度係数 +17: float gradPressureCoef; // Spikyカーネルの圧力係数 +18: float lapViscosityCoef; // Laplacianカーネルの粘性係数 ++
今回のデモシーンでは、コードに記載されているパラメータの初期化値とは異なる値をインスペクタで設定していますので注意してください。
+ +重み関数の係数はシミュレーション中で変化しないため、初期化時にCPU側で計算しておきます。(ただし、実行途中でパラメータを編集する可能性も踏まえてUpdate関数内で更新しています)
+今回、粒子ごとの質量はすべて一定にしているので、物理量の式内にある質量mはシグマの外に出て以下になります。
+ \phi(\overrightarrow{x}) = m \sum_{j \in N}\frac{\phi_j}{\rho_j}W(\overrightarrow{x_j} - \overrightarrow{x}, h)
+
+そのため、係数計算の中に質量を含めてしまうことができます。
+重み関数の種類で係数も変化してきますから、それぞれに関して係数を計算します。
+リスト5.2: 重み関数の係数の事前計算(FluidBase.cs)
+1: densityCoef = particleMass * 4f / (Mathf.PI * Mathf.Pow(smoothlen, 8)); + 2: gradPressureCoef + 3: = particleMass * -30.0f / (Mathf.PI * Mathf.Pow(smoothlen, 5)); + 4: lapViscosityCoef + 5: = particleMass * 20f / (3 * Mathf.PI * Mathf.Pow(smoothlen, 5)); ++
最終的に、これらのCPU側で計算した係数(及び各種パラメータ)をGPU側の定数バッファに格納します。
+リスト5.3: ComputeShaderの定数バッファに値を転送する(FluidBase.cs)
+ 1: fluidCS.SetInt("_NumParticles", numParticles);
+ 2: fluidCS.SetFloat("_TimeStep", timeStep);
+ 3: fluidCS.SetFloat("_Smoothlen", smoothlen);
+ 4: fluidCS.SetFloat("_PressureStiffness", pressureStiffness);
+ 5: fluidCS.SetFloat("_RestDensity", restDensity);
+ 6: fluidCS.SetFloat("_Viscosity", viscosity);
+ 7: fluidCS.SetFloat("_DensityCoef", densityCoef);
+ 8: fluidCS.SetFloat("_GradPressureCoef", gradPressureCoef);
+ 9: fluidCS.SetFloat("_LapViscosityCoef", lapViscosityCoef);
+10: fluidCS.SetFloat("_WallStiffness", wallStiffness);
+11: fluidCS.SetVector("_Range", range);
+12: fluidCS.SetVector("_Gravity", gravity);
+
+リスト5.4: ComputeShaderの定数バッファ(SPH2D.compute)
+1: int _NumParticles; // 粒子数 + 2: float _TimeStep; // 時間刻み幅(dt) + 3: float _Smoothlen; // 粒子半径 + 4: float _PressureStiffness; // Beckerの係数 + 5: float _RestDensity; // 静止密度 + 6: float _DensityCoef; // 密度算出時の係数 + 7: float _GradPressureCoef; // 圧力算出時の係数 + 8: float _LapViscosityCoef; // 粘性算出時の係数 + 9: float _WallStiffness; // ペナルティ法の押し返す力 +10: float _Viscosity; // 粘性係数 +11: float2 _Gravity; // 重力 +12: float2 _Range; // シミュレーション空間 +13: +14: float3 _MousePos; // マウス位置 +15: float _MouseRadius; // マウスインタラクションの半径 +16: bool _MouseDown; // マウスが押されているか ++
リスト5.5: 密度の計算を行うカーネル関数(SPH2D.compute)
+ 1: [numthreads(THREAD_SIZE_X, 1, 1)]
+ 2: void DensityCS(uint3 DTid : SV_DispatchThreadID) {
+ 3: uint P_ID = DTid.x; // 現在処理しているパーティクルID
+ 4:
+ 5: float h_sq = _Smoothlen * _Smoothlen;
+ 6: float2 P_position = _ParticlesBufferRead[P_ID].position;
+ 7:
+ 8: // 近傍探索(O(n^2))
+ 9: float density = 0;
+10: for (uint N_ID = 0; N_ID < _NumParticles; N_ID++) {
+11: if (N_ID == P_ID) continue; // 自身の参照回避
+12:
+13: float2 N_position = _ParticlesBufferRead[N_ID].position;
+14:
+15: float2 diff = N_position - P_position; // 粒子距離
+16: float r_sq = dot(diff, diff); // 粒子距離の2乗
+17:
+18: // 半径内に収まっていない粒子は除外
+19: if (r_sq < h_sq) {
+20: // 計算には2乗しか含まれないのでルートをとる必要なし
+21: density += CalculateDensity(r_sq);
+22: }
+23: }
+24:
+25: // 密度バッファを更新
+26: _ParticlesDensityBufferWrite[P_ID].density = density;
+27: }
+
+本来であれば粒子を全数調査せず、適切な近傍探索アルゴリズムを用いて近傍粒子を探す必要がありますが、今回の実装では簡単のために全数調査を行っています(10行目のforループ)。また、自分と相手粒子との距離計算を行うため、11行目で自身の粒子同士で計算を行うのを回避しています。
+重み関数の有効半径hによる場合分けは19行目のif文で実現します。密度の足し合わせ(シグマの計算)は、9行目で0で初期化しておいた変数に対してシグマ内部の計算結果を加算していくことで実現します。ここで、もう一度密度の計算式を示します。
+ \rho(\overrightarrow{x}) = \sum_{j \in N}m_jW_{poly6}(\overrightarrow{x_j} - \overrightarrow{x}, h)
+
+密度の計算は上式のとおり、Poly6重み関数を用います。 Poly6重み関数はリスト5.6で計算します。
+リスト5.6: 密度の計算(SPH2D.compute)
+ 1: inline float CalculateDensity(float r_sq) {
+ 2: const float h_sq = _Smoothlen * _Smoothlen;
+ 3: return _DensityCoef * (h_sq - r_sq) * (h_sq - r_sq) * (h_sq - r_sq);
+ 4: }
+
+最終的にリスト5.5の25行目で書き込み用バッファに書き込みます。
+ +リスト5.7: 粒子毎の圧力を計算する重み関数(SPH2D.compute)
+ 1: [numthreads(THREAD_SIZE_X, 1, 1)]
+ 2: void PressureCS(uint3 DTid : SV_DispatchThreadID) {
+ 3: uint P_ID = DTid.x; // 現在処理しているパーティクルID
+ 4:
+ 5: float P_density = _ParticlesDensityBufferRead[P_ID].density;
+ 6: float P_pressure = CalculatePressure(P_density);
+ 7:
+ 8: // 圧力バッファを更新
+ 9: _ParticlesPressureBufferWrite[P_ID].pressure = P_pressure;
+10: }
+
+圧力項を解く前に、粒子単位の圧力を算出しておき、後の圧力項の計算コストを下げます。先程も述べましたが、圧力の計算では本来、以下の式のようなポアソン方程式と呼ばれる方程式を解く必要があります。
+ \nabla^2 p = \rho \frac{\nabla \overrightarrow{u}}{\Delta t}
+
+しかし、コンピュータで正確にポアソン方程式を解く操作は非常に計算コストが高いため、以下のTait方程式を用いて近似的に求めます。
+ p = B\left\{\left(\frac{\rho}{\rho_0}\right)^\gamma - 1\right\}
+
+リスト5.8: Tait方程式の実装(SPH2D.compute)
+ 1: inline float CalculatePressure(float density) {
+ 2: return _PressureStiffness * max(pow(density / _RestDensity, 7) - 1, 0);
+ 3: }
+
+リスト5.9: 圧力項・粘性項を計算するカーネル関数(SPH2D.compute)
+ 1: [numthreads(THREAD_SIZE_X, 1, 1)]
+ 2: void ForceCS(uint3 DTid : SV_DispatchThreadID) {
+ 3: uint P_ID = DTid.x; // 現在処理しているパーティクルID
+ 4:
+ 5: float2 P_position = _ParticlesBufferRead[P_ID].position;
+ 6: float2 P_velocity = _ParticlesBufferRead[P_ID].velocity;
+ 7: float P_density = _ParticlesDensityBufferRead[P_ID].density;
+ 8: float P_pressure = _ParticlesPressureBufferRead[P_ID].pressure;
+ 9:
+10: const float h_sq = _Smoothlen * _Smoothlen;
+11:
+12: // 近傍探索(O(n^2))
+13: float2 press = float2(0, 0);
+14: float2 visco = float2(0, 0);
+15: for (uint N_ID = 0; N_ID < _NumParticles; N_ID++) {
+16: if (N_ID == P_ID) continue; // 自身を対象とした場合スキップ
+17:
+18: float2 N_position = _ParticlesBufferRead[N_ID].position;
+19:
+20: float2 diff = N_position - P_position;
+21: float r_sq = dot(diff, diff);
+22:
+23: // 半径内に収まっていない粒子は除外
+24: if (r_sq < h_sq) {
+25: float N_density
+26: = _ParticlesDensityBufferRead[N_ID].density;
+27: float N_pressure
+28: = _ParticlesPressureBufferRead[N_ID].pressure;
+29: float2 N_velocity
+30: = _ParticlesBufferRead[N_ID].velocity;
+31: float r = sqrt(r_sq);
+32:
+33: // 圧力項
+34: press += CalculateGradPressure(...);
+35:
+36: // 粘性項
+37: visco += CalculateLapVelocity(...);
+38: }
+39: }
+40:
+41: // 統合
+42: float2 force = press + _Viscosity * visco;
+43:
+44: // 加速度バッファの更新
+45: _ParticlesForceBufferWrite[P_ID].acceleration = force / P_density;
+46: }
+
+圧力項、粘性項の計算も、密度の計算方法と同様に行います。
+初めに、以下の圧力項による力の計算を34行目にて行っています。
+ f_{i}^{press} = - \frac{1}{\rho_i} \nabla p_i = - \frac{1}{\rho_i} \sum_{j \in N}m_j\frac{p_j - p_i}{2\rho_j} \nabla W_{spiky}(\overrightarrow{x_j} - \overrightarrow{x}, h)
+
+シグマの中身の計算は以下の関数で行われます。
+リスト5.10: 圧力項の要素の計算(SPH2D.compute)
+ 1: inline float2 CalculateGradPressure(...) {
+ 2: const float h = _Smoothlen;
+ 3: float avg_pressure = 0.5f * (N_pressure + P_pressure);
+ 4: return _GradPressureCoef * avg_pressure / N_density
+ 5: * pow(h - r, 2) / r * (diff);
+ 6: }
+
+次に、以下の粘性項による力の計算を37行目で行っています。
+ f_{i}^{visc} = \mu\nabla^2\overrightarrow{u}_i = \mu \sum_{j \in N}m_j\frac{\overrightarrow{u}_j - \overrightarrow{u}_i}{\rho_j} \nabla^2 W_{visc}(\overrightarrow{x_j} - \overrightarrow{x}, h)
+
+シグマの中身の計算は以下の関数で行われます。
+リスト5.11: 粘性項の要素の計算(SPH2D.compute)
+ 1: inline float2 CalculateLapVelocity(...) {
+ 2: const float h = _Smoothlen;
+ 3: float2 vel_diff = (N_velocity - P_velocity);
+ 4: return _LapViscosityCoef / N_density * (h - r) * vel_diff;
+ 5: }
+
+最後に、リスト5.9の42行目にて圧力項と粘性項で算出した力を足し合わせ、45行目で最終的な出力としてバッファに書き込んでいます。
+ +リスト5.12: 衝突判定と位置更新を行うカーネル関数(SPH2D.compute)
+ 1: [numthreads(THREAD_SIZE_X, 1, 1)]
+ 2: void IntegrateCS(uint3 DTid : SV_DispatchThreadID) {
+ 3: const unsigned int P_ID = DTid.x; // 現在処理しているパーティクルID
+ 4:
+ 5: // 更新前の位置と速度
+ 6: float2 position = _ParticlesBufferRead[P_ID].position;
+ 7: float2 velocity = _ParticlesBufferRead[P_ID].velocity;
+ 8: float2 acceleration = _ParticlesForceBufferRead[P_ID].acceleration;
+ 9:
+10: // マウスインタラクション
+11: if (distance(position, _MousePos.xy) < _MouseRadius && _MouseDown) {
+12: float2 dir = position - _MousePos.xy;
+13: float pushBack = _MouseRadius-length(dir);
+14: acceleration += 100 * pushBack * normalize(dir);
+15: }
+16:
+17: // 衝突判定を書くならここ -----
+18:
+19: // 壁境界(ペナルティ法)
+20: float dist = dot(float3(position, 1), float3(1, 0, 0));
+21: acceleration += min(dist, 0) * -_WallStiffness * float2(1, 0);
+22:
+23: dist = dot(float3(position, 1), float3(0, 1, 0));
+24: acceleration += min(dist, 0) * -_WallStiffness * float2(0, 1);
+25:
+26: dist = dot(float3(position, 1), float3(-1, 0, _Range.x));
+27: acceleration += min(dist, 0) * -_WallStiffness * float2(-1, 0);
+28:
+29: dist = dot(float3(position, 1), float3(0, -1, _Range.y));
+30: acceleration += min(dist, 0) * -_WallStiffness * float2(0, -1);
+31:
+32: // 重力の加算
+33: acceleration += _Gravity;
+34:
+35: // 前進オイラー法で次の粒子位置を更新
+36: velocity += _TimeStep * acceleration;
+37: position += _TimeStep * velocity;
+38:
+39: // パーティクルのバッファ更新
+40: _ParticlesBufferWrite[P_ID].position = position;
+41: _ParticlesBufferWrite[P_ID].velocity = velocity;
+42: }
+
+壁との衝突判定をペナルティ法を用いて行います(19-30行目)。ペナルティ法とは、境界位置からはみ出した分だけ強い力で押し返すという手法になります。
+本来は壁との衝突判定の前に障害物との衝突判定も行うのですが、今回の実装ではマウスとのインタラクションを行うようにしています(10-15行目)。マウスが押されていれば、指定された力でマウス位置から遠ざかるような力を加えています。
+33行目にて外力である重力を加算しています。重力の値をゼロにすると無重力状態になり、面白い視覚効果が得られます。また、位置の更新は前述の前進オイラー法で行い(36-37行目)、最終的な結果をバッファに書き込みます。
+ +リスト5.13: シミュレーションの主要関数(FluidBase.cs)
+ 1: private void RunFluidSolver() {
+ 2:
+ 3: int kernelID = -1;
+ 4: int threadGroupsX = numParticles / THREAD_SIZE_X;
+ 5:
+ 6: // Density
+ 7: kernelID = fluidCS.FindKernel("DensityCS");
+ 8: fluidCS.SetBuffer(kernelID, "_ParticlesBufferRead", ...);
+ 9: fluidCS.SetBuffer(kernelID, "_ParticlesDensityBufferWrite", ...);
+10: fluidCS.Dispatch(kernelID, threadGroupsX, 1, 1);
+11:
+12: // Pressure
+13: kernelID = fluidCS.FindKernel("PressureCS");
+14: fluidCS.SetBuffer(kernelID, "_ParticlesDensityBufferRead", ...);
+15: fluidCS.SetBuffer(kernelID, "_ParticlesPressureBufferWrite", ...);
+16: fluidCS.Dispatch(kernelID, threadGroupsX, 1, 1);
+17:
+18: // Force
+19: kernelID = fluidCS.FindKernel("ForceCS");
+20: fluidCS.SetBuffer(kernelID, "_ParticlesBufferRead", ...);
+21: fluidCS.SetBuffer(kernelID, "_ParticlesDensityBufferRead", ...);
+22: fluidCS.SetBuffer(kernelID, "_ParticlesPressureBufferRead", ...);
+23: fluidCS.SetBuffer(kernelID, "_ParticlesForceBufferWrite", ...);
+24: fluidCS.Dispatch(kernelID, threadGroupsX, 1, 1);
+25:
+26: // Integrate
+27: kernelID = fluidCS.FindKernel("IntegrateCS");
+28: fluidCS.SetBuffer(kernelID, "_ParticlesBufferRead", ...);
+29: fluidCS.SetBuffer(kernelID, "_ParticlesForceBufferRead", ...);
+30: fluidCS.SetBuffer(kernelID, "_ParticlesBufferWrite", ...);
+31: fluidCS.Dispatch(kernelID, threadGroupsX, 1, 1);
+32:
+33: SwapComputeBuffer(ref particlesBufferRead, ref particlesBufferWrite);
+34: }
+
+これまでに述べたComputeShaderのカーネル関数を、毎フレーム呼び出す部分です。それぞれのカーネル関数に対して適切なComputeBufferを与えてあげます。
+ここで、タイムステップ幅\Delta tを小さくすればするほどシミュレーションの誤差が出にくくなることを思い出してみてください。60FPSで実行する場合、\Delta t = 1 / 60となりますが、これでは誤差が大きく出てしまい粒子が爆発してしまいます。さらに、\Delta t = 1 / 60より小さいタイムステップ幅をとると、1フレーム当たりの時間の進み方が実時間より遅くなり、スローモーションになってしまいます。これを回避するには、\Delta t = 1 / (60 \times {iteration})として、メインルーチンを1フレームにつきiteration回回します。
+リスト5.14: 主要関数のイテレーション(FluidBase.cs)
+ 1: // 計算精度を上げるために時間刻み幅を小さくして複数回イテレーションする
+ 2: for (int i = 0; i<iterations; i++) {
+ 3: RunFluidSolver();
+ 4: }
+
+こうすることで、小さいタイムステップ幅で実時間のシミュレーションを行うことができます。
+ +通常のシングルアクセスのパーティクルシステムとは異なり、粒子同士が相互作用しますから、計算途中に他のデータが書き換わってしまっては困ります。これを回避するために、GPUで計算を行っている際に値を書き換えない読み込み用バッファと書き込み用バッファの2つを用意します。これらのバッファを毎フレーム入れ替えることで、競合なくデータを更新できます。
+リスト5.15: バッファを入れ替える関数(FluidBase.cs)
+ 1: void SwapComputeBuffer(ref ComputeBuffer ping, ref ComputeBuffer pong) {
+ 2: ComputeBuffer temp = ping;
+ 3: ping = pong;
+ 4: pong = temp;
+ 5: }
+
+リスト5.16: パーティクルのレンダリング(FluidRenderer.cs)
+ 1: void DrawParticle() {
+ 2:
+ 3: Material m = RenderParticleMat;
+ 4:
+ 5: var inverseViewMatrix = Camera.main.worldToCameraMatrix.inverse;
+ 6:
+ 7: m.SetPass(0);
+ 8: m.SetMatrix("_InverseMatrix", inverseViewMatrix);
+ 9: m.SetColor("_WaterColor", WaterColor);
+10: m.SetBuffer("_ParticlesBuffer", solver.ParticlesBufferRead);
+11: Graphics.DrawProcedural(MeshTopology.Points, solver.NumParticles);
+12: }
+
+10行目にて、流体粒子の位置計算結果を格納したバッファをマテリアルにセットし、シェーダーに転送します。11行目にて、パーティクルの個数分インスタンス描画をするよう命令しています。
+リスト5.17: パーティクルのレンダリング(Particle.shader)
+ 1: struct FluidParticle {
+ 2: float2 position;
+ 3: float2 velocity;
+ 4: };
+ 5:
+ 6: StructuredBuffer<FluidParticle> _ParticlesBuffer;
+ 7:
+ 8: // --------------------------------------------------------------------
+ 9: // Vertex Shader
+10: // --------------------------------------------------------------------
+11: v2g vert(uint id : SV_VertexID) {
+12:
+13: v2g o = (v2g)0;
+14: o.pos = float3(_ParticlesBuffer[id].position.xy, 0);
+15: o.color = float4(0, 0.1, 0.1, 1);
+16: return o;
+17: }
+
+1-6行目にて、流体粒子の情報を受け取るための情報の定義を行います。この時、スクリプトからマテリアルに転送したバッファの構造体と定義を一致させる必要があります。位置データの受け取りは、14行目のようにid : SV_VertexIDでバッファの要素を参照することで行います。
+あとは通常のパーティクルシステムと同様、図5.7のようにジオメトリシェーダーで計算結果の位置データを中心としたビルボード*10を作成し、粒子画像をアタッチしてレンダリングします。
+
++図5.7: ビルボードの作成 +
+[*10] 表が常に視点方向を向くPlaneのことを指します。
++図5.8: レンダリング結果 +
+動画はこちら(https://youtu.be/KJVu26zeK2w)に掲載しています。
+ +本章では、SPH法を用いた流体シミュレーションの手法を示しました。SPH法を用いることで、流体の動きをパーティクルシステムのように汎用的に扱うことができるようになりました。
+先述の通り、流体シミュレーションの手法はSPH法以外にもたくさんの種類があります。本章を通して、他の流体シミュレーション手法に加え、他の物理シミュレーション自体についても興味を持っていただき、表現の幅を広げていただければ幸いです。
+ + diff --git a/bake-translation.js b/bake-translation.js new file mode 100644 index 0000000..a2224c8 --- /dev/null +++ b/bake-translation.js @@ -0,0 +1,71 @@ +const fs = require('fs'); + +const puppeteer = require('puppeteer'); + + +async function autoScroll(page){ + await page.evaluate(async () => { + await new Promise((resolve, reject) => { + const scrollStep = 150; + const scrollInterval = 50; + + let totalHeight = 0; + const timer = setInterval(() => { + const scrollHeight = document.body.scrollHeight; + window.scrollBy(0, scrollStep); + totalHeight += scrollStep; + + if (totalHeight >= scrollHeight){ + clearInterval(timer); + resolve(); + } + }, scrollInterval); + }); + }); +} + + +(async () => { + const url = process.argv[2]; + const outputFile = process.argv[3] || 'test.html'; + + const browser = await puppeteer.launch({ headless: false }); + const page = await browser.newPage(); + + await page.goto(url); + await page.setViewport({ width: 1200, height: 800 }); + + await autoScroll(page); + + const html = await page.evaluate( + () => document.querySelector('html').outerHTML + .replace( + '', + `` + ) + .replace( + `var elem = new google.translate.TranslateElement({ + autoDisplay: false, + floatPosition: 0, + multilanguagePage: true, + pageLanguage: pageLang + }); + return elem;`, + '' + ) + .replace( + `var s = document.createElement('script'); + s.src = 'https://translate.google.com/translate_a/element.js?cb=' + + encodeURIComponent(cbId) + '&client=tee&hl=' + userLang; + document.getElementsByTagName('head')[0].appendChild(s);`, + '' + ) + ); + fs.writeFileSync(outputFile, html); + + await browser.close(); +})(); diff --git a/html-translated/Unity Graphics Programming Vol. 1.pdf b/html-translated/Unity Graphics Programming Vol. 1.pdf new file mode 100644 index 0000000..e385682 Binary files /dev/null and b/html-translated/Unity Graphics Programming Vol. 1.pdf differ diff --git a/html-translated/Unity Graphics Programming Vol. 
2.pdf b/html-translated/Unity Graphics Programming Vol. 2.pdf new file mode 100644 index 0000000..99adee4 Binary files /dev/null and b/html-translated/Unity Graphics Programming Vol. 2.pdf differ diff --git a/html-translated/Unity Graphics Programming Vol. 3.pdf b/html-translated/Unity Graphics Programming Vol. 3.pdf new file mode 100644 index 0000000..01a8e07 Binary files /dev/null and b/html-translated/Unity Graphics Programming Vol. 3.pdf differ diff --git a/html-translated/Unity Graphics Programming Vol. 4.pdf b/html-translated/Unity Graphics Programming Vol. 4.pdf new file mode 100644 index 0000000..ae6139a Binary files /dev/null and b/html-translated/Unity Graphics Programming Vol. 4.pdf differ diff --git a/html-translated/UnityGraphicsProgrammingBook1.epub b/html-translated/UnityGraphicsProgrammingBook1.epub new file mode 100644 index 0000000..406ad4d Binary files /dev/null and b/html-translated/UnityGraphicsProgrammingBook1.epub differ diff --git a/html-translated/UnityGraphicsProgrammingBook2.epub b/html-translated/UnityGraphicsProgrammingBook2.epub new file mode 100644 index 0000000..3ceb834 Binary files /dev/null and b/html-translated/UnityGraphicsProgrammingBook2.epub differ diff --git a/html-translated/UnityGraphicsProgrammingBook3.epub b/html-translated/UnityGraphicsProgrammingBook3.epub new file mode 100644 index 0000000..0afcfed Binary files /dev/null and b/html-translated/UnityGraphicsProgrammingBook3.epub differ diff --git a/html-translated/UnityGraphicsProgrammingBook4.epub b/html-translated/UnityGraphicsProgrammingBook4.epub new file mode 100644 index 0000000..e5635f2 Binary files /dev/null and b/html-translated/UnityGraphicsProgrammingBook4.epub differ diff --git a/html-translated/index.html b/html-translated/index.html new file mode 100644 index 0000000..2f6f502 --- /dev/null +++ b/html-translated/index.html @@ -0,0 +1,19 @@ +Procedural Modeling is a technique for building 3D models using rules. 
Modeling generally refers to using modeling software such as Blender or 3ds Max to manually operate to obtain the target shape while moving the vertices and line segments. In contrast, the approach of writing rules and obtaining shape as a result of a series of automated processes is called procedural modeling.
+Procedural modeling is applied in various fields. For example, in games, it is used for terrain generation, plant modeling, city construction, and so on. By using this technology, content designs become possible in which, for example, the structure of a stage changes each time you play.
+Also, in the fields of architecture and product design, the method of procedurally designing shapes using Grasshopper * 2 , a plug-in for the CAD software Rhinoceros * 1 , is being actively used.
+[*1] http://www.rhino3d.co.jp/
[*2] http://www.grasshopper3d.com/
With procedural modeling, you can:
+A parametric structure is a structure in which the elements of the structure can be deformed according to a certain parameter. For example, in the case of a sphere model, the radius representing the size and the smoothness of the sphere are calculated. You can define parameters such as the number of segments to represent, and by changing those values, you can obtain a sphere with the desired size and smoothness.
+Once you have implemented a program that defines a parametric structure, you can get a model with a specific structure in various situations, which is convenient.
+As mentioned above, in fields such as games, procedural modeling is often used to generate terrain and trees, sometimes generating them in real time within the content instead of incorporating pre-built models. Using procedural modeling techniques in real-time content, you can, for example, create a tree that grows toward the sun at any position, or build a city where buildings line up from the clicked position.
+In addition, incorporating models of various patterns into the content will increase the data size, but if you use procedural modeling to increase the variation of the model, you can reduce the data size.
+If you learn procedural modeling techniques and build models programmatically, you will be able to develop your own modeling tools.
+ +In Unity, the geometry data that represents the shape of the model is managed by the Mesh class.
+The shape of the model consists of triangles arranged in 3D space, and one triangle is defined by three vertices. The official Unity documentation explains how to manage the vertex and triangle data of the model in the Mesh class as follows.
++In the Mesh class, all vertices are stored in one array, and each triangle is specified by three integers that are the indexes of the vertex array. The triangles are further collected as an array of integers. This integer is grouped every three from the beginning of the array, so elements 0, 1, and 2 define the first triangle, followed by the second triangles 3, 4, 5. * 3
+
[*3] https://docs.unity3d.com/jp/540/Manual/AnatomyofaMesh.html
The model can also include uv coordinates, corresponding to each vertex, that represent coordinates on the texture required for texture mapping, as well as normal vectors (also called normals) required to calculate the influence of light sources during lighting.
+ +In this chapter, the following Assets / ProceduralModeling in the https://github.com/IndieVisualLab/UnityGraphicsProgramming repository are prepared as sample programs.
+Since model generation by C # script is the main content of the explanation, we will proceed with the explanation while referring to the C # script under Assets / ProceduralModeling / Scripts.
+ +The sample code in this chapter has been confirmed to work with Unity 5.0 and above.
+Taking Quad, which is a basic model, as an example, we will explain how to build a model programmatically. Quad is a square model that combines two triangles consisting of four vertices. It is provided by default as a Primitive Mesh in Unity, but since it is the most basic shape, it is a useful example for understanding the structure of the model.
+
++Figure 1.1: Quad model structure Black circles represent the vertices of the model, and the numbers 0 to 3 in the black circles indicate the index of the vertices. Triangles specified in the order of 1,2, lower left is triangles specified in the order of 2,3,0) +
+First, create an instance of the Mesh class.
+// Create an instance of Mesh +var mesh = new Mesh (); ++
Next, generate a Vector3 array that represents the four vertices located at the four corners of the Quad. Also, prepare the uv coordinate and normal data so that they correspond to each of the four vertices.
+// Find half the length so that the width and height of the Quad are the length of size respectively.
+var hsize = size * 0.5f;
+
+// Quad vertex data
+var vertices = new Vector3[] {
+ new Vector3 (-hsize, hsize, 0f), // Upper left position of the first vertex Quad
+ new Vector3 (hsize, hsize, 0f), // Upper right position of the second vertex Quad
+ new Vector3 (hsize, -hsize, 0f), // Lower right position of the third vertex Quad
+ new Vector3 (-hsize, -hsize, 0f) // Lower left position of the 4th vertex Quad
+};
+
+// Quad uv coordinate data
+var uv = new Vector2[] {
+ new Vector2 (0f, 0f), // uv coordinates of the first vertex
+ new Vector2 (1f, 0f), // uv coordinates of the second vertex
+ new Vector2 (1f, 1f), // uv coordinates of the third vertex
+ new Vector2 (0f, 1f) // uv coordinates of the 4th vertex
+};
+
+// Quad normal data
+var normals = new Vector3[] {
+ new Vector3 (0f, 0f, -1f), // normal of the first vertex
+ new Vector3 (0f, 0f, -1f), // Normal of the second vertex
+ new Vector3 (0f, 0f, -1f), // normal of the third vertex
+ new Vector3 (0f, 0f, -1f) // Normal of the 4th vertex
+};
+
+Next, generate triangular data that represents the faces of the model. The triangle data is specified by an array of integers, and each integer corresponds to the index of the vertex array.
+// Quad face data Recognize as one face (triangle) by arranging three indexes of vertices
+var triangles = new int[] {
+ 0, 1, 2, // 1st triangle
+ 2, 3, 0 // Second triangle
+};
+
+Set the last generated data to the Mesh instance.
+mesh.vertices = vertices; +mesh.uv = uv; +mesh.normals = normals; +mesh.triangles = triangles; + +// Calculate the boundary area occupied by Mesh (required for culling) +mesh.RecalculateBounds(); + +return mesh; ++
The sample code used in this chapter uses a base class called ProceduralModelingBase. In classes that inherit from it, every time you change a model parameter (for example, size, which represents width and height in Quad), a new Mesh instance is created and applied to the MeshFilter, so that you can check the result of the change immediately. (This function is realized by using the Editor script ProceduralModelingEditor.cs.)
+You can also visualize the UV coordinates and normal direction of the model by changing the enum type parameter called ProceduralModelingMaterial.
+
++Figure 1.2: From the left, the model to which ProcedureModelingMaterial.Standard, ProcedureModelingMaterial.UV, and ProcedureModelingMaterial.Normal are applied. +
+Now that you understand the structure of your model, let's create some primitive shapes.
+ +Plane is shaped like a grid of Quads.
+
++Figure 1.3: Plane model +
+Determine the number of rows and columns of the grid, place vertices at the intersections of each grid, build a Quad to fill each cell of the grid, and combine them to generate one Plane model.
+In the sample program Plane.cs, the number of vertices arranged vertically in the Plane, heightSegments, the number of vertices arranged horizontally widthSegments, and the parameters of vertical length height and horizontal length width are prepared. Each parameter affects the shape of the Plane as shown in the following figure.
+
++Figure 1.4: Plane parameters +
+First, we will generate vertex data to be placed at the intersections of the grid.
+var vertices = new List<Vector3>();
+var uv = new List<Vector2>();
+var normals = new List<Vector3>();
+
+// The reciprocal of the number of matrices to calculate the percentage of vertices on the grid (0.0 to 1.0)
+var winv = 1f / (widthSegments - 1);
+var hinv = 1f / (heightSegments - 1);
+
+for(int y = 0; y < heightSegments; y++) {
+ // Row position percentage (0.0 ~ 1.0)
+ var ry = y * hinv;
+
+ for(int x = 0; x < widthSegments; x++) {
+ // Percentage of column positions (0.0 ~ 1.0)
+ var rx = x * winv;
+
+ vertices.Add(new Vector3(
+ (rx - 0.5f) * width,
+ 0f,
+ (0.5f - ry) * height
+ ));
+ uv.Add(new Vector2(rx, ry));
+ normals.Add(new Vector3(0f, 1f, 0f));
+ }
+}
+
+Next, regarding triangle data, the vertex index set for each triangle is referenced as shown below in the loop that follows the rows and columns.
+var triangles = new List<int>();
+
+for(int y = 0; y < heightSegments - 1; y++) {
+ for(int x = 0; x < widthSegments - 1; x++) {
+ int index = y * widthSegments + x;
+ var a = index;
+ var b = index + 1;
+ var c = index + 1 + widthSegments;
+ var d = index + widthSegments;
+
+ triangles.Add(a);
+ triangles.Add(b);
+ triangles.Add(c);
+
+ triangles.Add(c);
+ triangles.Add(d);
+ triangles.Add(a);
+ }
+}
+
+The height (y coordinate) value of each vertex of the Plane was set to 0, but by manipulating this height, you can obtain not just a horizontal surface, but also uneven terrain or a shape like a small mountain.
+The ParametricPlaneBase class inherits from the Plane class and overrides the Build function that creates the mesh. It first generates the original Plane model, then calls the Depth (float u, float v) function with the uv coordinates of each vertex as input to find its height, and resets the height, flexibly transforming the shape.
+By implementing a class that inherits this ParametricPlaneBase class, you can generate a Plane model whose height changes depending on the vertices.
+ +protected override Mesh Build() {
+ // Generate the original Plane model
+ var mesh = base.Build ();
+
+ // Reset the height of the vertices of the Plane model
+ var vertices = mesh.vertices;
+
+ // The reciprocal of the number of matrices to calculate the percentage of vertices on the grid (0.0 to 1.0)
+ var winv = 1f / (widthSegments - 1);
+ var hinv = 1f / (heightSegments - 1);
+
+ for(int y = 0; y < heightSegments; y++) {
+ // Row position percentage (0.0 ~ 1.0)
+ var ry = y * hinv;
+ for(int x = 0; x < widthSegments; x++) {
+ // Percentage of column positions (0.0 ~ 1.0)
+ var rx = x * winv;
+
+ int index = y * widthSegments + x;
+ vertices[index].y = Depth(rx, ry);
+ }
+ }
+
+ // Reset the vertex position
+ mesh.vertices = vertices;
+ mesh.RecalculateBounds();
+
+ // Automatically calculate normal direction
+ mesh.RecalculateNormals();
+
+ return mesh;
+}
+
+In the sample scene ParametricPlane.scene, GameObject using the class (MountainPlane, TerrainPlane class) that inherits this ParametricPlaneBase is placed. Try changing each parameter and see how the shape changes.
+
++Figure 1.5: ParametricPlane.scene Model generated by the MountainPlane class on the left and the TerrainPlane class on the right +
+The Cylinder is a cylindrical model that looks like the following figure.
+
++Figure 1.6: Structure of Cylinder +
+The smoothness of the cylindrical circle can be controlled by the segments, and the vertical length and thickness can be controlled by the height and radius parameters, respectively. As shown in the example above, if you specify 7 for segments, the cylinder will look like a regular heptagon stretched vertically, and the larger the value of segments, the closer it will be to a circle.
+ +The vertices of the Cylinder should be evenly aligned around the circle located at the end of the cylinder.
+Use trigonometric functions (Mathf.Sin, Mathf.Cos) to place evenly aligned vertices along the circumference. The details of trigonometric functions are omitted here, but these functions can be used to obtain the position on the circumference based on the angle.
+
++Figure 1.7: Obtaining the position of a point on the circumference from a trigonometric function +
+As shown in this figure, the point located on the circle of radius radius at angle θ (theta) can be acquired by (x, y) = (Mathf.Cos(θ) * radius, Mathf.Sin(θ) * radius).
+Based on this, perform the following processing to obtain the vertex positions of segments evenly arranged on the circumference of the radius radius.
+for (int i = 0; i < segments; i++) {
+ // 0.0 ~ 1.0
+ float ratio = (float)i / (segments - 1);
+
+ // Convert [0.0 ~ 1.0] to [0.0 ~ 2π]
+ float rad = ratio * PI2;
+
+ // Get a position on the circumference
+ float cos = Mathf.Cos(rad), sin = Mathf.Sin(rad);
+ float x = cos * radius, y = sin * radius;
+}
+
+In Cylinder modeling, vertices are evenly placed along the circumference of the end of the cylinder, and the vertices are joined together to form a side surface. For each side, just as you would build a Quad, take two corresponding vertices from the top and bottom and place the triangles facing each other to build one side, a rectangle. The sides of the Cylinder can be imagined as the Quads arranged along a circle.
+
++Figure 1.8: Modeling the sides of a cylinder Black circles are evenly distributed vertices along the circumference at the edges a to d in the vertices are index variables assigned to the vertices when constructing a triangle in the Cylinder.cs program. +
+First of all, we will build the side, but in the Cylinder class, we have prepared a function GenerateCap to generate the data of the vertices arranged around the circumference located at the upper end and the lower end.
+var vertices = new List<Vector3>();
+var normals = new List<Vector3>();
+var uvs = new List<Vector2>();
+var triangles = new List<int>();
+
+// Top height and bottom height
+float top = height * 0.5f, bottom = -height * 0.5f;
+
+// Generate vertex data that makes up the side
+GenerateCap(segments + 1, top, bottom, radius, vertices, uvs, normals, true);
+
+// To refer to the vertices on the circle when constructing the side triangles
+// Divisor for the index so that it wraps around the circle
+var len = (segments + 1) * 2;
+
+// Build the sides by connecting the top and bottom
+for (int i = 0; i < segments + 1; i++) {
+ int idx = i * 2;
+ int a = idx, b = idx + 1, c = (idx + 2) % len, d = (idx + 3) % len;
+ triangles.Add(a);
+ triangles.Add(c);
+ triangles.Add(b);
+
+ triangles.Add(d);
+ triangles.Add(b);
+ triangles.Add(c);
+}
+
+In the GenerateCap function, the vertex and normal data are set in the variable passed as List type.
+void GenerateCap(
+ int segments,
+ float top,
+ float bottom,
+ float radius,
+ List<Vector3> vertices,
+ List<Vector2> uvs,
+ List<Vector3> normals,
+ bool side
+) {
+ for (int i = 0; i < segments; i++) {
+ // 0.0 ~ 1.0
+ float ratio = (float)i / (segments - 1);
+
+ // 0.0 ~ 2π
+ float rad = ratio * PI2;
+
+ // Place vertices evenly at the top and bottom along the circumference
+ float cos = Mathf.Cos(rad), sin = Mathf.Sin(rad);
+ float x = cos * radius, z = sin * radius;
+ Vector3 tp = new Vector3(x, top, z), bp = new Vector3(x, bottom, z);
+
+ // upper end
+ vertices.Add(tp);
+ uvs.Add(new Vector2(ratio, 1f));
+
+ // Bottom edge
+ vertices.Add(bp);
+ uvs.Add(new Vector2(ratio, 0f));
+
+ if(side) {
+ // Normal to the outside of the side
+ var normal = new Vector3(cos, 0f, sin);
+ normals.Add(normal);
+ normals.Add(normal);
+ } else {
+ normals.Add (new Vector3 (0f, 1f, 0f)); // Normals pointing up the lid
+ normals.Add (new Vector3 (0f, -1f, 0f)); // Normals pointing down the lid
+ }
+ }
+}
+
+In the Cylinder class, you can set with the openEnded flag whether to make the model with the top and bottom closed. If you want to close the top and bottom, form a circular "lid" and plug the ends.
+The vertices that make up the surface of the lid do not use the vertices that make up the side, but create a new vertex at the same position as the side. This is to separate the normals on the sides and the lid for natural lighting. (When constructing the vertex data of the side, specify true in the side variable of the argument of GenerateCap, and when constructing the lid, specify false so that the appropriate normal direction is set.)
+If the side and lid share the same vertex, the side and lid will refer to the same normal, which makes lighting unnatural.
+
++Figure 1.9: When the side of Cylinder and the top of the lid are shared (left: BadCylinder.cs) and when another vertex is prepared as in the sample program (right: Cylinder.cs) The lighting on the left becomes unnatural. ing +
+To model a circular lid, prepare vertices that are evenly arranged on the circumference (generated from the GenerateCap function) and vertices that are located in the middle of the circle, and the vertices along the circumference from the middle vertex. Join together to form a circular lid by building a triangle that resembles an evenly divided pizza.
+
++Figure 1.10: Cylinder lid modeling example with segments parameter of 6. +
+// Generate top and bottom lids
+if(openEnded) {
+ // Add new vertices for lid model, not shared with sides, to use different normals when lighting
+ GenerateCap(
+ segments + 1,
+ top,
+ bottom,
+ radius,
+ vertices,
+ uvs,
+ normals,
+ false
+ );
+
+ // The apex in the middle of the top lid
+ vertices.Add(new Vector3(0f, top, 0f));
+ uvs.Add(new Vector2(0.5f, 1f));
+ normals.Add(new Vector3(0f, 1f, 0f));
+
+ // The apex in the middle of the bottom lid
+ vertices.Add(new Vector3(0f, bottom, 0f)); // bottom
+ uvs.Add(new Vector2(0.5f, 0f));
+ normals.Add(new Vector3(0f, -1f, 0f));
+
+ var it = vertices.Count - 2;
+ var ib = vertices.Count - 1;
+
+ // offset to avoid referencing the vertex index for the side
+ var offset = len;
+
+ // Top lid surface
+ for (int i = 0; i < len; i += 2) {
+ triangles.Add(it);
+ triangles.Add((i + 2) % len + offset);
+ triangles.Add(i + offset);
+ }
+
+ // Bottom lid surface
+ for (int i = 1; i < len; i += 2) {
+ triangles.Add(ib);
+ triangles.Add(i + offset);
+ triangles.Add((i + 2) % len + offset);
+ }
+}
+
+Tubular is a tubular model that looks like the following figure.
+
++Figure 1.11: Tubular model +
+The Cylinder model has a straight cylindrical shape, while the Tubular is a curved, untwisted cylinder. In the tree model example described later, one branch is represented by a Tubular and a whole tree is constructed by combining them; Tubular plays an active role in situations where a smoothly bending tubular shape is required.
+ +The structure of the tubular model is as shown in the following figure.
+
++Figure 1.12: Cylindrical structure Tubular visualizes the points that divide the curve along with a sphere and the nodes that make up the sides with a hexagon. +
+Divide the curve, build sides for each node separated by the division points, and combine them to generate one Tubular model.
+The sides of each node are similar to the sides of a cylinder, with the top and bottom vertices of the sides evenly arranged along a circle, and the cylinders are connected along a curve to build them together. You can think of things as Tubular types.
+ +In the sample program, the base class CurveBase that represents a curve is prepared. Various algorithms have been devised for drawing curves in three-dimensional space, and it is necessary to select an easy-to-use method according to the application. In the sample program, the class CatmullRomCurve, which inherits the CurveBase class, is used.
+I will omit the details here, but CatmullRomCurve has the feature of forming a curve while interpolating between points so that it passes through all the passed control points, and it is easy to use because you can specify the points you want to pass through the curve. Has a good reputation for its goodness.
+The CurveBase class that represents a curve provides GetPointAt (float) and GetTangentAt (float) functions to obtain the position and slope (tangent vector) of a point on the curve, and specify a value of [0.0 to 1.0] as an argument. By doing so, you can get the position and slope of the point between the start point (0.0) and the end point (1.0).
+ +To create a twist-free cylinder along a curve, three orthogonal vectors "tangent vector, normal vector, binormal vector" that change smoothly along the curve You will need an array. The tangent vector is a unit vector that represents the slope at one point on the curve, and the normal vector and the normal vector are obtained as vectors that are orthogonal to each other.
+With these orthogonal vectors, you can get "coordinates on the circumference orthogonal to the curve" at a point on the curve.
+
++Figure 1.13: Find the unit vector (v) that points to the coordinates on the circumference from the normal and binormal. Multiply this unit vector (v) by the radius radius to make it orthogonal to the curve. You can get the coordinates on the circumference of the radius radius +
+A set of three orthogonal vectors at a point on this curve is called a Frenet frame.
+
++Figure 1.14: Visualization of the Frenet frame array that makes up Tubular The frame represents one Frenet frame, and the three arrows indicate the tangent vector, the normal vector, and the binormal vector. +
+Tubular modeling is performed by finding the vertex data for each clause based on the normals and binormals obtained from this Frenet frame, and connecting them together.
+In the sample program, the CurveBase class has a function ComputeFrenetFrames to generate this Frenet frame array.
+ +The Tubular class has a CatmullRomCurve class that represents a curve, and forms a cylinder along the curve drawn by this CatmullRomCurve.
+The CatmullRomCurve class requires four or more control points, and when you manipulate the control points, the shape of the curve changes, and the shape of the Tubular model changes accordingly.
+var vertices = new List<Vector3>();
+var normals = new List<Vector3>();
+var tangents = new List<Vector4>();
+var uvs = new List<Vector2>();
+var triangles = new List<int>();
+
+// Get the Frenet frame from the curve
+var frames = curve.ComputeFrenetFrames(tubularSegments, closed);
+
+// Generate Tubular vertex data
+for(int i = 0; i < tubularSegments; i++) {
+ GenerateSegment(curve, frames, vertices, normals, tangents, i);
+}
+// Place the last vertex at the start of the curve if you want to generate a closed cylinder, or at the end of the curve if it is not closed
+GenerateSegment(
+ curve,
+ frames,
+ vertices,
+ normals,
+ tangents,
+ (!closed) ? tubularSegments : 0
+);
+
+// Set the uv coordinates from the start point of the curve to the end point
+for (int i = 0; i <= tubularSegments; i++) {
+ for (int j = 0; j <= radialSegments; j++) {
+ float u = 1f * j / radialSegments;
+ float v = 1f * i / tubularSegments;
+ uvs.Add(new Vector2(u, v));
+ }
+}
+
+// Build the side
+for (int j = 1; j <= tubularSegments; j++) {
+ for (int i = 1; i <= radialSegments; i++) {
+ int a = (radialSegments + 1) * (j - 1) + (i - 1);
+ int b = (radialSegments + 1) * j + (i - 1);
+ int c = (radialSegments + 1) * j + i;
+ int d = (radialSegments + 1) * (j - 1) + i;
+
+ triangles.Add(a); triangles.Add(d); triangles.Add(b);
+ triangles.Add(b); triangles.Add(d); triangles.Add(c);
+ }
+}
+
+var mesh = new Mesh ();
+mesh.vertices = vertices.ToArray();
+mesh.normals = normals.ToArray();
+mesh.tangents = tangents.ToArray();
+mesh.uv = uvs.ToArray();
+mesh.triangles = triangles.ToArray();
+
+The function GenerateSegment calculates the vertex data of the specified clause based on the normal and binormal extracted from the Frenet frame mentioned above, and sets it in the variable passed in List type.
+void GenerateSegment(
+ CurveBase curve,
+ List<FrenetFrame> frames,
+ List<Vector3> vertices,
+ List<Vector3> normals,
+ List<Vector4> tangents,
+ int index
+) {
+ // 0.0 ~ 1.0
+ var u = 1f * index / tubularSegments;
+
+ var p = curve.GetPointAt(u);
+ var fr = frames[index];
+
+ var N = fr.Normal;
+ var B = fr.Binormal;
+
+ for(int j = 0; j <= radialSegments; j++) {
+ // 0.0 ~ 2π
+ float rad = 1f * j / radialSegments * PI2;
+
+ // Arrange the vertices evenly along the circumference
+ float cos = Mathf.Cos(rad), sin = Mathf.Sin(rad);
+ var v = (cos * N + sin * B).normalized;
+ vertices.Add(p + radius * v);
+ normals.Add(v);
+
+ var tangent = fr.Tangent;
+ tangents.Add(new Vector4(tangent.x, tangent.y, tangent.z, 0f));
+ }
+}
+
+This section introduces techniques for generating more complex models using the Procedural Modeling techniques described so far.
+ +Plant modeling is often mentioned as an application of the Procedural Modeling technique. The Tree API * 4 for modeling trees in the Editor is also provided in Unity, and there is software dedicated to plant modeling called Speed Tree * 5 .
+[*4] https://docs.unity3d.com/ja/540/Manual/tree-FirstTree.html
[*5] http://www.speedtree.com/
In this section, we will focus on modeling trees, which are relatively simple modeling methods among plants.
+ +There is L-System as an algorithm that can describe and express the structure of plants. The L-System was proposed by botanist Aristid Lindenmayer in 1968, and the L-System L comes from his name.
+L-System can be used to express the self-similarity found in the shape of plants.
+Self-similarity means that when you magnify the shape of the details of an object, it matches the shape of the object as seen on a large scale. For example, when observing the branching of a tree, the branching of the part near the trunk And, there is a similarity in the way the branches are divided near the tip.
+
++Figure 1.15: A figure in which each branch is branched by changing by 30 degrees. It can be seen that the root part and the branch tip part are similar, but even such a simple figure looks like a tree ( Sample program LSystem.scene) +
+The L-System provides a mechanism for developing complex sequences of symbols by representing elements with symbols, defining rules to replace the symbols, and repeatedly applying the rules to the symbols.
+For example, to give a simple example, start with the symbol "a"
+and repeatedly rewrite it according to the rules (a -> ab, b -> a):
+a -> ab -> aba -> abaab -> abaababa -> ...
+Each step produces increasingly complex results.
+An example of using this L-System for graphic generation is the LSystem class of the sample program.
+In the LSystem class, drawing operations (such as advancing while drawing a line and turning the drawing direction) are available,
+and the rewriting rule is applied to them repeatedly a fixed number of times.
+As a result, you can draw a self-similar figure, as shown in the sample LSystem.scene. The property of "recursively rewriting the state" of this L-System creates self-similarity. Self-similarity is also called Fractal and is also a research area.
+ +As an example of actually applying L-System to a program that generates a tree model, we prepared a class called ProceduralTree.
+In ProceduralTree, like the LSystem class explained in the previous section, the tree shape is generated by recursively calling the routine "advance branches, branch, and advance branches".
+In the LSystem class in the previous section, the simple rule for branching was "branch in two directions, left and right at a fixed angle", but in ProceduralTree, random numbers are used, and the number of branches and the branching direction have randomness. However, we have set rules so that the branches branch in a complicated manner.
+
+Figure 1.16: ProceduralTree.scene
+The TreeData class is a class that includes parameters that determine the degree of branching of branches and parameters that determine the size of the tree and the fineness of the mesh of the model. You can design a tree shape by adjusting the parameters of an instance of this class.
+ +Use some parameters in the TreeData class to adjust the degree of branching.
+ +The number of branches branching from one branch is adjusted by the branchesMin / branchesMax parameters. branchesMin represents the minimum number of branches, branchesMax represents the maximum number of branches, and the number between branchesMin and branchesMax is randomly selected to determine the number of branches.
+ +The direction in which the branching branches grow is adjusted with the growthAngleMin and growthAngleMax parameters. GrowthAngleMin represents the minimum angle in the branching direction, and growthAngleMax represents the maximum angle. The number between growthAngleMin and growthAngleMax is randomly selected to determine the branching direction.
+Each branch has a tangent vector that represents the direction of extension, and a normal vector and a binormal vector as vectors that are orthogonal to it.
+The value randomly obtained from the growthAngleMin / growAngleMax parameters is rotated in the direction of the normal vector and the direction of the binormal vector with respect to the tangent vector in the direction extending from the branch point.
+By applying a random rotation to the tangent vector in the direction extending from the branch point, the direction in which the branch at the branch destination grows is changed, and the branching is changed in a complicated manner.
+
++Figure 1.17: Random rotation applied in the direction extending from the branch point The T arrow at the branch point is the extending direction (tangent vector), the N arrow is the normal vector, and the B arrow is the normal line (normal vector). Binormal vector), and random rotation is applied in the direction of the normal and the normal with respect to the extending direction. +
+The growthAngleScale parameter is provided so that the angle of rotation randomly applied in the direction in which the branch grows increases toward the tip of the branch. This growthAngleScale parameter has a stronger effect on the rotation angle and increases the rotation angle as the generation parameter representing the generation of the branch instance approaches 0, that is, as it approaches the tip of the branch.
+// The branching angle increases toward the branch tip
+var scale = Mathf.Lerp(
+    1f,
+    data.growthAngleScale,
+    1f - 1f * generation / generations
+);
+
+// Rotation in the normal direction
+var qn = Quaternion.AngleAxis(scale * data.GetRandomGrowthAngle(), normal);
+
+// Rotation in the binormal direction
+var qb = Quaternion.AngleAxis(scale * data.GetRandomGrowthAngle(), binormal);
+
+// Determine the position of the branch tip by rotating the tangent direction the branch is facing by qn * qb
+this.to = from + (qn * qb) * tangent * length;
+
Branches are represented by the TreeBranch class.
+If you call the constructor with TreeData for setting the branch pattern as an argument in addition to the parameters of the number of generations (generations) and the basic length (length) and thickness (radius), it will recursively internally. An instance of TreeBranch will be created.
+A TreeBranch that branches from one TreeBranch is stored in a children variable of type List <TreeBranch> in the original TreeBranch so that all branches can be traced from the root TreeBranch.
+ +Like Tubular, the model of one branch divides one curve, models the divided nodes as one Cylinder, and builds them so that they are connected.
+The TreeSegment class is a class that expresses a clause that divides a single curve.
+public class TreeSegment {
+ public FrenetFrame Frame { get { return frame; } }
+ public Vector3 Position { get { return position; } }
+ public float Radius { get { return radius; } }
+
+ // Direction vector tangent, which Tree Segment is facing,
+ // FrenetFrame with vectors normal and binormal orthogonal to it
+ FrenetFrame frame;
+
+ // Position of Tree Segment
+ Vector3 position;
+
+ // Tree Segment width (radius)
+ float radius;
+
+ public TreeSegment(FrenetFrame frame, Vector3 position, float radius) {
+ this.frame = frame;
+ this.position = position;
+ this.radius = radius;
+ }
+}
+
+One TreeSegment has a FrenetFrame, which is a set of a vector in the direction in which the node is facing and an orthogonal vector, and variables that represent the position and width, and holds the necessary information at the top and bottom when building a Cylinder.
+ +The model generation logic of Procedural Tree is an application of Tubular, which generates a Tubular model from the array of Tree Segments of one branch Tree Branch and aggregates them into one model to form the whole tree. Modeling with an approach.
+var root = new TreeBranch (
+ generations,
+ length,
+ radius,
+ data
+);
+
+var vertices = new List<Vector3>();
+var normals = new List<Vector3>();
+var tangents = new List<Vector4>();
+var uvs = new List<Vector2>();
+var triangles = new List<int>();
+
+// Get the total length of the tree
+// Divide the length of the branch by the total length to get the height of the uv coordinates (uv.y)
+// Set to change from the root to the tip of the branch with [0.0 ~ 1.0]
+float maxLength = TraverseMaxLength(root);
+
+// Recursively follow all branches and generate a mesh corresponding to each branch
+Traverse(root, (branch) => {
+ var offset = vertices.Count;
+
+ var vOffset = branch.Offset / maxLength;
+ var vLength = branch.Length / maxLength;
+
+ // Generate vertex data from a single branch
+ for(int i = 0, n = branch.Segments.Count; i < n; i++) {
+ var t = 1f * i / (n - 1);
+ var v = vOffset + vLength * t;
+
+ var segment = branch.Segments[i];
+ var N = segment.Frame.Normal;
+ var B = segment.Frame.Binormal;
+ for(int j = 0; j <= data.radialSegments; j++) {
+ // 0.0 ~ 2π
+ var u = 1f * j / data.radialSegments;
+ float rad = u * PI2;
+
+ float cos = Mathf.Cos(rad), sin = Mathf.Sin(rad);
+ var normal = (cos * N + sin * B).normalized;
+ vertices.Add(segment.Position + segment.Radius * normal);
+ normals.Add(normal);
+
+ var tangent = segment.Frame.Tangent;
+ tangents.Add(new Vector4(tangent.x, tangent.y, tangent.z, 0f));
+
+ uvs.Add(new Vector2(u, v));
+ }
+ }
+
+ // Build a one-branch triangle
+ for (int j = 1; j <= data.heightSegments; j++) {
+ for (int i = 1; i <= data.radialSegments; i++) {
+ int a = (data.radialSegments + 1) * (j - 1) + (i - 1);
+ int b = (data.radialSegments + 1) * j + (i - 1);
+ int c = (data.radialSegments + 1) * j + i;
+ int d = (data.radialSegments + 1) * (j - 1) + i;
+
+ a += offset;
+ b += offset;
+ c += offset;
+ d += offset;
+
+ triangles.Add(a); triangles.Add(d); triangles.Add(b);
+ triangles.Add(b); triangles.Add(d); triangles.Add(c);
+ }
+ }
+});
+
+var mesh = new Mesh ();
+mesh.vertices = vertices.ToArray();
+mesh.normals = normals.ToArray();
+mesh.tangents = tangents.ToArray();
+mesh.uv = uvs.ToArray();
+mesh.triangles = triangles.ToArray();
+mesh.RecalculateBounds();
+
+Procedural modeling of plants is deep even with trees alone, and methods such as obtaining a model of a natural tree by branching so that the irradiation rate of sunlight is high have been devised.
+If you are interested in modeling such plants, please refer to The Algorithmic Beauty of Plants * 6 , which was written by Aristid Lindenmayer, who invented the L-System, for various methods.
+[*6] http://algorithmicbotany.org/papers/#abop
From the procedural modeling examples introduced so far, we have learned the advantages of the technique of "dynamically generating a model while changing it according to parameters". You may get the impression that it is a technology for improving the efficiency of content development because you can efficiently create models of various variations.
+However, like modeling tools and sculpting tools out there, procedural modeling techniques can also be applied to "interactively generate models in response to user input."
+As an application example, we will introduce "Teddy," a technology that generates a three-dimensional model from contour lines created by handwritten sketches, devised by Takeo Igarashi of the Department of Computer Science, the University of Tokyo.
+
++Figure 1.18: Unity assets of "Teddy", a technology for 3D modeling by hand-drawn sketches http://uniteddy.info/ja +
+This technology was actually used in the game "Junk Masterpiece Theater Rakugaki Kingdom" * 7, which was released as software for PlayStation 2 in 2002, and it is said that "the picture you drew is converted to 3D and moved as an in-game character". The application has been realized.
[*7] https://ja.wikipedia.org/wiki/ラクガキ王国
With this technology, a 3D model is generated by a procedure that triangulates the interior of the hand-drawn contour (using Delaunay triangulation *8) and inflates the resulting mesh.
+[*8] https://en.wikipedia.org/wiki/Delaunay_triangulation
Regarding the details of the algorithm, a paper presented at SIGGRAPH, an international conference dealing with computer graphics, has been published. *9
+[*9] http://www-ui.is.s.u-tokyo.ac.jp/~takeo/papers/siggraph99.pdf
The version of Teddy ported to Unity is published in the Asset Store, so anyone can incorporate this technology into their content. *10
+[*10] http://uniteddy.info/ja/
By using procedural modeling techniques in this way, it is possible to develop unique modeling tools and create content that develops according to the user's creation.
+ +With procedural modeling techniques
+I have seen that can be achieved.
+Since Unity itself is a game engine, you can imagine its application in games and video content from the examples introduced in this chapter.
+However, just as computer graphics technology itself has a wide range of applications, it can be considered that the range of applications for model generation technology is also wide. As I mentioned at the beginning, procedural modeling techniques are also used in the fields of architecture and product design, and with the development of digital fabrication such as 3D printer technology, there are opportunities to use the designed shapes in real life. Is also increasing at the individual level.
+In this way, if you think about the fields in which you will use the designed shapes from a broad perspective, you may find various situations where you can apply procedural modeling techniques.
+ +
|
![]() |
|
Here's a simple explanation of how to use ComputeShader (hereafter "Compute Shader" if needed) in Unity. Compute shaders are used to parallelize simple operations using the GPU and perform large numbers of operations at high speed. It also delegates processing to the GPU, but it is different from the normal rendering pipeline. In CG, it is often used to express the movement of a large number of particles.
+Some of the content that follows from this chapter uses compute shaders, and knowledge of compute shaders is required to read them.
+Here, we use two simple samples to explain what will be the first step in learning a compute shader. These don't cover everything in compute shaders, so be sure to supplement the information as needed.
+Although it is called ComputeShader in Unity, similar technologies include OpenCL, DirectCompute, and CUDA. The basic concepts are similar, especially with DirectCompute (DirectX). If you need more detailed information about the concept around the architecture, it is a good idea to collect information about these as well.
+The sample in this chapter is "Simple Compute Shader" from https://github.com/IndieVisualLab/UnityGraphicsProgramming .
+ +
++Figure 2.1: Kernel, thread, group image +
+Before explaining the concrete implementation, it is necessary to explain the concept of kernel (Kernel) , thread (Thread) , and group (Group) handled by the compute shader .
+A kernel is a process performed on the GPU and is treated as a function in your code (corresponding to the kernel in general system terms).
+A thread is a unit that runs the kernel. One thread runs one kernel. Compute shaders allow the kernel to run in parallel on multiple threads at the same time. Threads are specified in three dimensions (x, y, z).
+For example, (4, 1, 1) will execute 4 * 1 * 1 = 4 threads at the same time. If (2, 2, 1), 2 * 2 * 1 = 4 threads will be executed at the same time. The same four threads run, but in some situations it may be more efficient to specify the threads in two dimensions, such as the latter. This will be explained later. For the time being, it is necessary to recognize that the number of threads is specified in three dimensions.
+Finally, a group is a unit that executes a thread. Also, the threads that a group runs are called group threads . For example, suppose a group has (4, 1, 1) threads per unit. When there are two of these groups, each group has (4, 1, 1) threads.
+Groups are specified in three dimensions, just like threads. For example, when a (2, 1, 1) group runs a kernel running on (4, 4, 1) threads, the number of groups is 2 * 1 * 1 = 2. Each of these two groups will have 4 * 4 * 1 = 16 threads. Therefore, the total number of threads is 2 * 16 = 32.
+ +Sample (1) "SampleScene_Array" deals with how to execute an appropriate calculation with a compute shader and get the result as an array. The sample includes the following operations:
+The execution result of sample (1) is as follows. Since it is only debug output, please check the operation while reading the source code.
+
++Figure 2.2: Execution result of sample (1) +
+From here, I will explain using a sample as an example. It's very short, so it's a good idea to take a look at the compute shader implementation first. The basic configuration is a function definition, a function implementation, a buffer, and variables as needed.
+SimpleComputeShader_Array.compute
+#pragma kernel KernelFunction_A
+#pragma kernel KernelFunction_B
+
+RWStructuredBuffer<int> intBuffer;
+float floatValue;
+
+[numthreads(4, 1, 1)]
+void KernelFunction_A(uint3 groupID : SV_GroupID,
+ uint3 groupThreadID : SV_GroupThreadID)
+{
+ intBuffer[groupThreadID.x] = groupThreadID.x * floatValue;
+}
+
+[numthreads(4, 1, 1)]
+void KernelFunction_B(uint3 groupID : SV_GroupID,
+ uint3 groupThreadID : SV_GroupThreadID)
+{
+ intBuffer[groupThreadID.x] += 1;
+}
+
+Features include the numthreads attribute and SV_GroupID semantics, which will be discussed later.
+ +As mentioned earlier, aside from the exact definition, the kernel refers to a single operation performed on the GPU and is treated as a single function in the code. Multiple kernels can be implemented in one compute shader.
+In this example, the KernelFunction_A and KernelFunction_B functions correspond to kernels. A function to be treated as a kernel is declared using #pragma kernel; this distinguishes kernels from other functions.
A unique index is given to each kernel to identify any one of the multiple defined kernels. The indexes are assigned as 0, 1, … from the top, in the order of the #pragma kernel declarations.
Create a buffer area to store the result of execution by the compute shader. In the sample, the variable RWStructuredBuffer<int> intBuffer corresponds to this.
If you want to give an arbitrary value from the script (CPU) side, prepare a variable in the same way as in general CPU programming. In this example, the variable intValue corresponds to this, and the value is passed from the script.
The numthreads attribute (Attribute) specifies the number of threads that execute the kernel (function). The number of threads is specified by (x, y, z). For example, (4, 1, 1), 4 * 1 * 1 = 4 threads execute the kernel. Besides, (2, 2, 1) runs the kernel in 2 * 2 * 1 = 4 threads. Both are executed in 4 threads, but the difference and proper use will be described later.
+ +There are restrictions on the arguments that can be set in the kernel, and the degree of freedom is extremely low compared to general CPU programming.
+The values attached to the arguments below are called semantics; in this example, groupID : SV_GroupID and groupThreadID : SV_GroupThreadID are set. A semantic indicates what the value of the argument represents, and it cannot be renamed to any other name.
The argument name (variable name) can be defined freely, but one of the semantics defined when using the compute shader must be set. In other words, it is not possible to implement an argument of any type and refer to it in the kernel, and the arguments that can be referenced in the kernel are selected from the specified limited ones.
+SV_GroupIDIndicates in which group the thread running the kernel is running (x, y, z). SV_GroupThreadIDIndicates the number of threads in the group that runs the kernel with (x, y, z).
For example, in a group of (4, 4, 1), when running a thread of (2, 2, 1) SV_GroupID, returns a value of (0 ~ 3, 0 ~ 3, 0). SV_GroupThreadIDReturns a value of (0 ~ 1, 0 ~ 1, 0).
In addition to the semantics set in the sample, there are other SV_~semantics that start with and can be used, but I will omit the explanation here. I think it's better to read it after understanding the movement of the compute shader.
In the sample, the thread numbers are assigned to the prepared buffers in order. groupThreadIDIs given the thread number to run in a group. This kernel runs in (4, 1, 1) threads, so groupThreadIDis given (0 ~ 3, 0, 0).
SimpleComputeShader_Array.compute
+[numthreads(4, 1, 1)]
+void KernelFunction_A(uint3 groupID : SV_GroupID,
+ uint3 groupThreadID : SV_GroupThreadID)
+{
+ intBuffer[groupThreadID.x] = groupThreadID.x * intValue;
+}
+
+This sample runs this thread in groups (1, 1, 1) (from the script below). That is, it runs only one group, which contains 4 * 1 * 1 threads. As a result groupThreadID.xPlease make sure that the value of 0 to 3 is applied to.
* Although is groupIDnot used in this example, the number of groups specified in 3D is given as in the case of threads. Try substituting it and use it to see how the compute shader works.
Run the implemented compute shader from a script. The items required on the script side are as follows.
+comuteShaderkernelIndex_KernelFunction_A, BintComputeBufferSimpleComputeShader_Array.cs
+public ComputeShader computeShader;
+int kernelIndex_KernelFunction_A;
+int kernelIndex_KernelFunction_B;
+ComputeBuffer intComputeBuffer;
+
+void Start()
+{
+ this.kernelIndex_KernelFunction_A
+ = this.computeShader.FindKernel("KernelFunction_A");
+ this.kernelIndex_KernelFunction_B
+ = this.computeShader.FindKernel("KernelFunction_B");
+
+ this.intComputeBuffer = new ComputeBuffer(4, sizeof(int));
+ this.computeShader.SetBuffer
+ (this.kernelIndex_KernelFunction_A,
+ "intBuffer", this.intComputeBuffer);
+
+ this.computeShader.SetInt("intValue", 1);
+ …
+
+In order to run a kernel, you need index information to specify that kernel. The indexes are given as 0, 1… from the top in the order defined by #pragma kernel, but it is better to obtain them from the script side using the FindKernel function.
SimpleComputeShader_Array.cs
+this.kernelIndex_KernelFunction_A
+ = this.computeShader.FindKernel("KernelFunction_A");
+
+this.kernelIndex_KernelFunction_B
+ = this.computeShader.FindKernel("KernelFunction_B");
+
+Prepare a buffer area to save the calculation result by the compute shader (GPU) on the CPU side. It is ComputeBufferdefined as in Unity .
SimpleComputeShader_Array.cs
+this.intComputeBuffer = new ComputeBuffer(4, sizeof(int)); +this.computeShader.SetBuffer + (this.kernelIndex_KernelFunction_A, "intBuffer", this.intComputeBuffer); ++
ComputeBufferInitialize by specifying (1) the size of the area to be saved and (2) the size of the data to be saved per unit. Spaces for four int sizes are provided here. This is because the execution result of the compute shader is saved as an int [4]. Resize as needed.
Then, implemented in the compute shader, (1) specify which kernel runs, (2) specify which buffer to use on which GPU, and (3) specify which buffer on the CPU corresponds to. To do.
+In this example, the buffer area KernelFunction_A(2) referenced when (1) is executed is specified to correspond to intBuffer(3) intComputeBuffer.
SimpleComputeShader_Array.cs
+this.computeShader.SetInt("intValue", 1);
+
+Depending on what you want to process, you may want to pass a value from the script (CPU) side to the compute shader (GPU) side and refer to it. Most types of values ComputeShader.Set~can be set to variables in the compute shader using. At this time, the variable name of the argument set in the argument and the variable name defined in the compute shader must match. In this example, intValuewe are passing 1.
The kernel implemented (defined) in the compute shader is ComputeShader.Dispatchexecuted by the method. Runs the kernel with the specified index in the specified number of groups. The number of groups is specified by X * Y * Z. In this sample, 1 * 1 * 1 = 1 group.
SimpleComputeShader_Array.cs
+this.computeShader.Dispatch
+ (this.kernelIndex_KernelFunction_A, 1, 1, 1);
+
+int[] result = new int[4];
+
+this.intComputeBuffer.GetData(result);
+
+for (int i = 0; i < 4; i++)
+{
+ Debug.Log(result[i]);
+}
+
+The execution result of the compute shader (kernel) is ComputeBuffer.GetDataobtained by.
Check the implementation on the compute shader side again. In this sample, the following kernels are running in 1 * 1 * 1 = 1 groups. The threads are 4 * 1 * 1 = 4 threads. It also intValuegives 1 from the script.
SimpleComputeShader_Array.compute
+[numthreads(4, 1, 1)]
+void KernelFunction_A(uint3 groupID : SV_GroupID,
+ uint3 groupThreadID : SV_GroupThreadID)
+{
+ intBuffer[groupThreadID.x] = groupThreadID.x * intValue;
+}
+
+groupThreadID (SV_GroupThreadID) will contain a value that indicates which thread in the group the kernel is currently running on, so in this example (0 ~ 3, 0, 0) will be entered. Therefore, groupThreadID.x is 0 to 3. In other words, intBuffer[0] = 0 through intBuffer[3] = 3 will be executed in parallel.
When running different kernels implemented in one compute shader, specify the index of another kernel. In this example, KernelFunction_Arun after KernelFunction_B. Furthermore KernelFunction_A, the buffer area used in is KernelFunction_Balso used.
SimpleComputeShader_Array.cs
+this.computeShader.SetBuffer
+(this.kernelIndex_KernelFunction_B, "intBuffer", this.intComputeBuffer);
+
+this.computeShader.Dispatch(this.kernelIndex_KernelFunction_B, 1, 1, 1);
+
+this.intComputeBuffer.GetData(result);
+
+for (int i = 0; i < 4; i++)
+{
+ Debug.Log(result[i]);
+}
+
+KernelFunction_B executes code similar to the following. Note that intBuffer continues to refer to the same buffer that was used by KernelFunction_A.
SimpleComputeShader_Array.compute
+RWStructuredBuffer<int> intBuffer;
+
+[numthreads(4, 1, 1)]
+void KernelFunction_B
+(uint3 groupID : SV_GroupID, uint3 groupThreadID : SV_GroupThreadID)
+{
+ intBuffer[groupThreadID.x] += 1;
+}
+
In this sample, KernelFunction_A gave intBuffer the values 0 to 3 in order. Therefore, after running KernelFunction_B, make sure the values are between 1 and 4.
ComputeBuffers that are no longer in use must be explicitly destroyed.
+SimpleComputeShader_Array.cs
+this.intComputeBuffer.Release(); ++
The intent of specifying multidimensional threads or groups is not covered in this sample. For example, (4, 1, 1) thread and (2, 2, 1) thread both run 4 threads, but it makes sense to use the two properly. This will be explained in the sample (2) that follows.
+ +Sample (2) In "SampleScene_Texture", the calculation result of the compute shader is acquired as a texture. The sample includes the following operations:
+The execution result of sample (2) is as follows. Generates a texture that has a horizontal and vertical gradient.
+
++Figure 2.3: Execution result of sample (2) +
+See the sample for the overall implementation. In this sample, the following code is roughly executed in the compute shader. Notice that the kernel runs in multidimensional threads. Since it is (8, 8, 1), it will be executed in 8 * 8 * 1 = 64 threads per group. Another RWTexture2D<float4>major change is that the calculation result is saved in.
SimpleComputeShader_Texture.compute
+RWTexture2D<float4> textureBuffer;
+
+[numthreads(8, 8, 1)]
+void KernelFunction_A(uint3 dispatchThreadID : SV_DispatchThreadID)
+{
+ float width, height;
+ textureBuffer.GetDimensions(width, height);
+
+ textureBuffer[dispatchThreadID.xy]
+ = float4(dispatchThreadID.x / width,
+ dispatchThreadID.x / width,
+ dispatchThreadID.x / width,
+ 1);
+}
+
+The SV_DispatchThreadID semantic was not used in sample (1). It's a bit complicated, but it shows "where the thread running a kernel is located among all threads (x, y, z)".
SV_DispatchThreadID is the value calculated by SV_GroupID * numthreads + SV_GroupThreadID. SV_GroupID indicates a group with (x, y, z), and SV_GroupThreadID indicates a thread contained in a group with (x, y, z).
For example, suppose you run a kernel in a (2, 2, 1) group that runs on (4, 1, 1) threads. One of the kernels runs on the (2, 0, 0) th thread in the (0, 1, 0) th group. In this case SV_DispatchThreadID, (0, 1, 0) * (4, 1, 1) + (2, 0, 0) = (0, 1, 0) + (2, 0, 0) = (2, 1, 0) ).
Now let's consider the maximum value. In the (2, 2, 1) group, when the kernel runs on the (4, 1, 1) thread, the (3, 0, 0) th thread in the (1, 1, 0) th group Is the last thread. In this case SV_DispatchThreadID, (1, 1, 0) * (4, 1, 1) + (3, 0, 0) = (4, 1, 0) + (3, 0, 0) = (7, 1, 0) ).
After that, it is difficult to explain in chronological order, so please check while reading the entire sample.
+Sample (2) dispatchThreadID.xysets groups and threads to show all the pixels on the texture. Since it is the script side that sets the group, we need to look across the script and the compute shader.
SimpleComputeShader_Texture.compute
+textureBuffer[dispatchThreadID.xy] + = float4(dispatchThreadID.x / width, + dispatchThreadID.x / width, + dispatchThreadID.x / width, + 1); ++
In this sample, we have prepared a texture of 512x512, but when dispatchThreadID.xis 0 ~ 511, it dispatchThreadID / widthis 0 ~ 0.998…. In other words, as dispatchThreadID.xythe value (= pixel coordinates) increases, it will be filled from black to white.
Textures consist of RGBA channels, each set from 0 to 1. When all 0s, it is completely black, and when all 1s, it is completely white.
+The following is an explanation of the implementation on the script side. In sample (1), we prepared an array buffer to store the calculation results of the compute shader. In sample (2), we will prepare a texture instead.
+SimpleComputeShader_Texture.cs
+RenderTexture renderTexture_A;
+…
+void Start()
+{
+ this.renderTexture_A = new RenderTexture
+ (512, 512, 0, RenderTextureFormat.ARGB32);
+ this.renderTexture_A.enableRandomWrite = true;
+ this.renderTexture_A.Create();
+…
+
+Initialize RenderTexture by specifying the resolution and format. Note RenderTexture.enableRandomWritethat this is enabled to enable writing to the texture.
Just as you can get the index of the kernel, you can also get how many threads the kernel can run (thread size).
+SimpleComputeShader_Texture.cs
+void Start()
+{
+…
+ uint threadSizeX, threadSizeY, threadSizeZ;
+
+ this.computeShader.GetKernelThreadGroupSizes
+ (this.kernelIndex_KernelFunction_A,
+ out threadSizeX, out threadSizeY, out threadSizeZ);
+…
+
+Execute the process with the Dispatch method. At this time, pay attention to how to specify the number of groups. In this example, the number of groups is calculated by "horizontal (vertical) resolution of texture / number of threads in horizontal (vertical) direction".
When thinking about the horizontal direction, the texture resolution is 512 and the number of threads is 8, so the number of horizontal groups is 512/8 = 64. Similarly, the vertical direction is 64. Therefore, the total number of groups is 64 * 64 = 4096.
+SimpleComputeShader_Texture.cs
+void Update()
+{
+ this.computeShader.Dispatch
+ (this.kernelIndex_KernelFunction_A,
+ this.renderTexture_A.width / this.kernelThreadSize_KernelFunction_A.x,
+ this.renderTexture_A.height / this.kernelThreadSize_KernelFunction_A.y,
+ this.kernelThreadSize_KernelFunction_A.z);
+
+ plane_A.GetComponent<Renderer>()
+ .material.mainTexture = this.renderTexture_A;
+
+In other words, each group will process 8 * 8 * 1 = 64 (= number of threads) pixels. Since there are 4096 groups, we will process 4096 * 64 = 262,144 pixels. The image is 512 * 512 = 262,144 pixels, which means that we were able to process just all the pixels in parallel.
+ +The other kernel fills using the y coordinate instead of x. At this time, note that a value close to 0, a black color, appears at the bottom. You may need to consider the origin when working with textures.
+ +Multidimensional threads and groups work well when you need multidimensional results, or when you need multidimensional operations, as in sample (2). If sample (2) is to be processed in a one-dimensional thread, the vertical pixel coordinates will need to be calculated arbitrarily.
+You can confirm it when you actually implement it, but when you have a stride in image processing, for example, a 512x512 image, the 513th pixel is the (0, 1) coordinate, and so on. ..
+It is better to reduce the number of operations, and the complexity increases as the advanced processing is performed. When designing processing with compute shaders, it's a good idea to consider whether you can take advantage of multidimensionality.
+ +In this chapter, we have provided introductory information in the form of explaining samples of compute shaders, but from now on, we will supplement some information necessary for learning.
+ +
++Figure 2.4: Image of GPU architecture +
+If you have a basic knowledge of GPU architecture and structure, it will be useful for optimizing it when implementing processing using compute shaders, so I will introduce it here a little.
+The GPU is equipped with a large number of Streaming Multiprocessors (SM) , which are shared and parallelized to execute the given processing.
+The SM has multiple smaller Streaming Processors (SPs) , and the SP calculates the processing assigned to the SM.
+The SM has registers and shared memory, which allows it to read and write faster than global memory (memory on DRAM) . Registers are used for local variables that are referenced only within the function, and shared memory can be referenced and written by all SPs that belong to the same SM.
+In other words, it is ideal to know the maximum size and scope of each memory and realize an optimal implementation that can read and write memory at high speed without waste.
+For example, shared memory, which you may need to consider most, is groupshareddefined using storage-class modifiers . Since this is an introduction, I will omit a concrete introduction example, but please remember it as a technique and terminology necessary for optimization and use it for future learning.
The fastest accessible memory area located closest to the SP. It consists of 4 bytes and contains kernel (function) scope variables. Since each thread is independent, it cannot be shared.
+ +A memory area located in the SM, which is managed together with the L1 cache. It can be shared by SPs (= threads) in the same SM and can be accessed fast enough.
+ +A memory area on the DRAM, not on the GPU. References are slow because they are far from the processor on the GPU. On the other hand, it has a large capacity and can read and write data from all threads.
+ +The memory area on the DRAM, not the GPU, stores data that does not fit in the registers. References are slow because they are far from the processor on the GPU.
+ +This memory is dedicated to texture data and handles global memory exclusively for textures.
+ +It is a read-only memory and is used to store kernel (function) arguments and constants. It has its own cache and can be referenced faster than global memory.
+ +If the total number of threads is larger than the number of data you actually want to process, it will result in threads that are executed (or not processed) meaninglessly, which is inefficient. Design the total number of threads to match the number of data you want to process as much as possible.
+ +Introducing the upper limit of the current specifications at the time of writing. Please note that it may not be the latest version. However, it is required to implement it while considering these restrictions.
+The limits on the number of threads and groups were not mentioned in the discussion. This is because it changes depending on the shader model (version). It is expected that the number that can be paralleled will continue to increase in the future.
+The group limit is (x, y, z), 65535 each.
+ +The upper limit of shared memory is 16 KB per unit group, and the size of shared memory that a thread can write is limited to 256 bytes per unit.
+ +Other references in this chapter are:
+
|
![]() |
|
In this chapter, we will explain the implementation of group simulation using the Boids algorithm using Compute Shader. Birds, fish and other terrestrial animals sometimes flock. The movements of this group show regularity and complexity, and have a certain beauty and have attracted people. In computer graphics, it is not realistic to control the behavior of each individual by hand, and an algorithm for forming a group called Boids was devised. This simulation algorithm consists of some simple rules and is easy to implement, but in a simple implementation it is necessary to check the positional relationship with all individuals, and as the number of individuals increases, it becomes squared. The amount of calculation will increase proportionally. If you want to control many individuals, it is very difficult to implement with CPU. Therefore, we will take advantage of the powerful parallel computing power of the GPU. Unity provides a shader program called Compute Shader to perform such general purpose computing (GPGPU) by GPU. The GPU has a special storage area called shared memory, which can be used effectively by using Compute Shader. In addition, Unity has an advanced rendering function called GPU instancing, which allows you to draw a large number of arbitrary meshes. We will introduce a program that controls and draws a large number of Boid objects using the functions that make use of the computing power of these Unity GPUs.
+ +A group of simulation algorithms called Boids was developed by Craig Reynolds in 1986 and published the following year in 1987 at ACM SIGGRAPH as a paper entitled "Flocks, Herds, and Schools: A Distributed Behavioral Model".
+In Reynolds, a herd produces complex behavior as a result of each individual modifying its own behavior based on the position and direction of movement of other individuals around it, through perceptions such as sight and hearing. Pay attention to the fact that there is.
+Each individual follows three simple rules of conduct:
+ +Move to avoid crowding with individuals within a certain distance
+ +Individuals within a certain distance move toward the average in the direction they are facing
+ +Move to the average position of an individual within a certain distance
+
++Figure 3.1: Basic rules for Boids +
+You can program the movement of the herd by controlling the individual movements according to these rules.
+ +https://github.com/IndieVisualLab/UnityGraphicsProgramming
+Open the BoidsSimulationOnGPU.unity scene data in the Assets / BoidsSimulationOnGPU folder in the sample Unity project in this document .
+ +The programs introduced in this chapter use Compute Shader and GPU instancing.
+ComputeShader runs on the following platforms or APIs:
+GPU instancing is available on the following platforms or APIs:
+In this sample program, the Graphics.DrawMeshInstancedIndirect method is used. Therefore, the Unity version must be 5.6 or later.
+ +This sample program consists of the following code.
+Scripts, material resources, etc. are set like this
+
++Figure 3.2: Settings on Unity Editor +
+This code manages Boids simulation parameters, Compute Shader that describes buffers and calculation instructions required for calculations on the GPU, and so on.
+GPUBoids.cs
+using UnityEngine;
+using System.Collections;
+using System.Collections.Generic;
+using System.Runtime.InteropServices;
+
+public class GPUBoids : MonoBehaviour
+{
+ // Boid data structure
+ [System.Serializable]
+ struct BoidData
+ {
+ public Vector3 Velocity; // Velocity
+ public Vector3 Position; // position
+ }
+ // Thread size of thread group
+ const int SIMULATION_BLOCK_SIZE = 256;
+
+ #region Boids Parameters
+ // Maximum number of objects
+ [Range(256, 32768)]
+ public int MaxObjectNum = 16384;
+
+ // Radius with other individuals to which the bond applies
+ public float CohesionNeighborhoodRadius = 2.0f;
+ // Radius with other individuals to which alignment is applied
+ public float AlignmentNeighborhoodRadius = 2.0f;
+ // Radius with other individuals to which separation is applied
+ public float SeparateNeighborhoodRadius = 1.0f;
+
+ // Maximum speed
+ public float MaxSpeed = 5.0f;
+ // Maximum steering force
+ public float MaxSteerForce = 0.5f;
+
+ // Weight of binding force
+ public float CohesionWeight = 1.0f;
+ // Weight of aligning force
+ public float AlignmentWeight = 1.0f;
+ // Weight of separating force
+ public float SeparateWeight = 3.0f;
+
+ // Weight of force to avoid walls
+ public float AvoidWallWeight = 10.0f;
+
+ // Center coordinates of the wall
+ public Vector3 WallCenter = Vector3.zero;
+ // wall size
+ public Vector3 WallSize = new Vector3(32.0f, 32.0f, 32.0f);
+ #endregion
+
+ #region Built-in Resources
+ // Reference to Compute Shader for Boids simulation
+ public ComputeShader BoidsCS;
+ #endregion
+
+ #region Private Resources
+ // Buffer that stores the steering force (Force) of the Boid
+ ComputeBuffer _boidForceBuffer;
+ // Buffer containing basic Boid data (speed, position)
+ ComputeBuffer _boidDataBuffer;
+ #endregion
+
+ #region Accessors
+ // Get the buffer that stores the basic data of Boid
+ public ComputeBuffer GetBoidDataBuffer()
+ {
+ return this._boidDataBuffer != null ? this._boidDataBuffer : null;
+ }
+
+ // Get the number of objects
+ public int GetMaxObjectNum()
+ {
+ return this.MaxObjectNum;
+ }
+
+ // Returns the center coordinates of the simulation area
+ public Vector3 GetSimulationAreaCenter()
+ {
+ return this.WallCenter;
+ }
+
+ // Returns the size of the box in the simulation area
+ public Vector3 GetSimulationAreaSize()
+ {
+ return this.WallSize;
+ }
+ #endregion
+
+ #region MonoBehaviour Functions
+ void Start()
+ {
+ // Initialize the buffer
+ InitBuffer();
+ }
+
+ void Update()
+ {
+ // simulation
+ Simulation();
+ }
+
+ void OnDestroy()
+ {
+ // Discard the buffer
+ ReleaseBuffer();
+ }
+
+ void OnDrawGizmos()
+ {
+ // Draw the simulation area in wireframe as a debug
+ Gizmos.color = Color.cyan;
+ Gizmos.DrawWireCube (WallCenter, WallSize);
+ }
+ #endregion
+
+ #region Private Functions
+ // Initialize the buffer
+ void InitBuffer()
+ {
+ // Initialize the buffer
+ _boidDataBuffer = new ComputeBuffer(MaxObjectNum,
+ Marshal.SizeOf(typeof(BoidData)));
+ _boidForceBuffer = new ComputeBuffer(MaxObjectNum,
+ Marshal.SizeOf(typeof(Vector3)));
+
+ // Initialize Boid data, Force buffer
+ var forceArr = new Vector3[MaxObjectNum];
+ var boidDataArr = new BoidData [MaxObjectNum];
+ for (var i = 0; i < MaxObjectNum; i++)
+ {
+ forceArr[i] = Vector3.zero;
+ boidDataArr[i].Position = Random.insideUnitSphere * 1.0f;
+ boidDataArr[i].Velocity = Random.insideUnitSphere * 0.1f;
+ }
+ _boidForceBuffer.SetData(forceArr);
+ _boidDataBuffer.SetData(boidDataArr);
+ forceArr = null;
+ boidDataArr = null;
+ }
+
+ // simulation
+ void Simulation()
+ {
+ ComputeShader cs = BoidsCS;
+ int id = -1;
+
+ // Find the number of thread groups
+ int threadGroupSize = Mathf.CeilToInt(MaxObjectNum
+ / (float)SIMULATION_BLOCK_SIZE);
+
+ // Calculate steering force
+ id = cs.FindKernel ("ForceCS"); // Get the kernel ID
+ cs.SetInt("_MaxBoidObjectNum", MaxObjectNum);
+ cs.SetFloat("_CohesionNeighborhoodRadius",
+ CohesionNeighborhoodRadius);
+ cs.SetFloat("_AlignmentNeighborhoodRadius",
+ AlignmentNeighborhoodRadius);
+ cs.SetFloat("_SeparateNeighborhoodRadius",
+ SeparateNeighborhoodRadius);
+ cs.SetFloat("_MaxSpeed", MaxSpeed);
+ cs.SetFloat("_MaxSteerForce", MaxSteerForce);
+ cs.SetFloat("_SeparateWeight", SeparateWeight);
+ cs.SetFloat("_CohesionWeight", CohesionWeight);
+ cs.SetFloat("_AlignmentWeight", AlignmentWeight);
+ cs.SetVector("_WallCenter", WallCenter);
+ cs.SetVector("_WallSize", WallSize);
+ cs.SetFloat("_AvoidWallWeight", AvoidWallWeight);
+ cs.SetBuffer(id, "_BoidDataBufferRead", _boidDataBuffer);
+ cs.SetBuffer(id, "_BoidForceBufferWrite", _boidForceBuffer);
+ cs.Dispatch (id, threadGroupSize, 1, 1); // Run Compute Shader
+
+ // Calculate speed and position from steering force
+ id = cs.FindKernel ("IntegrateCS"); // Get the kernel ID
+ cs.SetFloat("_DeltaTime", Time.deltaTime);
+ cs.SetBuffer(id, "_BoidForceBufferRead", _boidForceBuffer);
+ cs.SetBuffer(id, "_BoidDataBufferWrite", _boidDataBuffer);
+ cs.Dispatch (id, threadGroupSize, 1, 1); // Run Compute Shader
+ }
+
+ // Free the buffer
+ void ReleaseBuffer()
+ {
+ if (_boidDataBuffer != null)
+ {
+ _boidDataBuffer.Release();
+ _boidDataBuffer = null;
+ }
+
+ if (_boidForceBuffer != null)
+ {
+ _boidForceBuffer.Release();
+ _boidForceBuffer = null;
+ }
+ }
+ #endregion
+}
+
+
+The InitBuffer function declares the buffer to use when performing calculations on the GPU. We use a class called ComputeBuffer as a buffer to store the data to be calculated on the GPU. Compute Buffer is a data buffer that stores data for the Compute Shader. You will be able to read and write to the memory buffer on the GPU from a C # script. Pass the number of elements in the buffer and the size (number of bytes) of one element as arguments at initialization. You can get the size (in bytes) of the type by using the Marshal.SizeOf () method. In ComputeBuffer, you can use SetData () to set the value of an array of any structure.
+ +The Simulation function passes the required parameters to ComputeShader and issues a calculation instruction.
+The function written in ComputeShader that actually causes the GPU to perform calculations is called the kernel. The execution unit of this kernel is called a thread, and in order to perform parallel computing processing according to the GPU architecture, any number is treated as a group, and they are called a thread group. Set the product of the number of threads and the number of thread groups to be equal to or greater than the number of Boid objects.
+The kernel is specified in the ComputeShader script using the #pragma kernel directive. An ID is assigned to each of them, and you can get this ID from the C # script by using the FindKernel method.
+Use the SetFloat method, SetVector method, SetBuffer method, etc. to pass the parameters and buffers required for simulation to the Compute Shader. You will need the kernel ID when setting buffers and textures.
+By executing the Dispatch method, an instruction is issued to calculate the kernel defined in Compute Shader on the GPU. In the arguments, specify the kernel ID and the number of thread groups.
+ +Describe the calculation instruction to GPU. There are two kernels, one that calculates the steering force and the other that applies that force to update speed and position.
+Boids.compute
+// Specify kernel function
+#pragma kernel ForceCS // Calculate steering force
+#pragma kernel IntegrateCS // Calculate speed and position
+
+// Boid data structure
+struct BoidData
+{
+ float3 velocity; // velocity
+ float3 position; // position
+};
+
+// Thread size of thread group
+#define SIMULATION_BLOCK_SIZE 256
+
+// Boid data buffer (for reading)
+StructuredBuffer<BoidData> _BoidDataBufferRead;
+// Boid data buffer (for reading and writing)
+RWStructuredBuffer<BoidData> _BoidDataBufferWrite;
+// Boid steering force buffer (for reading)
+StructuredBuffer<float3> _BoidForceBufferRead;
+// Boid steering force buffer (for reading and writing)
+RWStructuredBuffer<float3> _BoidForceBufferWrite;
+
+int _MaxBoidObjectNum; // Number of Boid objects
+
+float _DeltaTime; // Time elapsed from the previous frame
+
+float _SeparateNeighborhoodRadius; // Distance to other individuals to which separation is applied
+float _AlignmentNeighborhoodRadius; // Distance to other individuals to which alignment is applied
+float _CohesionNeighborhoodRadius; // Distance to other individuals to which the bond applies
+
+float _MaxSpeed; // Maximum speed
+float _MaxSteerForce; // Maximum steering force
+
+float _SeparateWeight; // Weight when applying separation
+float _AlignmentWeight; // Weight when applying alignment
+float _CohesionWeight; // Weight when applying join
+
+float4 _WallCenter; // Wall center coordinates
+float4 _WallSize; // Wall size
+float _AvoidWallWeight; // Weight of strength to avoid walls
+
+
+// Limit the magnitude of the vector
+float3 limit(float3 vec, float max)
+{
+ float length = sqrt (dot (vec, vec)); // size
+ return (length > max && length > 0) ? vec.xyz * (max / length) : vec.xyz;
+}
+
+// Return the opposite force when hitting the wall
+float3 avoidWall(float3 position)
+{
+ float3 wc = _WallCenter.xyz;
+ float3 ws = _WallSize.xyz;
+ float3 acc = float3(0, 0, 0);
+ // x
+ acc.x = (position.x < wc.x - ws.x * 0.5) ? acc.x + 1.0 : acc.x;
+ acc.x = (position.x > wc.x + ws.x * 0.5) ? acc.x - 1.0 : acc.x;
+
+ // Y
+ acc.y = (position.y < wc.y - ws.y * 0.5) ? acc.y + 1.0 : acc.y;
+ acc.y = (position.y > wc.y + ws.y * 0.5) ? acc.y - 1.0 : acc.y;
+
+ // Z
+ acc.z = (position.z < wc.z - ws.z * 0.5) ? acc.z + 1.0 : acc.z;
+ acc.z = (position.z > wc.z + ws.z * 0.5) ? acc.z - 1.0 : acc.z;
+
+ return acc;
+}
+
+// Shared memory tile of Boid data for the current thread group
+groupshared BoidData boid_data[SIMULATION_BLOCK_SIZE];
+
+// Kernel function for calculating the steering force of each Boid.
+// Each thread handles one Boid; neighbor data is streamed through
+// shared memory one SIMULATION_BLOCK_SIZE tile at a time.
+[numthreads(SIMULATION_BLOCK_SIZE, 1, 1)]
+void ForceCS
+(
+    uint3 DTid : SV_DispatchThreadID, // ID unique to the entire dispatch
+    uint3 Gid : SV_GroupID,           // Group ID
+    uint3 GTid : SV_GroupThreadID,    // Thread ID within the group
+    uint GI : SV_GroupIndex           // SV_GroupThreadID flattened to one dimension, 0-255
+)
+{
+    const unsigned int P_ID = DTid.x; // own ID
+    float3 P_position = _BoidDataBufferRead[P_ID].position; // own position
+    float3 P_velocity = _BoidDataBufferRead[P_ID].velocity; // own velocity
+
+    float3 force = float3(0, 0, 0); // Initialize steering force
+
+    float3 sepPosSum = float3(0, 0, 0); // Position accumulator for separation
+    int sepCount = 0; // Number of neighbors counted for separation
+
+    float3 aliVelSum = float3(0, 0, 0); // Velocity accumulator for alignment
+    int aliCount = 0; // Number of neighbors counted for alignment
+
+    float3 cohPosSum = float3(0, 0, 0); // Position accumulator for cohesion
+    int cohCount = 0; // Number of neighbors counted for cohesion
+
+    // Process the whole population one shared-memory tile
+    // (SIMULATION_BLOCK_SIZE Boids) at a time
+    [loop]
+    for (uint N_block_ID = 0; N_block_ID < (uint)_MaxBoidObjectNum;
+         N_block_ID += SIMULATION_BLOCK_SIZE)
+    {
+        // Each thread loads one Boid of the tile into shared memory
+        boid_data[GI] = _BoidDataBufferRead[N_block_ID + GI];
+
+        // Block until every thread in the group reaches this call,
+        // so the whole tile is written before anyone reads it
+        GroupMemoryBarrierWithGroupSync();
+
+        // Accumulate the influence of every Boid in the tile
+        for (int N_tile_ID = 0; N_tile_ID < SIMULATION_BLOCK_SIZE;
+             N_tile_ID++)
+        {
+            // Position of the other Boid
+            float3 N_position = boid_data[N_tile_ID].position;
+            // Velocity of the other Boid
+            float3 N_velocity = boid_data[N_tile_ID].velocity;
+
+            // Offset from the other Boid to this one
+            float3 diff = P_position - N_position;
+            // Distance between this Boid and the other Boid
+            float dist = sqrt(dot(diff, diff));
+
+            // --- Separation ---
+            if (dist > 0.0 && dist <= _SeparateNeighborhoodRadius)
+            {
+                // Unit vector pointing from the other Boid toward this one
+                float3 repulse = normalize(P_position - N_position);
+                // Weight by 1/distance (closer neighbors repel more strongly)
+                repulse /= dist;
+                sepPosSum += repulse; // Accumulate
+                sepCount++;           // Count neighbor
+            }
+
+            // --- Alignment ---
+            if (dist > 0.0 && dist <= _AlignmentNeighborhoodRadius)
+            {
+                aliVelSum += N_velocity; // Accumulate
+                aliCount++;              // Count neighbor
+            }
+
+            // --- Cohesion ---
+            if (dist > 0.0 && dist <= _CohesionNeighborhoodRadius)
+            {
+                cohPosSum += N_position; // Accumulate
+                cohCount++;              // Count neighbor
+            }
+        }
+        // Make sure every thread is done reading the tile before
+        // the next iteration overwrites it
+        GroupMemoryBarrierWithGroupSync();
+    }
+
+    // Steering force (separation)
+    float3 sepSteer = (float3)0.0;
+    if (sepCount > 0)
+    {
+        sepSteer = sepPosSum / (float)sepCount;     // Average repulsion
+        sepSteer = normalize(sepSteer) * _MaxSpeed; // Scale to maximum speed
+        sepSteer = sepSteer - P_velocity;           // Steering = desired - current velocity
+        sepSteer = limit(sepSteer, _MaxSteerForce); // Clamp steering force
+    }
+
+    // Steering force (alignment)
+    float3 aliSteer = (float3)0.0;
+    if (aliCount > 0)
+    {
+        aliSteer = aliVelSum / (float)aliCount;     // Average neighbor velocity
+        aliSteer = normalize(aliSteer) * _MaxSpeed; // Scale to maximum speed
+        aliSteer = aliSteer - P_velocity;           // Steering = desired - current velocity
+        aliSteer = limit(aliSteer, _MaxSteerForce); // Clamp steering force
+    }
+
+    // Steering force (cohesion)
+    float3 cohSteer = (float3)0.0;
+    if (cohCount > 0)
+    {
+        cohPosSum = cohPosSum / (float)cohCount;    // Average neighbor position (centroid)
+        cohSteer = cohPosSum - P_position;          // Vector toward the centroid
+        cohSteer = normalize(cohSteer) * _MaxSpeed; // Scale to maximum speed
+        cohSteer = cohSteer - P_velocity;           // Steering = desired - current velocity
+        cohSteer = limit(cohSteer, _MaxSteerForce); // Clamp steering force
+    }
+
+    force += aliSteer * _AlignmentWeight; // Weighted alignment contribution
+    force += cohSteer * _CohesionWeight;  // Weighted cohesion contribution
+    force += sepSteer * _SeparateWeight;  // Weighted separation contribution
+
+    _BoidForceBufferWrite[P_ID] = force;  // Write the result
+}
+
+// Kernel function for velocity and position integration.
+// Reads the steering force computed by ForceCS and advances each Boid.
+[numthreads(SIMULATION_BLOCK_SIZE, 1, 1)]
+void IntegrateCS
+(
+    uint3 DTid : SV_DispatchThreadID // Unique ID for the entire dispatch
+)
+{
+    const unsigned int P_ID = DTid.x; // Get index
+
+    BoidData b = _BoidDataBufferWrite[P_ID];   // Read the current Boid data
+    float3 force = _BoidForceBufferRead[P_ID]; // Read the steering force
+
+    // Apply a repulsive force when the Boid approaches a wall
+    force += avoidWall(b.position) * _AvoidWallWeight;
+
+    b.velocity += force * _DeltaTime;          // Apply steering force to velocity
+    b.velocity = limit(b.velocity, _MaxSpeed); // Clamp speed
+    b.position += b.velocity * _DeltaTime;     // Advance position
+
+    _BoidDataBufferWrite[P_ID] = b; // Write back the result
+}
+
+
+The ForceCS kernel calculates the steering force.
+ +Variables with the storage qualifier groupshared will now be written to shared memory. Shared memory cannot write large amounts of data, but it is located close to registers and can be accessed very quickly. This shared memory can be shared within the thread group. By writing the information of other individuals for SIMULATION_BLOCK_SIZE together in the shared memory so that it can be read at high speed within the same thread group, the calculation considering the positional relationship with other individuals is efficient. I will go to the target.
+
++Figure 3.3: Basic GPU architecture +
+When accessing the data written to the shared memory, it is necessary to describe the GroupMemoryBarrierWithGroupSync () method to synchronize the processing of all threads in the thread group. GroupMemoryBarrierWithGroupSync () blocks the execution of all threads in the group until all threads in the thread group reach this call. This ensures that all threads in the thread group have properly initialized the boid_data array.
+ +If there is an individual closer than the specified distance, the vector from the position of the individual to its own position is calculated and normalized. By dividing the vector by the value of the distance, it is weighted so that it avoids more when it is closer and avoids it smaller when it is far, and it is added as a force to prevent collision with other individuals. After the calculation with all the individuals is completed, the steering force is calculated from the relationship with the current speed using the value.
+ +If there is an individual closer than the specified distance, the velocity (Velocity) of that individual is added up, the number of the individual is counted at the same time, and the velocity of the close individual (that is, the direction in which it is facing) is calculated by those values. Calculate the average of. After the calculation with all the individuals is completed, the steering force is calculated from the relationship with the current speed using the value.
+ +If there is an individual closer than the specified distance, the position of that individual is added, the number of the individual is counted at the same time, and the average (center of gravity) of the position of the close individual is calculated from those values. Furthermore, the vector toward that point is found, and the steering force is found in relation to the current speed.
+ +The IntegrateCS kernel updates the speed and position of the Boid based on the steering force obtained by ForceCS (). In AvoidWall, when you try to go out of the specified area, it applies a reverse force to stay inside the area.
+ +This script draws the results obtained from the Boids simulation on the specified mesh.
+BoidsRender.cs
+using System.Collections;
+using System.Collections.Generic;
+using UnityEngine;
+
+// Guarantee that the GPUBoids component is attached to the same GameObject
+[RequireComponent(typeof(GPUBoids))]
+public class BoidsRender : MonoBehaviour
+{
+    #region Parameters
+    // Scale of each Boid object to draw
+    public Vector3 ObjectScale = new Vector3(0.1f, 0.2f, 0.5f);
+    #endregion
+
+    #region Script References
+    // Reference to the GPUBoids simulation script
+    public GPUBoids GPUBoidsScript;
+    #endregion
+
+    #region Built-in Resources
+    // Mesh to draw for every instance
+    public Mesh InstanceMesh;
+    // Material used to draw the instances
+    public Material InstanceRenderMaterial;
+    #endregion
+
+    #region Private Variables
+    // Arguments for GPU instancing (transferred to a ComputeBuffer):
+    // index count per instance, instance count,
+    // start index location, base vertex location, start instance location
+    uint[] args = new uint[5] { 0, 0, 0, 0, 0 };
+    // Argument buffer for Graphics.DrawMeshInstancedIndirect
+    ComputeBuffer argsBuffer;
+    #endregion
+
+    #region MonoBehaviour Functions
+    void Start ()
+    {
+        // Initialize the argument buffer
+        argsBuffer = new ComputeBuffer(1, args.Length * sizeof(uint),
+            ComputeBufferType.IndirectArguments);
+    }
+
+    void Update ()
+    {
+        // Draw the instanced meshes every frame
+        RenderInstancedMesh();
+    }
+
+    void OnDisable()
+    {
+        // Release the argument buffer
+        if (argsBuffer != null)
+            argsBuffer.Release();
+        argsBuffer = null;
+    }
+    #endregion
+
+    #region Private Functions
+    void RenderInstancedMesh()
+    {
+        // Do nothing if the drawing material or the GPUBoids script is null,
+        // or if this platform does not support GPU instancing
+        if (InstanceRenderMaterial == null || GPUBoidsScript == null ||
+            !SystemInfo.supportsInstancing)
+            return;
+
+        // Get the index count of the specified mesh
+        uint numIndices = (InstanceMesh != null) ?
+            (uint)InstanceMesh.GetIndexCount(0) : 0;
+        // Set the index count of the mesh
+        args[0] = numIndices;
+        // Set the number of instances
+        args[1] = (uint)GPUBoidsScript.GetMaxObjectNum();
+        argsBuffer.SetData (args); // Upload to the buffer
+
+        // Give the material the buffer that holds the Boid data
+        InstanceRenderMaterial.SetBuffer("_BoidDataBuffer",
+            GPUBoidsScript.GetBoidDataBuffer());
+        // Set the Boid object scale
+        InstanceRenderMaterial.SetVector("_ObjectScale", ObjectScale);
+        // Define the bounding volume of the simulation area
+        var bounds = new Bounds
+        (
+            GPUBoidsScript.GetSimulationAreaCenter(), // center
+            GPUBoidsScript.GetSimulationAreaSize()    // size
+        );
+        // Draw the mesh with GPU instancing
+        Graphics.DrawMeshInstancedIndirect
+        (
+            InstanceMesh,           // Mesh to instance
+            0,                      // Submesh index
+            InstanceRenderMaterial, // Material to draw with
+            bounds,                 // Bounding volume
+            argsBuffer              // Argument buffer for GPU instancing
+        );
+    }
+    #endregion
+}
+
+
+When you want to draw a large number of the same mesh, if you create GameObjects one by one, the draw call will increase and the drawing load will increase. In addition, the cost of transferring the calculation result of ComputeShader to the CPU memory is high, and if you want to perform processing at high speed, it is necessary to pass the calculation result of GPU as it is to the drawing shader and perform drawing processing. With Unity's GPU instancing, you can draw a large number of identical meshes at high speed with few draw calls without creating unnecessary GameObjects.
+ +This script uses the Graphics.DrawMeshInstancedIndirect method to draw a mesh with GPU instancing. This method allows you to pass the number of mesh indexes and instances as a ComputeBuffer. This is useful if you want to read all instance data from the GPU.
+Start () initializes the argument buffer for this GPU instancing. Specify ComputeBufferType.IndirectArguments as the third argument of the constructor at initialization .
+RenderInstancedMesh () is performing mesh drawing with GPU instancing. The Boid data (velocity, position array) obtained by the Boids simulation is passed to the material InstanceRenderMaterial for drawing with the SetBuffer method.
+The Graphics.DrawMeshInstancedIndirect method is passed the mesh to be instanced, the submesh index, the drawing material, the bounds, and a buffer storing data such as the number of instances.
+This method should normally be called within Update ().
+A drawing shader that supports the Graphics.DrawMeshInstancedIndirect method.
+BoidsRender.shader
+Shader "Hidden/GPUBoids/BoidsRender"
+{
+    Properties
+    {
+        _Color ("Color", Color) = (1,1,1,1)
+        _MainTex ("Albedo (RGB)", 2D) = "white" {}
+        _Glossiness ("Smoothness", Range(0,1)) = 0.5
+        _Metallic ("Metallic", Range(0,1)) = 0.0
+    }
+    SubShader
+    {
+        Tags { "RenderType"="Opaque" }
+        LOD 200
+
+        CGPROGRAM
+        // surf() is the surface shader, Standard is the lighting model,
+        // vert() is the custom vertex shader; addshadow generates a shadow pass
+        #pragma surface surf Standard vertex:vert addshadow
+        // Generate the procedural-instancing variant; setup() is called
+        // at the beginning of the vertex stage for each instance
+        #pragma instancing_options procedural:setup
+
+        struct Input
+        {
+            float2 uv_MainTex;
+        };
+        // Boid structure (layout must match the compute-shader side)
+        struct BoidData
+        {
+            float3 velocity; // velocity
+            float3 position; // position
+        };
+
+        #ifdef UNITY_PROCEDURAL_INSTANCING_ENABLED
+        // Structured buffer holding the Boid data of every instance
+        StructuredBuffer<BoidData> _BoidDataBuffer;
+        #endif
+
+        sampler2D _MainTex; // Texture
+
+        half _Glossiness;   // Smoothness
+        half _Metallic;     // Metallic
+        fixed4 _Color;      // Color
+
+        float3 _ObjectScale; // Boid object scale
+
+        // Convert Euler angles (radians) to a rotation matrix
+        float4x4 eulerAnglesToRotationMatrix(float3 angles)
+        {
+            float ch = cos(angles.y); float sh = sin(angles.y); // heading
+            float ca = cos(angles.z); float sa = sin(angles.z); // attitude
+            float cb = cos(angles.x); float sb = sin(angles.x); // bank
+
+            // RyRxRz (Heading Bank Attitude)
+            return float4x4(
+                ch * ca + sh * sb * sa, -ch * sa + sh * sb * ca, sh * cb, 0,
+                cb * sa, cb * ca, -sb, 0,
+                -sh * ca + ch * sb * sa, sh * sa + ch * sb * ca, ch * cb, 0,
+                0, 0, 0, 1
+            );
+        }
+
+        // Vertex shader: transforms each vertex by the instance's
+        // scale, rotation (derived from its velocity) and position
+        void vert(inout appdata_full v)
+        {
+            #ifdef UNITY_PROCEDURAL_INSTANCING_ENABLED
+
+            // Fetch this instance's Boid data via its instance ID
+            BoidData boidData = _BoidDataBuffer[unity_InstanceID];
+
+            float3 pos = boidData.position.xyz; // Boid position
+            float3 scl = _ObjectScale;          // Boid scale
+
+            // Object-to-world transformation matrix
+            float4x4 object2world = (float4x4)0;
+            // Place the scale values on the diagonal
+            object2world._11_22_33_44 = float4(scl.xyz, 1.0);
+            // Yaw: rotation about the Y axis, from the velocity
+            float rotY =
+                atan2(boidData.velocity.x, boidData.velocity.z);
+            // Pitch: rotation about the X axis, from the velocity
+            float rotX =
+                -asin(boidData.velocity.y / (length(boidData.velocity.xyz)
+                + 1e-8)); // epsilon prevents division by zero
+            // Build the rotation matrix from the Euler angles (radians)
+            float4x4 rotMatrix =
+                eulerAnglesToRotationMatrix(float3(rotX, rotY, 0));
+            // Apply the rotation to the matrix
+            object2world = mul(rotMatrix, object2world);
+            // Apply the position (translation) to the matrix
+            object2world._14_24_34 += pos.xyz;
+
+            // Transform the vertex
+            v.vertex = mul(object2world, v.vertex);
+            // Transform the normal
+            // NOTE(review): a non-uniform scale would normally require the
+            // inverse-transpose matrix here — confirm the scale used is acceptable
+            v.normal = normalize(mul(object2world, v.normal));
+            #endif
+        }
+
+        // Required by procedural instancing; the per-instance transform is
+        // applied in vert() instead, so nothing is needed here
+        void setup()
+        {
+        }
+
+        // Surface shader
+        void surf (Input IN, inout SurfaceOutputStandard o)
+        {
+            fixed4 c = tex2D (_MainTex, IN.uv_MainTex) * _Color;
+            o.Albedo = c.rgb;
+            o.Metallic = _Metallic;
+            o.Smoothness = _Glossiness;
+        }
+        ENDCG
+    }
+    FallBack "Diffuse"
+}
+
+
+#pragma surface surf Standard vertex: vert addshadow In this part, surf () is specified as the surface shader, Standard is specified as the lighting model, and vert () is specified as the custom vertex shader.
+By writing procedural:FunctionName in the #pragma instancing_options directive, you can tell Unity to generate an additional shader variant for use with the Graphics.DrawMeshInstancedIndirect method; the function specified by FunctionName is then called at the beginning of the vertex shader stage. In the official sample (https://docs.unity3d.com/ScriptReference/Graphics.DrawMeshInstancedIndirect.html), this function rewrites the unity_ObjectToWorld and unity_WorldToObject matrices based on the position, rotation and scale of each instance, but in this sample program the Boids data is received in the vertex shader and the vertices and normals are transformed there instead (whether that is the best approach is debatable). Therefore, the specified setup function is left empty.
+ +Describe the processing to be performed on the vertices of the mesh passed to the shader in the vertex shader (Vertex Shader).
+You can get a unique ID for each instance by unity_InstanceID. By specifying this ID in the index of the array of StructuredBuffer declared as a buffer of Boid data, you can get Boid data unique to each instance.
+ +From the Boid's velocity data, calculate the value of rotation that points in the direction of travel. For the sake of intuitive handling, we will use Euler angles for rotation. If you think of a Boid as a flying object, the three-axis rotations of the coordinates relative to the object are called pitch, yaw, and roll, respectively.
+
++Figure 3.4: Axis and Rotation Names +
+First, from the velocity about the Z axis and the velocity about the X axis, find the yaw (which direction is facing the horizontal plane) using the atan2 method that returns the arctangent.
+
++Figure 3.5: Relationship between speed and angle (yaw) +
+Next, from the magnitude of the velocity and the ratio of the velocity with respect to the Y axis, the pitch (slope up and down) is calculated using the asin method that returns an inverse sine (arc sine). If the speed of the Y axis is small among the speeds of each axis, the amount of rotation is weighted so that there is little change and the speed remains horizontal.
+
++Figure 3.6: Relationship between velocity and angle (pitch) +
+Coordinate transformation processes such as movement, rotation, and scaling can be collectively represented by a single matrix. Defines a 4x4 matrix object2world.
+First, substitute the scale values. The matrix S that scales by S_x, S_y, S_z along the X, Y, and Z axes respectively is expressed as follows.
+\rm
+S=
+\left(
+\begin{array}{cccc}
+\rm S_x & 0 & 0 & 0 \\
+0 & \rm S_y & 0 & 0 \\
+0 & 0 & \rm S_z & 0 \\
+0 & 0 & 0 & 1
+\end{array}
+\right)
+
+Variables of type float4x4 in HLSL can specify specific elements of the matrix using a swizzle such as ._11_22_33_44. By default, the components are arranged as follows:
+Form 3.1:
+| 11 | 12 | 13 | 14 |
|---|---|---|---|
| 21 | 22 | 23 | 24 |
| 31 | 32 | 33 | 34 |
| 41 | 42 | 43 | 44 |
Here, substitute the XYZ scale values for 11, 22, 33, and 1 for 44.
+Then apply the rotation. If the rotations R_x, R_y, R_z about the X, Y, and Z axes are each represented by a matrix, they are:
+\rm
+R_x(\phi)=
+\left(
+\begin{array}{cccc}
+1 & 0 & 0 & 0 \\
+0 & \rm cos(\phi) & \rm -sin(\phi) & 0 \\
+0 & \rm sin(\phi) & \rm cos(\phi) & 0 \\
+0 & 0 & 0 & 1
+\end{array}
+\right)
+
+\rm
+R_y(\theta)=
+\left(
+\begin{array}{cccc}
+\rm cos(\theta) & 0 & \rm sin(\theta) & 0 \\
+0 & 1 & 0 & 0 \\
+\rm -sin(\theta) & 0 & \rm cos(\theta) & 0 \\
+0 & 0 & 0 & 1
+\end{array}
+\right)
+
+\rm
+R_z(\psi)=
+\left(
+\begin{array}{cccc}
+\rm cos(\psi) & \rm -sin(\psi) & 0 & 0 \\
+\rm sin(\psi) & \rm cos(\psi) & 0 & 0 \\
+0 & 0 & 1 & 0 \\
+0 & 0 & 0 & 1
+\end{array}
+\right)
+
+Combine this into a matrix. At this time, the behavior at the time of rotation changes depending on the order of the axes of rotation to be combined, but if you combine in this order, it should be similar to the standard rotation of Unity.
+
++Figure 3.7: Synthesis of rotation matrix +
+The rotation is applied by finding the product of the rotation matrix thus obtained and the matrix to which the above scale is applied.
+Then apply the translation. Assuming translations T_x, T_y, T_z along each axis, the matrix is expressed as follows.
+\rm T=
+\left(
+\begin{array}{cccc}
+1 & 0 & 0 & \rm T_x \\
+0 & 1 & 0 & \rm T_y \\
+0 & 0 & 1 & \rm T_z \\
+0 & 0 & 0 & 1
+\end{array}
+\right)
+
+This translation can be applied by adding the Position data for each of the XYZ axes to the 14, 24, and 34 components.
+By applying the matrix obtained by these calculations to the vertices and normals, the Boid transform data is reflected.
+ +I think that objects that move like a group like this are drawn.
+
++Figure 3.8: Execution result +
+The implementation introduced in this chapter uses the minimum Boids algorithm, but it has different characteristics such as a large group or a number of small colonies even by adjusting the parameters. I think it will move. In addition to the basic rules of conduct shown here, there are other rules to consider. For example, if this is a school of fish and foreign enemies that prey on them appear, they will naturally move away, and if there are obstacles such as terrain, the fish will avoid hitting them. When thinking about vision, the field of view and accuracy differ depending on the species of animal, and I think that if you exclude other individuals outside the field of view from the calculation process, it will be closer to the actual one. The characteristics of movement also change depending on the environment such as whether it flies in the sky, moves in water, or moves on land, and the characteristics of the motor organs for locomotion. You should also pay attention to individual differences.
+Parallel processing by GPU can calculate more individuals than calculation by CPU, but basically the calculation with other individuals is done by brute force, and the calculation efficiency is not very good. To do this, the calculation cost is improved by improving the efficiency of searching for nearby individuals, such as registering individuals in an area divided by a grid or block according to their position and performing calculation processing only for individuals existing in adjacent areas. Can be suppressed.
+There is still plenty of room for improvement, and by applying appropriate implementation and behavioral rules, we will be able to express even more beautiful, powerful, dense and tasty group movements. I want to be able to do it.
+ +
|
![]() |
|
In this chapter, we will explain the fluid simulation by the lattice method using Compute Shader.
+ +https://github.com/IndieVisualLab/UnityGraphicsProgramming/
+The sample is stored in Assets/StableFluids of the repository above.
+ +In this chapter, we will explain the fluid simulation by the lattice method and the calculation method and understanding of mathematical formulas necessary for realizing them. First of all, what is the grid method? In order to explore its meaning, let's take a closer look at how to analyze "flow" in fluid mechanics.
+ +Fluid mechanics is characterized by formulating a natural phenomenon, "flow," and making it computable. How can this "flow" be quantified and analyzed?
If you go straight to it, you can quantify it by deriving the "flow velocity when the time advances for a moment". To put it a little mathematically, it can be rephrased as an analysis of the amount of change in the flow velocity vector when differentiating with time.
However, there are two possible methods for analyzing this flow.
One is to measure the flow velocity vector of each fixed lattice space by dividing the hot water in the bath into a grid when imagining the hot water in the bath.
And the other is to float a duck in the bath and analyze the movement of the duck itself. Of these two methods, the former is called the "Euler's method" and the latter is called the "Lagrange's method".
Now, let's get back to computer graphics. There are several simulation methods for fluid simulation, such as "Euler's method" and "Lagrange's method", but they can be roughly divided into the following three types.
+As you can imagine from the meaning of Chinese characters, the grid method creates a grid-like "field" when simulating the flow, like the "Euler's method", and when it is differentiated with time, it is It is a method of simulating the speed of each grid. In addition, the particle method is a method of simulating the advection of the particles themselves, focusing on the particles, such as the "Lagrange method".
Along with the lattice method and particle method, there are areas of strength and weakness in each other.
The lattice method is good at calculating pressure, viscosity, diffusion, etc. in fluid simulation, but not good at advection calculation.
On the contrary, the particle method is good at calculating advection. (You can imagine
these strengths and weaknesses when you think of how to analyze Euler's method and Lagrange's method.) To supplement these, the lattice method + particle method represented by the FLIP method. There are also methods that complement each other's areas of expertise.
+In this chapter, we will explain the implementation of fluid simulation and the necessary mathematical formulas, based on Stable Fluids, a paper on incompressible viscous fluid simulation with the lattice method by Jos Stam, published at SIGGRAPH 1999.
+ +First, let's look at the Navier-Stokes equation in the grid method.
+\dfrac {\partial \overrightarrow {u}} {\partial t}=-\left( \overrightarrow {u} \cdot \nabla \right) \overrightarrow {u} + \nu \nabla ^{2} \overrightarrow {u} + \overrightarrow{f}
+
+\dfrac {\partial \rho} {\partial t}=-\left( \overrightarrow {u} \cdot \nabla \right) \rho + \kappa \nabla ^{2} \rho + S
+
+\nabla \cdot \overrightarrow{u} = 0
+
+Of the above, the first equation represents the velocity field and the second represents the density field. The third is the "continuity equation (conservation of mass)". Let's unravel these three formulas one by one.
+ +First, let's unravel the "continuity equation (conservation of mass)", which is short as an equation and works as a condition when simulating an "incompressible" fluid.
When simulating a fluid, it is necessary to make a clear distinction between compressible and incompressible objects. For example, if the target is a gas whose density changes with pressure, it will be a compressible fluid. Conversely, objects with a constant density, such as water, are incompressible fluids.
Since this chapter deals with incompressible fluid simulations, the divergence of each cell in the velocity field should be kept at zero. That is, it offsets the inflow and outflow of the velocity field and keeps it at zero. If there is an inflow, it will flow out, so the flow velocity will propagate. This condition can be expressed by the following equation as a continuity equation (conservation of mass).
\nabla \cdot \overrightarrow{u} = 0
+
+The above means that "divergence is 0". First, let's check the formula of "divergence".
+ +\nabla \cdot \overrightarrow{u} = \nabla \cdot (u, v) = \dfrac{\partial u}{\partial x} + \dfrac{\partial v}{\partial y}
+
+\nabla (the nabla operator) is called the vector differential operator. For example, in a two-dimensional vector field it stands for \left( \dfrac{\partial}{\partial x}, \dfrac{\partial}{\partial y} \right), and it acts as an operator that simplifies the notation of partial derivatives. Since \nabla is an operator, it has no meaning by itself; the operation it performs changes depending on whether it is combined as an inner product, an outer product, or simply applied to a function, as in \nabla f.
This time, let's explain "divergence" which takes the inner product of partial differential. First, let's see why this formula means "divergence".
In order to understand the divergence, let's first cut out one cell in the lattice space as shown below.
+
++Figure 4.1: Extracting cells in the differential interval (Δx, Δy) from the vector field +
+Divergence is the calculation of how many vectors flow out and flow into one cell of a vector field. The outflow is + and the inflow is-.
+As mentioned above, when a cell of the vector field is cut out, the divergence is computed from the amount of change between a point x and the slightly advanced point x + \Delta x in the x direction, combined with the amount of change between a point y and the slightly advanced point y + \Delta y in the y direction. The fact that the outflow is obtained from the partial derivatives can be shown by carrying out the differential operation on the figure above.
+\frac{i(x + \Delta x, y)\Delta y - i(x, y)\Delta y + j(x, y + \Delta y)\Delta x - j(x, y)\Delta x}{\Delta x \Delta y}
+
+= \frac{i(x + \Delta x, y) - i(x, y)}{\Delta x} + \frac{j(x, y + \Delta y) - j(x, y)}{\Delta y}
+
+Taking the limit of the above formula,
+\lim_{\Delta x \to 0} \frac{i(x + \Delta x, y) - i(x, y)}{\Delta x} + \lim_{\Delta y \to 0} \frac{j(x, y + \Delta y) - j(x, y)}{\Delta y} = \dfrac{\partial i}{\partial x} + \dfrac{\partial j}{\partial y}
+
+By doing so, we can see that the final equation is the equation of the inner product with the partial derivative.
+ +Next, I will explain the velocity field, which is the main body of the lattice method. Before that, in implementing the Navier-Stokes equation of the velocity field, let's confirm the gradient and the Laplacian in addition to the divergence confirmed earlier.
+ +\nabla f(x, y) = \left( \dfrac{\partial f}{\partial x}_,\dfrac{\partial f}{\partial y}\right)
+
+\nabla f (\mathrm{grad}\, f) is the formula for the gradient. Its meaning is the vector obtained by sampling the function f at coordinates slightly advanced in each partial-derivative direction and combining the resulting values: it points in the direction in which the value of f increases most when partially differentiated.
+ +\Delta f = \nabla^2 f = \nabla \cdot \nabla f = \frac{\partial^2 f}{\partial x^2} + \frac{\partial^2 f}{\partial y^2}
+
+The Laplacian is represented by a symbol that looks like the nabla turned upside down. (It is the same symbol as delta, so read it from context and be careful not to confuse the two.) It is also written
+\nabla^2 f or \nabla \cdot \nabla f, and it is calculated as the second derivative.
Also, if you think about it by disassembling it, you can take the form of finding the divergence by taking the gradient of the function.
In terms of meaning, in the vector field, the part concentrated in the gradient direction has a lot of inflow, so when the divergence is taken, the part with a low gradient has a lot of springing out, so when the divergence is taken, it becomes +. I can imagine that.
Laplacian operators include scalar Laplacian and vector Laplacian, and when acting on a vector field, gradient, divergence, and rotation (cross product of ∇ and vector) are used.
\nabla^2 \overrightarrow{u} = \nabla \nabla \cdot \overrightarrow{u} - \nabla \times \nabla \times \overrightarrow{u}
+
+However, only in the case of the Cartesian coordinate system, the gradient and divergence can be obtained for each component of the vector and can be obtained by combining them.
+\nabla^2 \overrightarrow{u} = \left(
+\dfrac{\partial ^2 u_x}{\partial x^2}+\dfrac{\partial ^2 u_x}{\partial y^2}+\dfrac{\partial ^2 u_x}{\partial z^2}_,
+\dfrac{\partial ^2 u_y}{\partial x^2}+\dfrac{\partial ^2 u_y}{\partial y^2}+\dfrac{\partial ^2 u_y}{\partial z^2}_,
+\dfrac{\partial ^2 u_z}{\partial x^2}+\dfrac{\partial ^2 u_z}{\partial y^2}+\dfrac{\partial ^2 u_z}{\partial z^2}
+\right)
+
+This completes the confirmation of the mathematical formulas required to solve the Navier-Stokes equation in the grid method. From here, let's look at the velocity field equation for each term.
+ +\dfrac {\partial \overrightarrow {u}} {\partial t}=-\left( \overrightarrow {u} \cdot \nabla \right) \overrightarrow {u} + \nu \nabla ^{2} \overrightarrow {u} + \overrightarrow {f}
+
+Of the above, \overrightarrow{u} is the flow velocity, \nu is the kinematic viscosity coefficient, and \overrightarrow{f} is the external force.
You can see that the left side is the flow velocity when the partial differential is taken with respect to time. On the right side, the first term is the advection term, the second term is the diffusion viscosity term, the third term is the pressure term, and the fourth term is the external force term.
Even if these can be done collectively at the time of calculation, it is necessary to implement them in steps at the time of implementation.
First of all, as a step, if you do not receive an external force, you cannot make a change under the initial conditions, so I would like to start with the external force term in the fourth term.
This is simply the part that adds the vectors from the outside. In other words, when the velocity field is 0 in the initial condition, the vector is added to the corresponding ID of RWTexture2D from the UI as the starting point of the vector or some event.
The kernel of the external force term of the compute shader is implemented as follows. Also, describe the definitions of each coefficient and buffer that will be used in the compute shader.
float visc; //Kinematic viscosity coefficient
+float dt; // delta time
+float velocityCoef; //external force coefficient of velocity field
+float densityCoef; //external force coefficient of density field
+
+// xy = velocity, z = density, fluid solver to pass to drawing shader
+RWTexture2D<float4> solver;
+//density field, density field
+RWTexture2D<float> density;
+//velocity field, velocity field
+RWTexture2D<float2> velocity;
+// xy = for vel, z = for dens. when project, x = p, y = div
+// Save the buffer one step before and the temporary buffer when saving the mass
+RWTexture2D<float3> prev;
+// xy = velocity source, z = density source External force input buffer
+Texture2D source;
+
+[numthreads(THREAD_X, THREAD_Y, THREAD_Z)]
+void AddSourceVelocity(uint2 id : SV_DispatchThreadID)
+{
+ uint w, h;
+ velocity.GetDimensions(w, h);
+
+ if (id.x < w && id.y < h)
+ {
+ velocity[id] += source[id].xy * velocityCoef * dt;
+ prev[id] = float3(source[id].xy * velocityCoef * dt, prev[id].z);
+ }
+}
+
+The next step is to implement the second term, the diffusion viscosity term.
+ +\nu \nabla ^{2} \overrightarrow {u}
+
+When there are values on the left and right of the \nabla operator and the \Delta operator, the rule is that they "act only on the element to the right", so in this case set the kinematic viscosity coefficient aside for the moment and consider the vector Laplacian part first.
With the vector Laplacian of the flow velocity \overrightarrow{u}, the gradient and divergence of each component of the vector are taken and combined, and the flow velocity diffuses to adjacent cells. Multiplying it by the kinematic viscosity coefficient adjusts the momentum of the diffusion.
Here, since the gradient of each component of the flow velocity is taken and diffused, inflow from the adjacency and outflow to the adjacency occur, and the phenomenon that the vector received in step 1 affects the adjacency can be understood. think.
In terms of mounting, some ingenuity is required. If implemented according to the formula, if the diffusivity obtained by multiplying the viscosity coefficient by the differential time / number of lattices becomes high, vibration will occur, convergence will not be achieved, and the simulation itself will eventually diverge.
Iterative methods such as the Gauss-Seidel method, Jacobi method, and SOR method are used here to make the diffusion stable. Here, let's simulate with the Gauss-Seidel method.
The Gauss-Seidel method is a method of converting a formula into a linear equation consisting of unknowns for its own cell, using the calculated value immediately at the next iteration, and chaining it to converge to an approximate answer. The higher the number of iterations, the more accurate the values will converge, but graphics in real-time rendering require better frame rates and aesthetics rather than accurate results, so iterations are machine. Adjust for performance and appearance.
#define GS_ITERATE 4
+
+[numthreads(THREAD_X, THREAD_Y, THREAD_Z)]
+void DiffuseVelocity(uint2 id : SV_DispatchThreadID)
+{
+ uint w, h;
+ velocity.GetDimensions(w, h);
+
+ if (id.x < w && id.y < h)
+ {
+ float a = dt * visc * w * h;
+
+ [unroll]
+ for (int k = 0; k < GS_ITERATE; k++) {
+ velocity[id] = (prev[id].xy + a * (
+ velocity[int2(id.x - 1, id.y)] +
+ velocity[int2(id.x + 1, id.y)] +
+ velocity[int2(id.x, id.y - 1)] +
+ velocity[int2(id.x, id.y + 1)]
+ )) / (1 + 4 * a);
+ SetBoundaryVelocity(id, w, h);
+ }
+ }
+}
+
+The SetBoundaryVelocity function above is a method for boundaries. Please refer to the repository for details.
+ +\nabla \cdot \overrightarrow{u} = 0
+
+Let's go back to the conservation of mass side before proceeding with the section. In the process so far, the force received in the external force term is diffused in the velocity field, but at present, the mass of each cell is not preserved, and the mass is in the place where it keeps springing out and the place where there is a lot of inflow. Is in an unsaved state.
As in the equation above, you must save the mass and bring the divergence of each cell to 0, so let's save the mass here.
In addition, when performing the mass conservation step with Compute Shader, the field must be fixed because the partial differential operation with the adjacent thread is performed. It was expected to speed up if the partial differential operation could be performed in the group shared memory, but when the partial differential was taken from another group thread, the value could not be obtained and the result was dirty, so here is a buffer. While confirming, proceed in 3 steps.
Divide the mass-conservation kernel into 3 steps — (1) calculate the divergence from the velocity field, (2) solve the Poisson equation with the Gauss-Seidel method, (3) subtract the resulting gradient from the
velocity field — and bring the field to mass conservation while fixing it step by step. The SetBoundary~ functions are method calls for handling the boundary.
//Mass conservation Step1.
+// In step1, calculate the divergence from the velocity field
+[numthreads(THREAD_X, THREAD_Y, THREAD_Z)]
+void ProjectStep1(uint2 id : SV_DispatchThreadID)
+{
+ uint w, h;
+ velocity.GetDimensions(w, h);
+
+ if (id.x < w && id.y < h)
+ {
+ float2 uvd;
+ uvd = float2(1.0 / w, 1.0 / h);
+
+ prev[id] = float3(0.0,
+ -0.5 *
+ (uvd.x * (velocity[int2(id.x + 1, id.y)].x -
+ velocity[int2(id.x - 1, id.y)].x)) +
+ (uvd.y * (velocity[int2(id.x, id.y + 1)].y -
+ velocity[int2(id.x, id.y - 1)].y)),
+ prev[id].z);
+
+ SetBoundaryDivergence(id, w, h);
+ SetBoundaryDivPositive(id, w, h);
+ }
+}
+
+//Mass conservation Step2.
+// In step2, the Poisson equation is solved by the Gauss-Seidel method from the divergence obtained in step1.
+[numthreads(THREAD_X, THREAD_Y, THREAD_Z)]
+void ProjectStep2(uint2 id : SV_DispatchThreadID)
+{
+ uint w, h;
+
+ velocity.GetDimensions(w, h);
+
+ if (id.x < w && id.y < h)
+ {
+ for (int k = 0; k < GS_ITERATE; k++)
+ {
+ prev[id] = float3(
+ (prev[id].y + prev[uint2(id.x - 1, id.y)].x +
+ prev[uint2(id.x + 1, id.y)].x +
+ prev[uint2(id.x, id.y - 1)].x +
+ prev[uint2(id.x, id.y + 1)].x) / 4,
+ prev[id].yz);
+ SetBoundaryDivPositive(id, w, h);
+ }
+ }
+}
+
+//Mass conservation Step3.
+// In step3, set ∇ · u = 0.
+[numthreads(THREAD_X, THREAD_Y, THREAD_Z)]
+void ProjectStep3(uint2 id : SV_DispatchThreadID)
+{
+ uint w, h;
+
+ velocity.GetDimensions(w, h);
+
+ if (id.x < w && id.y < h)
+ {
+ float velX, velY;
+ float2 uvd;
+ uvd = float2(1.0 / w, 1.0 / h);
+
+ velX = velocity[id].x;
+ velY = velocity[id].y;
+
+ velX -= 0.5 * (prev[uint2(id.x + 1, id.y)].x -
+ prev[uint2(id.x - 1, id.y)].x) / uvd.x;
+ velY -= 0.5 * (prev[uint2(id.x, id.y + 1)].x -
+ prev[uint2(id.x, id.y - 1)].x) / uvd.y;
+
+ velocity[id] = float2(velX, velY);
+ SetBoundaryVelocity(id, w, h);
+ }
+}
+
+The velocity field is now in a state of conservation of mass. Since the inflow occurs at the place where the outflow occurs and the outflow occurs from the place where there is a lot of inflow, it is expressed like a fluid.
+ +-\left( \overrightarrow {u} \cdot \nabla \right) \overrightarrow {u}
+
+Lagrange's method is used for the advection term, but the work of back tracing the velocity field one step before and moving the value of the place where the velocity vector is subtracted from the corresponding cell to the current location Do this for each cell. When backtraced, it does not go back to the place where it fits exactly in the grid, so when advection, linear interpolation with the neighboring 4 cells is performed and the correct value is advected.
+[numthreads(THREAD_X, THREAD_Y, THREAD_Z)]
+void AdvectVelocity(uint2 id : SV_DispatchThreadID)
+{
+ uint w, h;
+ density.GetDimensions(w, h);
+
+ if (id.x < w && id.y < h)
+ {
+ int ddx0, ddx1, ddy0, ddy1;
+ float x, y, s0, t0, s1, t1, dfdt;
+
+ dfdt = dt * (w + h) * 0.5;
+
+ // Back trace point index.
+ x = (float)id.x - dfdt * prev[id].x;
+ y = (float)id.y - dfdt * prev[id].y;
+ // Clamp so that the points are within the simulation range.
+ clamp(x, 0.5, w + 0.5);
+ clamp(y, 0.5, h + 0.5);
+ // Determining cells near the back trace point.
+ ddx0 = floor(x);
+ ddx1 = ddx0 + 1;
+ ddy0 = floor(y);
+ ddy1 = ddy0 + 1;
+ // Save the difference for linear interpolation with neighboring cells.
+ s1 = x - ddx0;
+ s0 = 1.0 - s1;
+ t1 = y - ddy0;
+ t0 = 1.0 - t1;
+
+ // Backtrace, take the value one step before by linear interpolation with the neighborhood, and substitute it for the current velocity field.
+ velocity[id] = s0 * (t0 * prev[int2(ddx0, ddy0)].xy +
+ t1 * prev[int2(ddx0, ddy1)].xy) +
+ s1 * (t0 * prev[int2(ddx1, ddy0)].xy +
+ t1 * prev[int2(ddx1, ddy1)].xy);
+ SetBoundaryVelocity(id, w, h);
+ }
+}
+
+Next, let's look at the density field equation.
+\dfrac {\partial \rho} {\partial t}=-\left( \overrightarrow {u} \cdot \nabla \right) \rho + \kappa \nabla ^{2} \rho + S
+
+Of the above, \overrightarrow{u} is the flow velocity, \kappa is the diffusion coefficient, \rho is the density, and S is the external source term.
The density field is not always necessary, but by placing the pixels on the screen diffused by the density field on each vector when the velocity field is calculated, it becomes possible to express a more fluid-like expression that flows while melting. I will.
+As some of you may have noticed by looking at the formula of the density field, the flow is exactly the same as that of the velocity field; the differences are only three points: the quantity is a scalar instead of a vector, the kinematic viscosity coefficient \nu is replaced by the diffusion coefficient \kappa, and the law of conservation of mass is not used.
Since the density field is a field of change in density, it does not need to be incompressible and does not need to be conserved by mass. In addition, the kinematic viscosity coefficient and the diffusion coefficient have the same usage as coefficients.
Therefore, it is possible to implement the density field by making a kernel other than the mass conservation law of the kernel used in the velocity field earlier by lowering the dimension. I will not explain the density field on paper, but please refer to the density field as it is also implemented in the repository.
Fluids can be simulated by using the above velocity field, density field, and conservation of mass law, but let's take a look at the simulation steps at the end.
+The above is the simulation step of StableFluid.
+ +By executing and dragging on the screen with the mouse, it is possible to cause the following fluid simulation.
+
++Figure 4.2: Execution example +
+Fluid simulation, unlike pre-rendering, is a heavy field for real-time game engines like Unity. However, due to the improvement of GPU computing power, it has become possible to produce FPS that can withstand even a certain resolution if it is two-dimensional. Also, if you try to implement the Gauss-Seidel iterative method, which is a heavy load for the GPU that came out on the way, with another process, or substitute the speed field itself with curl noise, etc. It will be possible to express fluids with lighter calculations.
+If you have read this chapter and are interested in fluids even a little, please try "Fluid simulation by particle method" in the next chapter. Since you can approach the fluid from a different angle than the grid method, you can experience the depth of fluid simulation and the fun of mounting.
+ +
|
![]() |
|
In the previous chapter, we explained how to create a fluid simulation using the grid method. In this chapter, we will use the particle method, which is another fluid simulation method, especially the SPH method, to express the movement of the fluid. Please note that there are some inadequate expressions as the explanation is given in a slightly chewed manner.
+ +There are Euler's viewpoint and Lagrange's viewpoint as the method of observing the movement of fluid. Euler's viewpoint is to fix observation points on the fluid at equal intervals and analyze the movement of the fluid at the observation points. On the other hand, the Lagrange viewpoint is to float an observation point that moves along the flow of fluid and observe the movement of the fluid at that observation point ( see Fig. 5.1 ). Basically, the fluid simulation method using Euler's viewpoint is called the lattice method, and the fluid simulation method using Lagrange's viewpoint is called the particle method.
+
++Figure 5.1: Left: Euler-like, Right: Lagrange-like +
+The method of calculating the derivative differs between the Euler perspective and the Lagrange perspective. First , the physical quantity * 1 expressed from Euler's point of view is shown below.
+[* 1] Physical quantities refer to observable velocities and masses. In short, you can think of it as something that has a unit.
\phi = \phi (\overrightarrow{x}, t)
+
+This means the physical quantity \phi at the position \overrightarrow{x} at time t. The time derivative of this physical quantity is
+ \frac{\partial \phi}{\partial t}
+
+Can be expressed as. Of course, this is a derivative from Euler's point of view because the position of the physical quantity is fixed by \overrightarrow{x}.
+[* 2] The movement of the observation point along the flow is called advection.
On the other hand, from the Lagrange perspective, the observation point itself is a function of time because it moves *2 along the flow. Therefore, the observation point that was at \overrightarrow{x}_0 in the initial state is, at time t, located at
+ \overrightarrow{x}(\overrightarrow{x}_0, t)
+
+Exists in. Therefore, the notation of physical quantities is also
+ \phi = \phi (\overrightarrow{x}(\overrightarrow{x}_0, t), t)
+
+It will be. Looking at the current physical quantity and the amount of change in the physical quantity after \ Delta t seconds according to the definition of differentiation
+ \displaystyle \lim_{\Delta t \to 0} \frac{\phi(\overrightarrow{x}(\overrightarrow{x}_0, t + \Delta t), t + \Delta t) - \phi(\overrightarrow{x}(\overrightarrow{x}_0, t), t)}{\Delta t}
+
+ = \sum_i \frac{\partial \phi}{\partial x_i} \frac{\partial x_i}{\partial t} + \frac{\partial \phi}{\partial t}
+
+ = \left( \left( \begin{matrix}u_1\\u_2\\u_3\end{matrix} \right)
+ \cdot
+ \left( \begin{matrix} \frac{\partial}{\partial x_1}\\\frac{\partial}{\partial x_2}\\\frac{\partial}{\partial x_3} \end{matrix} \right)
+ + \frac{\partial}{\partial t}
+ \right) \phi\\
+
+ = (\frac{\partial}{\partial t} + \overrightarrow{u} \cdot {grad}) \phi
+
+It will be. This is the time derivative of the physical quantity considering the movement of the observation point. However, using this notation complicates the formula, so
+ \dfrac{D}{Dt} := \frac{\partial}{\partial t} + \overrightarrow{u} \cdot {grad}
+
+It can be shortened by introducing the operator. A series of operations that take into account the movement of observation points is called Lagrange differentiation. At first glance, it may seem complicated, but in the particle method where the observation points move, it is more convenient to express the equation from a Lagrangian point of view.
+ +A fluid can be considered to have no volume change if its velocity is well below the speed of sound. This is called the uncompressed condition of the fluid and is expressed by the following formula.
+ \nabla \cdot \overrightarrow{u} = 0
+
+This indicates that there is no gushing or disappearance in the fluid. Since the derivation of this equation involves a slightly complicated integral, the explanation is omitted * 3 . Think of it as "do not compress the fluid!"
+[* 3] It is explained in detail in "Fluid Simulation for Computer Graphics --Robert Bridson".
In the particle method, a fluid is divided into small particles and the movement of the fluid is observed from a Lagrangian perspective. This particle corresponds to the observation point in the previous section. Even if it is called the "particle method" in one word, many methods have been proposed at present, and it is famous
+And so on.
+ +First, the Navier-Stokes equation (hereinafter referred to as NS equation) in the particle method is described as follows.
+ \dfrac{D \overrightarrow{u}}{Dt} = -\dfrac{1}{\rho}\nabla p + \nu \nabla \cdot \nabla \overrightarrow{u} + \overrightarrow{g}
+ \label{eq:navier}
+
+The shape is a little different from the NS equation that came out in the grid method in the previous chapter. The advection term is completely omitted, but if you look at the relationship between the Euler derivative and the Lagrange derivative, you can see that it can be transformed into this shape well. In the particle method, the observation point is moved along the flow, so there is no need to consider the advection term when calculating the NS equation. The calculation of advection can be completed by directly updating the particle position based on the acceleration calculated by the NS equation.
+A real fluid is a collection of molecules, so it can be said to be a kind of particle system. However, it is impossible to calculate the actual number of molecules with a computer, so it is necessary to adjust it to a computable size. Each grain (*4) shown in Figure 5.2 represents a portion of the fluid divided into a computable size. Each of these grains can be thought of as having a mass m, a position vector \overrightarrow{x}, a velocity vector \overrightarrow{u}, and a volume V, respectively.
+
++Figure 5.2: Fluid particle approximation +
+For each of these grains, the acceleration is calculated by computing the external force \overrightarrow{f} and solving the equation of motion m\overrightarrow{a} = \overrightarrow{f}, which determines how the grain moves in the next time step.
+[*4] Called 'Blob' in English
As mentioned above, each particle moves by receiving some force from the surroundings, but what is that "force"? A simple example is gravity m \ overrightarrow {g}, but it should also receive some force from surrounding particles. These forces are explained below.
+The first force exerted on a fluid particle is pressure. The fluid always flows from the higher pressure side to the lower pressure side. If the same amount of pressure is applied from all directions, the forces cancel and the movement stops, so consider the case where the pressure balance is uneven. As mentioned in the previous chapter, by taking the gradient of the pressure scalar field, it is possible to calculate the direction with the highest rate of pressure increase as seen from one's own particle position. The direction in which a particle receives the force is from high pressure toward low pressure, so we take the minus sign, giving -\nabla p. Also, since particles have a volume, the pressure force applied to a particle is calculated by multiplying -\nabla p by the volume of the particle *5. Finally, the result -V\nabla p is derived.
+[* 5] Due to the uncompressed condition of the fluid, the integral of the pressure applied to the particles can be expressed simply by multiplying the volume.
The second force applied to fluid particles is viscous force. A viscous fluid is a fluid that is hard to deform, such as honey and melted chocolate. Applying the word viscous to the expression of the particle method, the velocity of a particle is easy to average the velocity of the surrounding particles . As mentioned in the previous chapter, the operation of averaging the surroundings can be performed using the Laplacian.
+Expressing the degree of viscosity using the viscosity coefficient \mu, it can be expressed as \mu \nabla \cdot \nabla \overrightarrow{u}.
+ +Applying these forces to the equation of motion m \ overrightarrow {a} = \ overrightarrow {f} ,
+ m \dfrac{D\overrightarrow{u}}{Dt} = - V \nabla p + V \mu \nabla \cdot \nabla \overrightarrow{u} + m\overrightarrow{g}
+
+Here, since m is \rho V, the equation is transformed (V is canceled).
+ \rho \dfrac{D\overrightarrow{u}}{Dt} = - \nabla p + \mu \nabla \cdot \nabla \overrightarrow{u} + \rho \overrightarrow{g}
+
+Dividing both sides by \rho,
+ \dfrac{D\overrightarrow{u}}{Dt} = - \dfrac{1}{\rho}\nabla p + \dfrac{\mu}{\rho} \nabla \cdot \nabla \overrightarrow{u} + \overrightarrow{g}
+
+Finally, introducing \nu for the coefficient \dfrac{\mu}{\rho} of the viscosity term,
+ \dfrac{D\overrightarrow{u}}{Dt} = - \dfrac{1}{\rho}\nabla p + \nu \nabla \cdot \nabla \overrightarrow{u} + \overrightarrow{g}
+
+Therefore, we were able to derive the NS equation mentioned at the beginning.
+ +In the particle method, the particles themselves represent the observation points of the fluid, so the calculation of the advection term is completed by simply moving the particle position. In the actual calculation of the time derivative, infinitely small time is used, but since infinity cannot be expressed by computer calculation, the differentiation is expressed using sufficiently small time \ Delta t . This is called the difference, and the smaller \ Delta t , the more accurate the calculation.
+Introducing the expression of difference for acceleration,
+ \overrightarrow{a} = \dfrac{D\overrightarrow{u}}{Dt} \equiv \frac{\Delta \overrightarrow{u}}{\Delta t}
+
+It will be. So the velocity increment \ Delta \ overrightarrow {u} is
+\Delta \overrightarrow{u} = \Delta t \overrightarrow{a}
+
+And also for the position increment,
+ \overrightarrow{u} = \frac{\partial \overrightarrow{x}}{\partial t} \equiv \frac{\Delta \overrightarrow{x}}{\Delta t}
+
+Than,
+\Delta \overrightarrow{x} = \Delta t \overrightarrow{u}
+
+It will be.
+By using this result, the velocity vector and position vector in the next frame can be calculated. Assuming that the particle velocity in the current frame is \overrightarrow{u}_n, the particle velocity in the next frame \overrightarrow{u}_{n+1} is
+\overrightarrow{u}_{n+1} = \overrightarrow{u}_n + \Delta \overrightarrow{u} = \overrightarrow{u}_n + \Delta t \overrightarrow{a}
+
+Can be expressed as.
+Assuming that the particle position in the current frame is \overrightarrow{x}_n, the particle position in the next frame \overrightarrow{x}_{n+1} is
+\overrightarrow{x}_{n+1} = \overrightarrow{x}_n + \Delta \overrightarrow{x} = \overrightarrow{x}_n + \Delta t \overrightarrow{u}
+
+Can be expressed as.
+This technique is called the forward Euler method. By repeating this every frame, it is possible to express the movement of particles at each time.
+ +In the previous section, we explained how to derive the NS equation in the particle method. Of course, these differential equations cannot be solved as they are on a computer, so some kind of approximation needs to be made. As a method, I will explain the SPH method that is often used in the CG field .
+The SPH method was originally used for collision simulation between celestial bodies in astrophysics , but it was also applied to fluid simulation in CG by Desbrun et al. * 6 in 1996 . In addition, parallelization is easy, and the current GPU can calculate a large number of particles in real time. In computer simulation, it is necessary to discretize continuous physical quantities and perform calculations, and the method of performing this discretization using a function called a weight function is called the SPH method.
+[*6] Desbrun and Cani, Smoothed Particles: A new paradigm for animating highly deformable bodies, Eurographics Workshop on Computer Animation and Simulation (EGCAS), 1996.
In the SPH method, each particle has an influence range, and the closer the particle is to other particles, the greater the influence of that particle. Figure 5.3 shows the extent of this effect .
+
++Figure 5.3: Two-dimensional weighting function +
+This function is called the weight function * 7 .
+[* 7] Normally, this function is also called a kernel function, but it is called this function to distinguish it from the kernel function in Compute Shader.
Assuming that the physical quantity in the SPH method is \ phi , it is discretized as follows using a weighting function.
+ \phi(\overrightarrow{x}) = \sum_{j \in N}m_j\frac{\phi_j}{\rho_j}W(\overrightarrow{x_j} - \overrightarrow{x}, h)
+
+N, m, \ rho, and h are the set of neighboring particles, the mass of the particles, the density of the particles, and the radius of influence of the weighting function, respectively. Also, the function W is the weighting function mentioned earlier.
+Furthermore, partial differential operations such as gradient and Laplacian can be applied to this physical quantity, and the gradient is
+ \nabla \phi(\overrightarrow{x}) = \sum_{j \in N}m_j\frac{\phi_j}{\rho_j} \nabla W(\overrightarrow{x_j} - \overrightarrow{x}, h)
+
+Laplacian
+ \nabla^2 \phi(\overrightarrow{x}) = \sum_{j \in N}m_j\frac{\phi_j}{\rho_j} \nabla^2 W(\overrightarrow{x_j} - \overrightarrow{x}, h)
+
+Can be expressed as. As you can see from the equation, the physical quantity gradient and the Laplacian are images that apply only to the weighting function. The weight function W is different depending on the physical quantity to be obtained, but the explanation of the reason is omitted * 8 .
+[* 8] It is explained in detail in "Basics of Physical Simulation for CG-Makoto Fujisawa".
The density of fluid particles is determined by using the formula of the physical quantity discretized by the weighting function.
+ \rho(\overrightarrow{x}) = \sum_{j \in N}m_jW_{poly6}(\overrightarrow{x_j} - \overrightarrow{x}, h)
+
+Is given. Here, the weight function W to be used is given below.
+
++Figure 5.4: Poly6 weight function +
+Discretizing the viscosity term also uses the weighting function as in the case of density.
+ f_{i}^{visc} = \mu\nabla^2\overrightarrow{u}_i = \mu \sum_{j \in N}m_j\frac{\overrightarrow{u}_j - \overrightarrow{u}_i}{\rho_j} \nabla^2 W_{visc}(\overrightarrow{x_j} - \overrightarrow{x}, h)
+
+Is expressed as. Where the Laplacian of the weighting function, \nabla^2 W_{visc}, is given below.
+
++Figure 5.5: Laplacian of Viscosity weighting function +
+Similarly, the pressure term is discretized.
+ f_{i}^{press} = - \frac{1}{\rho_i} \nabla p_i = - \frac{1}{\rho_i} \sum_{j \in N}m_j\frac{p_j - p_i}{2\rho_j} \nabla W_{spiky}(\overrightarrow{x_j} - \overrightarrow{x}, h)
+
+Where the gradient of the weighting function W_{spiky} is given below.
+
++Figure 5.6: Gradient of Spiky weight function +
+At this time, the pressure of the particles is calculated in advance using what is called the Tait equation:
+ p = B\left\{\left(\frac{\rho}{\rho_0}\right)^\gamma - 1\right\}
+
+It is calculated by. Where B is the gas constant. In order to guarantee incompressibility exactly, Poisson's equation must be solved, but that is not suitable for real-time calculation. Instead, incompressibility is only ensured approximately; in this respect, the SPH method *9 is said to be inferior to the lattice method in calculating the pressure term.
+[* 9] The SPH method, which calculates pressure using the Tait equation, is specially called the WCSPH method.
Samples can be found under Assets / SPHFluid in this repository ( https://github.com/IndieVisualLab/UnityGraphicsProgramming ). Please note that in this implementation, speedup and numerical stability are not considered in order to explain the SPH method as simply as possible.
+ +A description of the various parameters used in the simulation can be found in the comments in the code.
+Listing 5.1: Parameters used for simulation (FluidBase.cs)
+1: NumParticleEnum particleNum = NumParticleEnum.NUM_8K; // Number of particles + 2: float smoothlen = 0.012f; // Particle radius + 3: float pressureStiffness = 200.0f; // Pressure term coefficient + 4: float restDensity = 1000.0f; // rest density + 5: float particleMass = 0.0002f; // particle mass + 6: float viscosity = 0.1f; // Viscosity coefficient + 7: float maxAllowableTimestep = 0.005f; // Time step width + 8: float wallStiffness = 3000.0f; // Penalty wall power + 9: int iterations = 4; // number of iterations +10: Vector2 gravity = new Vector2(0.0f, -0.5f); // 重力 +11: Vector2 range = new Vector2 (1, 1); // Simulation space +12: bool simulate = true; // Run or pause +13: +14: int numParticles; // Number of particles +15: float timeStep; // Time step width +16: float densityCoef; // Poly6 kernel density coefficient +17: float gradPressureCoef; // Pressure coefficient of Spiky kernel +18: float lapViscosityCoef; // Laplacian kernel viscosity coefficient ++
Please note that in this demo scene, the inspector sets values that differ from the parameter initialization values described in the code.
+ +Since the coefficient of the weight function does not change during simulation, it is calculated on the CPU side at the time of initialization. (However, it is updated in the Update function in consideration of the possibility of editing the parameter during execution)
+Since the mass of each particle is constant this time, the mass m in the formula of the physical quantity goes out of the sigma and becomes as follows.
+ \phi(\overrightarrow{x}) = m \sum_{j \in N}\frac{\phi_j}{\rho_j}W(\overrightarrow{x_j} - \overrightarrow{x}, h)
+
+Therefore, the mass can be included in the coefficient calculation.
+Since the coefficient changes depending on the type of weight function, calculate the coefficient for each.
+Listing 5.2: Pre-calculation of weight function coefficients (FluidBase.cs)
+1: densityCoef = particleMass * 4f / (Mathf.PI * Mathf.Pow(smoothlen, 8)); + 2: gradPressureCoef + 3: = particleMass * -30.0f / (Mathf.PI * Mathf.Pow(smoothlen, 5)); + 4: lapViscosityCoef + 5: = particleMass * 20f / (3 * Mathf.PI * Mathf.Pow(smoothlen, 5)); ++
Finally, the coefficients (and various parameters) calculated on the CPU side are stored in the constant buffer on the GPU side.
+Listing 5.3: Transferring a Value to the Compute Shader's Constant Buffer (FluidBase.cs)
+ 1: fluidCS.SetInt("_NumParticles", numParticles);
+ 2: fluidCS.SetFloat("_TimeStep", timeStep);
+ 3: fluidCS.SetFloat("_Smoothlen", smoothlen);
+ 4: fluidCS.SetFloat("_PressureStiffness", pressureStiffness);
+ 5: fluidCS.SetFloat("_RestDensity", restDensity);
+ 6: fluidCS.SetFloat("_Viscosity", viscosity);
+ 7: fluidCS.SetFloat("_DensityCoef", densityCoef);
+ 8: fluidCS.SetFloat("_GradPressureCoef", gradPressureCoef);
+ 9: fluidCS.SetFloat("_LapViscosityCoef", lapViscosityCoef);
+10: fluidCS.SetFloat("_WallStiffness", wallStiffness);
+11: fluidCS.SetVector("_Range", range);
+12: fluidCS.SetVector("_Gravity", gravity);
+
+Listing 5.4: Compute Shader constant buffer (SPH2D.compute)
+1: int _NumParticles; // Number of particles + 2: float _TimeStep; // Time step width (dt) + 3: float _Smoothlen; // particle radius + 4: float _PressureStiffness; // Becker's coefficient + 5: float _RestDensity; // Rest density + 6: float _DensityCoef; // Coefficient when calculating density + 7: float _GradPressureCoef; // Coefficient when calculating pressure + 8: float _LapViscosityCoef; // Coefficient when calculating viscosity + 9: float _WallStiffness; // Pushing back force of the penalty method +10: float _Viscosity; // Viscosity coefficient +11: float2 _Gravity; // Gravity +12: float2 _Range; // Simulation space +13: +14: float3 _MousePos; // Mouse position +15: float _MouseRadius; // Radius of mouse interaction +16: bool _MouseDown; // Is the mouse pressed? ++
Listing 5.5: Kernel function for calculating density (SPH2D.compute)
+ 1: [numthreads(THREAD_SIZE_X, 1, 1)]
+ 2: void DensityCS(uint3 DTid : SV_DispatchThreadID) {
+ 3: uint P_ID = DTid.x; // Particle ID currently being processed
+ 4:
+ 5: float h_sq = _Smoothlen * _Smoothlen;
+ 6: float2 P_position = _ParticlesBufferRead[P_ID].position;
+ 7:
+ 8: // Proximity exploration (O(n^2))
+ 9: float density = 0;
+10: for (uint N_ID = 0; N_ID < _NumParticles; N_ID++) {
+11: if (N_ID == P_ID) continue; // Avoid referencing itself
+12:
+13: float2 N_position = _ParticlesBufferRead[N_ID].position;
+14:
+15: float2 diff = N_position-P_position; // particle distance
+16: float r_sq = dot (diff, diff); // Particle distance squared
+17:
+18: // Exclude particles that do not fit within the radius
+19: if (r_sq < h_sq) {
+20: // No need to take a route as the calculation only includes the square
+21: density += CalculateDensity(r_sq);
+22: }
+23: }
+24:
+25: // Update density buffer
+26: _ParticlesDensityBufferWrite[P_ID].density = density;
+27: }
+
+Normally, nearby particles should be found with an appropriate neighborhood-search algorithm instead of scanning all particles, but in this implementation a brute-force search over every particle is performed for simplicity (the for loop on line 10). Also, since the calculation involves the distance between a particle and its neighbors, line 11 skips the particle itself so that it does not compute an interaction with itself.
+The case classification by the effective radius h of the weight function is realized by the if statement on the 19th line. Density addition (sigma calculation) is realized by adding the calculation result inside sigma to the variable initialized with 0 in the 9th line. Here is the formula for calculating the density again.
+ \rho(\overrightarrow{x}) = \sum_{j \in N}m_jW_{poly6}(\overrightarrow{x_j} - \overrightarrow{x}, h)
+
+The density is calculated using the Poly6 weighting function as shown in the above equation. The Poly6 weighting function is calculated in Listing 5.6 .
+Listing 5.6: Density Calculation (SPH2D.compute)
+ 1: inline float CalculateDensity(float r_sq) {
+ 2: const float h_sq = _Smoothlen * _Smoothlen;
+ 3: return _DensityCoef * (h_sq - r_sq) * (h_sq - r_sq) * (h_sq - r_sq);
+ 4: }
+
+Finally, line 26 of Listing 5.5 writes the computed density to the write buffer.
+ +Listing 5.7: Weighting function (SPH2D.compute) to calculate pressure per particle
+ 1: [numthreads(THREAD_SIZE_X, 1, 1)]
+ 2: void PressureCS(uint3 DTid : SV_DispatchThreadID) {
+ 3: uint P_ID = DTid.x; // Particle ID currently being processed
+ 4:
+ 5: float P_density = _ParticlesDensityBufferRead[P_ID].density;
+ 6: float P_pressure = CalculatePressure(P_density);
+ 7:
+ 8: // Update pressure buffer
+ 9: _ParticlesPressureBufferWrite[P_ID].pressure = P_pressure;
+10: }
+
+Before solving the pressure term, calculate the pressure for each particle to reduce the calculation cost of the pressure term later. As I mentioned earlier, pressure calculation originally requires solving an equation called Poisson's equation, such as the following equation.
+ \nabla^2 p = \rho \frac{\nabla \overrightarrow{u}}{\Delta t}
+
+However, the operation of solving Poisson's equation accurately with a computer is very expensive, so it is calculated approximately using the Tait equation below.
+ p = B\left\{\left(\frac{\rho}{\rho_0}\right)^\gamma - 1\right\}
+
+Listing 5.8: Implementation of the Tait equation (SPH2D.compute)
+ 1: inline float CalculatePressure(float density) {
+ 2: return _PressureStiffness * max(pow(density / _RestDensity, 7) - 1, 0);
+ 3: }
+
+Listing 5.9: Kernel function to calculate pressure and viscosity terms (SPH2D.compute)
+ 1: [numthreads(THREAD_SIZE_X, 1, 1)]
+ 2: void ForceCS(uint3 DTid : SV_DispatchThreadID) {
+ 3: uint P_ID = DTid.x; // Particle ID currently being processed
+ 4:
+ 5: float2 P_position = _ParticlesBufferRead[P_ID].position;
+ 6: float2 P_velocity = _ParticlesBufferRead[P_ID].velocity;
+ 7: float P_density = _ParticlesDensityBufferRead[P_ID].density;
+ 8: float P_pressure = _ParticlesPressureBufferRead[P_ID].pressure;
+ 9:
+10: const float h_sq = _Smoothlen * _Smoothlen;
+11:
+12: // Proximity exploration (O(n^2))
+13: float2 press = float2 (0, 0);
+14: float2 visco = float2(0, 0);
+15: for (uint N_ID = 0; N_ID < _NumParticles; N_ID++) {
+16: if (N_ID == P_ID) continue; // Skip if targeting itself
+17:
+18: float2 N_position = _ParticlesBufferRead[N_ID].position;
+19:
+20: float2 diff = N_position - P_position;
+21: float r_sq = dot(diff, diff);
+22:
+23: // Exclude particles that do not fit within the radius
+24: if (r_sq < h_sq) {
+25: float N_density
+26: = _ParticlesDensityBufferRead[N_ID].density;
+27: float N_pressure
+28: = _ParticlesPressureBufferRead[N_ID].pressure;
+29: float2 N_velocity
+30: = _ParticlesBufferRead[N_ID].velocity;
+31: float r = sqrt(r_sq);
+32:
+33: // Pressure item
+34: press += CalculateGradPressure(...);
+35:
+36: // Sticky items
+37: visco += CalculateLapVelocity(...);
+38: }
+39: }
+40:
+41: // Integration
+42: float2 force = press + _Viscosity * visco;
+43:
+44: // Acceleration buffer update
+45: _ParticlesForceBufferWrite[P_ID].acceleration = force / P_density;
+46: }
+
+The pressure term and viscosity term are calculated in the same way as the density calculation method.
+First, on line 34, the force due to the pressure term below is calculated.
+ f_{i}^{press} = - \frac{1}{\rho_i} \nabla p_i = - \frac{1}{\rho_i} \sum_{j \in N}m_j\frac{p_j - p_i}{2\rho_j} \nabla W_{press}(\overrightarrow{x_j} - \overrightarrow{x}, h)
+
+The calculation of the contents of Sigma is performed by the following function.
+Listing 5.10: Calculation of Pressure Term Elements (SPH2D.compute)
+ 1: inline float2 CalculateGradPressure(...) {
+ 2: const float h = _Smoothlen;
+ 3: float avg_pressure = 0.5f * (N_pressure + P_pressure);
+ 4: return _GradPressureCoef * avg_pressure / N_density
+ 5: * pow(h - r, 2) / r * (diff);
+ 6: }
+
+Next, on line 37, the force due to the viscosity term below is calculated.
+ f_{i}^{visc} = \mu\nabla^2\overrightarrow{u}_i = \mu \sum_{j \in N}m_j\frac{\overrightarrow{u}_j - \overrightarrow{u}_i}{\rho_j} \nabla^2 W_{visc}(\overrightarrow{x_j} - \overrightarrow{x}, h)
+
+The calculation of the contents of Sigma is performed by the following function.
+Listing 5.11: Calculation of Viscosity Term Elements (SPH2D.compute)
+ 1: inline float2 CalculateLapVelocity(...) {
+ 2: const float h = _Smoothlen;
+ 3: float2 vel_diff = (N_velocity - P_velocity);
+ 4: return _LapViscosityCoef / N_density * (h - r) * vel_diff;
+ 5: }
+
+Finally, on lines 42-45 of Listing 5.9, the forces calculated by the pressure and viscosity terms are added together and written to the buffer as the final output.
+ +Listing 5.12: Kernel function for collision detection and position update (SPH2D.compute)
+ 1: [numthreads(THREAD_SIZE_X, 1, 1)]
+ 2: void IntegrateCS(uint3 DTid : SV_DispatchThreadID) {
+ 3: const unsigned int P_ID = DTid.x; // Particle ID currently being processed
+ 4:
+ 5: // Position and speed before update
+ 6: float2 position = _ParticlesBufferRead[P_ID].position;
+ 7: float2 velocity = _ParticlesBufferRead[P_ID].velocity;
+ 8: float2 acceleration = _ParticlesForceBufferRead[P_ID].acceleration;
+ 9:
+10: // Mouse interaction
+11: if (distance(position, _MousePos.xy) < _MouseRadius && _MouseDown) {
+12: float2 dir = position - _MousePos.xy;
+13: float pushBack = _MouseRadius-length(dir);
+14: acceleration += 100 * pushBack * normalize(dir);
+15: }
+16:
+17: // Here to write collision detection -----
+18:
+19: // Wall boundary (penalty method)
+20: float dist = dot(float3(position, 1), float3(1, 0, 0));
+21: acceleration += min(dist, 0) * -_WallStiffness * float2(1, 0);
+22:
+23: dist = dot(float3(position, 1), float3(0, 1, 0));
+24: acceleration += min(dist, 0) * -_WallStiffness * float2(0, 1);
+25:
+26: dist = dot(float3(position, 1), float3(-1, 0, _Range.x));
+27: acceleration += min(dist, 0) * -_WallStiffness * float2(-1, 0);
+28:
+29: dist = dot(float3(position, 1), float3(0, -1, _Range.y));
+30: acceleration += min(dist, 0) * -_WallStiffness * float2(0, -1);
+31:
+32: // Addition of gravity
+33: acceleration += _Gravity;
+34:
+35: // Update the next particle position with the forward Euler method
+36: velocity += _TimeStep * acceleration;
+37: position += _TimeStep * velocity;
+38:
+39: // Particle buffer update
+40: _ParticlesBufferWrite[P_ID].position = position;
+41: _ParticlesBufferWrite[P_ID].velocity = velocity;
+42: }
+
+Collision detection with a wall is performed using the penalty method (lines 19-30). The penalty method is a method of pushing back with a strong force as much as it protrudes from the boundary position.
+Originally, collision detection with obstacles would also be performed before the collision detection with the walls, but in this implementation mouse interaction is handled there instead (lines 11-15). While the mouse button is pressed, a repulsive force is applied to push the particles away from the mouse position.
+Gravity, which is an external force, is added on the 33rd line. Setting the gravity value to zero results in weightlessness and interesting visual effects. Also, update the position using the forward Euler method described above (lines 36-37) and write the final result to the buffer.
+ +Listing 5.13: Simulation key functions (FluidBase.cs)
+ 1: private void RunFluidSolver() {
+ 2:
+ 3: int kernelID = -1;
+ 4: int threadGroupsX = numParticles / THREAD_SIZE_X;
+ 5:
+ 6: // Density
+ 7: kernelID = fluidCS.FindKernel("DensityCS");
+ 8: fluidCS.SetBuffer(kernelID, "_ParticlesBufferRead", ...);
+ 9: fluidCS.SetBuffer(kernelID, "_ParticlesDensityBufferWrite", ...);
+10: fluidCS.Dispatch(kernelID, threadGroupsX, 1, 1);
+11:
+12: // Pressure
+13: kernelID = fluidCS.FindKernel("PressureCS");
+14: fluidCS.SetBuffer(kernelID, "_ParticlesDensityBufferRead", ...);
+15: fluidCS.SetBuffer(kernelID, "_ParticlesPressureBufferWrite", ...);
+16: fluidCS.Dispatch(kernelID, threadGroupsX, 1, 1);
+17:
+18: // Force
+19: kernelID = fluidCS.FindKernel("ForceCS");
+20: fluidCS.SetBuffer(kernelID, "_ParticlesBufferRead", ...);
+21: fluidCS.SetBuffer(kernelID, "_ParticlesDensityBufferRead", ...);
+22: fluidCS.SetBuffer(kernelID, "_ParticlesPressureBufferRead", ...);
+23: fluidCS.SetBuffer(kernelID, "_ParticlesForceBufferWrite", ...);
+24: fluidCS.Dispatch(kernelID, threadGroupsX, 1, 1);
+25:
+26: // Integrate
+27: kernelID = fluidCS.FindKernel("IntegrateCS");
+28: fluidCS.SetBuffer(kernelID, "_ParticlesBufferRead", ...);
+29: fluidCS.SetBuffer(kernelID, "_ParticlesForceBufferRead", ...);
+30: fluidCS.SetBuffer(kernelID, "_ParticlesBufferWrite", ...);
+31: fluidCS.Dispatch(kernelID, threadGroupsX, 1, 1);
+32:
+33: SwapComputeBuffer(ref particlesBufferRead, ref particlesBufferWrite);
+34: }
+
+This is the part that calls the Compute Shader kernel function described so far every frame. Give the appropriate ComputeBuffer for each kernel function.
+Now, remember that the smaller the time step width \Delta t , the smaller the simulation error. When running at 60FPS, \Delta t = 1/60 , but this causes a large error and the particles explode. Furthermore, if the time step width is simply made smaller than \Delta t = 1/60 , the simulated time per frame advances slower than real time, resulting in slow motion. To avoid this, set \Delta t = 1/(60 \times iteration) and run the main routine iteration times per frame.
+Listing 5.14: Major Function Iteration (FluidBase.cs)
+ 1: // Reduce the time step width and iterate multiple times to improve the calculation accuracy.
+ 2: for (int i = 0; i<iterations; i++) {
+ 3: RunFluidSolver();
+ 4: }
+
+This allows you to perform real-time simulations with a small time step width.
+ +Unlike a normal single-access particle system, particles interact with each other, so it is a problem if other data is rewritten during the calculation. To avoid this, prepare two buffers, a read buffer and a write buffer, which do not rewrite the value when performing calculations on the GPU. By swapping these buffers every frame, you can update the data without conflict.
+Listing 5.15: Buffer Swapping Function (FluidBase.cs)
+ 1: void SwapComputeBuffer(ref ComputeBuffer ping, ref ComputeBuffer pong) {
+ 2: ComputeBuffer temp = ping;
+ 3: ping = pong;
+ 4: pong = temp;
+ 5: }
+
+Listing 5.16: Rendering Particles (FluidRenderer.cs)
+ 1: void DrawParticle() {
+ 2:
+ 3: Material m = RenderParticleMat;
+ 4:
+ 5: var inverseViewMatrix = Camera.main.worldToCameraMatrix.inverse;
+ 6:
+ 7: m.SetPass(0);
+ 8: m.SetMatrix("_InverseMatrix", inverseViewMatrix);
+ 9: m.SetColor("_WaterColor", WaterColor);
+10: m.SetBuffer("_ParticlesBuffer", solver.ParticlesBufferRead);
+11: Graphics.DrawProcedural(MeshTopology.Points, solver.NumParticles);
+12: }
+
+On the 10th line, set the buffer containing the position calculation result of the fluid particle in the material and transfer it to the shader. On the 11th line, we are instructing to draw instances for the number of particles.
+Listing 5.17: Particle Rendering (Particle.shader)
+ 1: struct FluidParticle {
+ 2: float2 position;
+ 3: float2 velocity;
+ 4: };
+ 5:
+ 6: StructuredBuffer<FluidParticle> _ParticlesBuffer;
+ 7:
+ 8: // --------------------------------------------------------------------
+ 9: // Vertex Shader
+10: // --------------------------------------------------------------------
+11: v2g vert(uint id : SV_VertexID) {
+12:
+13: v2g o = (v2g) 0;
+14: o.pos = float3(_ParticlesBuffer[id].position.xy, 0);
+15: o.color = float4 (0, 0.1, 0.1, 1);
+16: return o;
+17: }
+
+Lines 1-6 define the information for receiving fluid particle information. At this time, it is necessary to match the definition with the structure of the buffer transferred from the script to the material. The position data is received by referring to the buffer element with id: SV_VertexID as shown in the 14th line.
+After that, as with a normal particle system , create a billboard * 10 centered on the position data of the calculation result with the geometry shader as shown in Fig. 5.7 , and attach and render the particle image.
+
++Figure 5.7: Creating a billboard +
+[* 10] A plane whose surface always faces the viewpoint.
++Figure 5.8: Rendering result +
+The video is posted here ( https://youtu.be/KJVu26zeK2w ).
+ +In this chapter, the method of fluid simulation using the SPH method is shown. By using the SPH method, it has become possible to handle the movement of fluid as a general purpose like a particle system.
+As mentioned earlier, there are many types of fluid simulation methods other than the SPH method. Through this chapter, we hope that you will be interested in other physics simulations themselves in addition to other fluid simulation methods, and expand the range of expressions.
\ No newline at end of file diff --git a/html-translated/vol1/Chapter 5 _ Fluid Simulation by SPH Method_files/2dkernel.png b/html-translated/vol1/Chapter 5 _ Fluid Simulation by SPH Method_files/2dkernel.png new file mode 100644 index 0000000..fdf4611 Binary files /dev/null and b/html-translated/vol1/Chapter 5 _ Fluid Simulation by SPH Method_files/2dkernel.png differ diff --git a/html-translated/vol1/Chapter 5 _ Fluid Simulation by SPH Method_files/bill.jpg b/html-translated/vol1/Chapter 5 _ Fluid Simulation by SPH Method_files/bill.jpg new file mode 100644 index 0000000..a6ae857 Binary files /dev/null and b/html-translated/vol1/Chapter 5 _ Fluid Simulation by SPH Method_files/bill.jpg differ diff --git a/html-translated/vol1/Chapter 5 _ Fluid Simulation by SPH Method_files/blob.png b/html-translated/vol1/Chapter 5 _ Fluid Simulation by SPH Method_files/blob.png new file mode 100644 index 0000000..6f4b9b3 Binary files /dev/null and b/html-translated/vol1/Chapter 5 _ Fluid Simulation by SPH Method_files/blob.png differ diff --git a/html-translated/vol1/Chapter 5 _ Fluid Simulation by SPH Method_files/cleardot.gif b/html-translated/vol1/Chapter 5 _ Fluid Simulation by SPH Method_files/cleardot.gif new file mode 100644 index 0000000..1d11fa9 Binary files /dev/null and b/html-translated/vol1/Chapter 5 _ Fluid Simulation by SPH Method_files/cleardot.gif differ diff --git a/html-translated/vol1/Chapter 5 _ Fluid Simulation by SPH Method_files/element_main.js b/html-translated/vol1/Chapter 5 _ Fluid Simulation by SPH Method_files/element_main.js new file mode 100644 index 0000000..4c5de3c --- /dev/null +++ b/html-translated/vol1/Chapter 5 _ Fluid Simulation by SPH Method_files/element_main.js @@ -0,0 +1,486 @@ +(function(){/* + + Copyright The Closure Library Authors. + SPDX-License-Identifier: Apache-2.0 +*/ +var aa='" style="background-image:url(',ba="-disabled",ca="-document.getElementById('",da="/translate_a/t",ea="/translate_suggestion?client=",fa='
|
![]() |
|
This chapter mainly describes Geometry Shader, which is one of the stages of the rendering pipeline, and explains the dynamic grass-generating shader (commonly known as Grass Shader) using Geometry Shader.
+I've used some technical terms to describe the Geometry Shader, but if you're just trying to use the Geometry Shader, it's a good idea to take a look at the sample code.
+The Unity project in this chapter has been uploaded to the following Github repository.
+https://github.com/IndieVisualLab/UnityGraphicsProgramming/
+ +Geometry Shader is one of the programmable shaders that can dynamically convert, generate, and delete primitives (basic shapes that make up a mesh) on the GPU.
+Until now, if you try to change the mesh shape dynamically, such as by converting primitives, you need to take measures such as processing on the CPU or giving meta information to the vertices in advance and converting with Vertex Shader. did. However, Vertex Shader cannot acquire information about adjacent vertices, and there are strong restrictions such as not being able to create new vertices based on the vertices being processed and vice versa. .. However, processing with a CPU would take an unrealistically huge amount of time from the perspective of real-time processing. As you can see, there have been some problems with changing the shape of the mesh in real time.
+Therefore, Geometry Shader is installed as standard in DirectX 10 and OpenGL 3.2 as a function to solve these problems and enable free conversion processing within weak constraints. In OpenGL, it is also called Primitive Shader.
+ +It is located on the rendering pipeline after Vertex Shader and before Fragment Shader and rasterization. In other words, within the Fragment Shader, the vertices dynamically generated by the Geometry Shader and the original vertices passed to the Vertex Shader are processed without distinction.
+ +Normally, the input information to Vertex Shader is in units of vertices, and conversion processing is performed for those vertices. However, the input information to the Geometry Shader is a user-defined input primitive unit.
+The actual program will be described later, but the vertex information group processed by Vertex Shader will be divided and input based on the input primitive type. For example, if the input primitive type is triangle, three vertex information will be passed, if line, two vertex information will be passed, and if point, one vertex information will be passed. This makes it possible to perform processing while referring to other vertex information, which was not possible with vertex shader, and enables a wide range of calculations.
+One thing to note is that Vertex Shader processes on a vertex-by-vertex basis and passes information about the vertices it processes, but Geometry Shader is a primitive assembly topology regardless of the input primitive type. Processing is performed in units of primitives determined by. In other words, if you run the Geometry Shader on a Quad mesh with a topology of Triangles, as shown in Figure 6.1, the Geometry Shader will be run twice for triangles ① and ②. At this time, when the primitive type for input is Line, the information passed to the input is the vertices of two vertices 0,1,2 in the case of triangle ①, and the vertices 0,2,3 in the case of ②. It will be the apex of the two points.
+
++Figure 6.1: Quad mesh +
+The output of Geometry Shader is a set of vertex information for user-defined output primitive types. In Vertex Shader, it was 1 input and 1 output, but Geometry Shader will output multiple information, and there is no problem even if there is one or more primitives generated by the output information.
+For example, if the output primitive type is defined as triangle and a total of 9 vertices newly calculated are output, 3 triangles are generated by Geometry Shader. Since this process is performed in primitive units as described above, it is possible that the number of triangles that were originally one has increased to three.
+In addition, it is necessary to set in advance the maximum number of vertices to be output in one process called MaxVertexCount in Geometry Shader. For example, if MaxVertexCount is set to 9, Geometry Shader will be able to output the number of vertices from 0 to 9 points. Due to the "Geometry Shader Limits" described later, 1024 is generally the maximum value for this value.
+In addition, as a point to be careful when outputting vertex information, when adding a new vertex while maintaining the original mesh shape, the vertex information sent from Vertex Shader is also sent to Geometry Shader. Must be output. The Geometry Shader does not have the behavior of adding to the output of the Vertex Shader, but the output of the Geometry Shader is rasterized and passed to the Fragment Shader. Paradoxically, you can also dynamically reduce the number of vertices by setting the output of the Geometry Shader to 0.
+ +The Geometry Shader has a maximum number of output vertices and a maximum number of output elements for one output. The maximum number of output vertices is literally the limit value of the number of vertices, and although it depends on the GPU, 1024 is common, so you can increase the number of vertices from one triangle to a maximum of 1024 points. The elements in the maximum number of output elements are the information that the vertices have, such as coordinates and colors. Generally, the position elements of (x, y, z, w) and (r, g, b, a) There are a total of 8 color elements. The maximum number of outputs of this element also depends on the GPU, but since 1024 is also common, the output will be limited to 128 (1024/8) at the maximum.
+Since both of these restrictions must be met, even if the number of vertices can be output at 1024 points, the actual output of the Geometry Shader is limited to 128 points due to restrictions on the number of elements. So, for example, if you use Geometry Shader for a mesh with 2 primitives (Quad mesh, etc.), you can handle only up to 256 vertices (128 points * 2 primitives). ..
+This number of 128 points is the limit value of the value that can be set in MaxVertexCount in the previous section.
+ +Below is a Geometry Shader program with simple behavior. I will explain the explanation up to the previous section again by comparing it with the actual program.
+In addition to Geometry Shader, the explanation about ShaderLab syntax etc. required when writing shaders in Unity is omitted in this chapter, so if you have any questions, please refer to the official document below.
+https://docs.unity3d.com/ja/current/Manual/SL-Reference.html
+Shader "Custom/SimpleGeometryShader"
+{
+ Properties
+ {
+ _Height("Height", float) = 5.0
+ _TopColor("Top Color", Color) = (0.0, 0.0, 1.0, 1.0)
+ _BottomColor("Bottom Color", Color) = (1.0, 0.0, 0.0, 1.0)
+ }
+ SubShader
+ {
+ Tags { "RenderType" = "Opaque"}
+ LOD 100
+
+ Cull Off
+ Lighting Off
+
+ Pass
+ {
+ CGPROGRAM
+ #pragma target 5.0
+ #pragma vertex vert
+ #pragma geometry geom
+ #pragma fragment frag
+ #include "UnityCG.cginc"
+
+ uniform float _Height;
+ uniform float4 _TopColor, _BottomColor;
+
+ struct v2g
+ {
+ float4 pos : SV_POSITION;
+ };
+
+ struct g2f
+ {
+ float4 pos : SV_POSITION;
+ float4 col : COLOR;
+ };
+
+ v2g vert(appdata_full v)
+ {
+ v2g o;
+ o.pos = v.vertex;
+
+ return o;
+ }
+
+ [maxvertexcount(12)]
+ void geom(triangle v2g input[3],
+ inout TriangleStream<g2f> outStream)
+ {
+ float4 p0 = input[0].pos;
+ float4 p1 = input[1].pos;
+ float4 p2 = input[2].pos;
+
+ float4 c = float4(0.0f, 0.0f, -_Height, 1.0f)
+ + (p0 + p1 + p2) * 0.33333f;
+
+ g2f out0;
+ out0.pos = UnityObjectToClipPos(p0);
+ out0.col = _BottomColor;
+
+ g2f out1;
+ out1.pos = UnityObjectToClipPos(p1);
+ out1.col = _BottomColor;
+
+ g2f out2;
+ out2.pos = UnityObjectToClipPos(p2);
+ out2.col = _BottomColor;
+
+ g2f o;
+ o.pos = UnityObjectToClipPos (c);
+ o.col = _TopColor;
+
+ // bottom
+ outStream.Append(out0);
+ outStream.Append(out1);
+ outStream.Append(out2);
+ outStream.RestartStrip();
+
+ // sides
+ outStream.Append(out0);
+ outStream.Append(out1);
+ outStream.Append(o);
+ outStream.RestartStrip();
+
+ outStream.Append(out1);
+ outStream.Append(out2);
+ outStream.Append(o);
+ outStream.RestartStrip();
+
+ outStream.Append(out2);
+ outStream.Append(out0);
+ outStream.Append(o);
+ outStream.RestartStrip();
+ }
+
+ float4 frag(g2f i) : COLOR
+ {
+ return i.col;
+ }
+ ENDCG
+ }
+ }
+}
+
+In this shader, the center coordinates of the passed triangle are calculated and moved further upward, and each vertex of the passed triangle is connected to the calculated new coordinates. In other words, we are generating a simple triangular pyramid from a flat triangle.
+So if you apply this shader to a Quad mesh (consisting of two triangles), it will look like Figures 6.2 through 6.3.
+
++Figure 6.2: From a flat plate like this +
+
++Figure 6.3: Two three-dimensional triangular pyramids are now displayed +
+In this shader, I will extract and explain only the part related to Geometry Shader in particular.
+#pragma target 5.0 +#pragma vertex vert + +// Declare the use of Geometry Shader +#pragma geometry geom + +#pragma fragment frag +#include "UnityCG.cginc" ++
In the above declaration part, we declare that the function named geom is the function for the Geometry Shader. This causes the geom function to be called when the Geometry Shader stage is reached.
[maxvertexcount(12)] +void geom(triangle v2g input[3], inout TriangleStream<g2f> outStream) ++
Here is the function declaration for the Geometry Shader.
+ +triangle v2g input[3] ++
This is the part related to input.
+This time, we want to generate a triangular pyramid based on a triangle, so triangle is specified as the input. As a result, the information of each vertex of the triangle — the input primitive — is passed in, and since a triangle consists of three vertices, the formal argument receives an array of length 3. If the input were point instead of triangle, each primitive would consist of only one vertex, so it would be received as an array of length 1, as in geom(point v2g input[1]).
inout TriangleStream<g2f> outStream ++
This is the part related to output.
+Since we want the primitives of the generated mesh to be triangles, the stream is declared with the TriangleStream type. Because the TriangleStream type means that the output is a triangle strip, triangles are generated from the output vertex information. There are also PointStream and LineStream types, so select the output stream type that matches your purpose.
In addition, the [maxvertexcount(12)] part sets the maximum number of output vertices to 12. This is because the triangular pyramid is made up of four triangles — one for the base and three for the sides — and each triangle requires three vertices, so at most 3 * 4 = 12 vertices are output.
g2f out0; +out0.pos = UnityObjectToClipPos(p0); +out0.col = _BottomColor; + +g2f out1; +out1.pos = UnityObjectToClipPos(p1); +out1.col = _BottomColor; + +g2f out2; +out2.pos = UnityObjectToClipPos(p2); +out2.col = _BottomColor; + +g2f o; +o.pos = UnityObjectToClipPos (c); +o.col = _TopColor; + +// bottom +outStream.Append(out0); +outStream.Append(out1); +outStream.Append(out2); +outStream.RestartStrip(); + +// sides +outStream.Append(out0); +outStream.Append(out1); +outStream.Append(o); +outStream.RestartStrip(); + +outStream.Append(out1); +outStream.Append(out2); +outStream.Append(o); +outStream.RestartStrip(); + +outStream.Append(out2); +outStream.Append(out0); +outStream.Append(o); +outStream.RestartStrip(); ++
This is the part of the process that outputs the actual vertices.
+First of all, a g2f type variable for output is declared, and vertex coordinates and color information are stored. At this time, it is necessary to convert from the object space to the clip space of the camera in the same way as Vertex Shader.
+After that, the vertex information is output while being aware of the order of the vertices that make up the mesh. An output variable is added to the current stream by passing it to the Append function of the outStream variable, and calling the RestartStrip function ends the current primitive strip and starts a new one.
Since this is a TriangleStream — a triangle strip — the more vertices you add with the Append function, the more connected triangles are generated from all the vertices added to the stream. So if, as in this case, you do not want the triangles to be connected based on the order in which vertices were appended, you need to call RestartStrip to start a new strip. Of course, by devising the order of the vertices, it is possible to reduce the number of Append and RestartStrip calls.
In this section, we will explain Grass Shader, which is a little development from the previous section "Simple Geometry Shader", and uses Geometry Shader to generate grass in real time.
+The following is the Grass Shader program described.
+Shader "Custom/Grass" {
+ Properties
+ {
+ // Grass height
+ _Height("Height", float) = 80
+ // Grass width
+ _Width("Width", float) = 2.5
+
+ // The height of the bottom of the grass
+ _BottomHeight("Bottom Height", float) = 0.3
+ // Height of the middle part of the grass
+ _MiddleHeight("Middle Height", float) = 0.4
+ // Height of the top of the grass
+ _TopHeight("Top Height", float) = 0.5
+
+ // The width of the bottom of the grass
+ _BottomWidth("Bottom Width", float) = 0.5
+ // Width of the middle part of the grass
+ _MiddleWidth("Middle Width", float) = 0.4
+ // The width of the top of the grass
+ _TopWidth("Top Width", float) = 0.2
+
+ // How the bottom of the grass bends
+ _BottomBend("Bottom Bend", float) = 1.0
+ // How the middle part of the grass bends
+ _MiddleBend("Middle Bend", float) = 1.0
+ // How the top of the grass bends
+ _TopBend("Top Bend", float) = 2.0
+
+ // Wind strength
+ _WindPower("Wind Power", float) = 1.0
+
+ // The color of the top of the grass
+ _TopColor("Top Color", Color) = (1.0, 1.0, 1.0, 1.0)
+ // The color of the bottom of the grass
+ _BottomColor("Bottom Color", Color) = (0.0, 0.0, 0.0, 1.0)
+
+ // Noise texture that gives randomness to grass height
+ _HeightMap("Height Map", 2D) = "white"
+ // Noise texture that gives randomness to the orientation of the grass
+ _RotationMap("Rotation Map", 2D) = "black"
+ // Noise texture that gives randomness to wind strength
+ _WindMap("Wind Map", 2D) = "black"
+ }
+ SubShader
+ {
+ Tags{ "RenderType" = "Opaque" }
+
+ LOD 100
+ Cull Off
+
+ Pass
+ {
+ CGPROGRAM
+ #pragma target 5.0
+ #include "UnityCG.cginc"
+
+ #pragma vertex vert
+ #pragma geometry geom
+ #pragma fragment frag
+
+ float _Height, _Width;
+ float _BottomHeight, _MiddleHeight, _TopHeight;
+ float _BottomWidth, _MiddleWidth, _TopWidth;
+ float _BottomBend, _MiddleBend, _TopBend;
+
+ float _WindPower;
+ float4 _TopColor, _BottomColor;
+ sampler2D _HeightMap, _RotationMap, _WindMap;
+
+ struct v2g
+ {
+ float4 pos : SV_POSITION;
+ float3 nor : NORMAL;
+ float4 hei : TEXCOORD0;
+ float4 rot : TEXCOORD1;
+ float4 wind : TEXCOORD2;
+ };
+
+ struct g2f
+ {
+ float4 pos : SV_POSITION;
+ float4 color : COLOR;
+ };
+
+ v2g vert(appdata_full v)
+ {
+ v2g o;
+ float4 uv = float4(v.texcoord.xy, 0.0f, 0.0f);
+
+ o.pos = v.vertex;
+ o.nor = v.normal;
+ o.hei = tex2Dlod(_HeightMap, uv);
+ o.rot = tex2Dlod(_RotationMap, uv);
+ o.wind = tex2Dlod(_WindMap, uv);
+
+ return o;
+ }
+
+ [maxvertexcount(7)]
+ void geom(triangle v2g i[3], inout TriangleStream<g2f> stream)
+ {
+ float4 p0 = i[0].pos;
+ float4 p1 = i[1].pos;
+ float4 p2 = i[2].pos;
+
+ float3 n0 = i[0].nor;
+ float3 n1 = i[1].nor;
+ float3 n2 = i[2].nor;
+
+ float height = (i[0].hei.r + i[1].hei.r + i[2].hei.r) / 3.0f;
+ float rot = (i[0].rot.r + i[1].rot.r + i[2].rot.r) / 3.0f;
+ float wind = (i[0].wind.r + i[1].wind.r + i[2].wind.r) / 3.0f;
+
+ float4 center = ((p0 + p1 + p2) / 3.0f);
+ float4 normal = float4(((n0 + n1 + n2) / 3.0f).xyz, 1.0f);
+
+ float bottomHeight = height * _Height * _BottomHeight;
+ float middleHeight = height * _Height * _MiddleHeight;
+ float topHeight = height * _Height * _TopHeight;
+
+ float bottomWidth = _Width * _BottomWidth;
+ float middleWidth = _Width * _MiddleWidth;
+ float topWidth = _Width * _TopWidth;
+
+ rot = rot - 0.5f;
+ float4 dir = float4(normalize((p2 - p0) * rot).xyz, 1.0f);
+
+ g2f o[7];
+
+ // Bottom.
+ o[0].pos = center - dir * bottomWidth;
+ o[0].color = _BottomColor;
+
+ o[1].pos = center + dir * bottomWidth;
+ o[1].color = _BottomColor;
+
+ // Bottom to Middle.
+ o[2].pos = center - dir * middleWidth + normal * bottomHeight;
+ o[2].color = lerp(_BottomColor, _TopColor, 0.33333f);
+
+ o[3].pos = center + dir * middleWidth + normal * bottomHeight;
+ o[3].color = lerp(_BottomColor, _TopColor, 0.33333f);
+
+ // Middle to Top.
+ o[4].pos = o[3].pos - dir * topWidth + normal * middleHeight;
+ o[4].color = lerp(_BottomColor, _TopColor, 0.66666f);
+
+ o[5].pos = o[3].pos + dir * topWidth + normal * middleHeight;
+ o[5].color = lerp(_BottomColor, _TopColor, 0.66666f);
+
+ // Top.
+ o[6].pos = o[5].pos + dir * topWidth + normal * topHeight;
+ o[6].color = _TopColor;
+
+ // Bend.
+ dir = float4 (1.0f, 0.0f, 0.0f, 1.0f);
+
+ o[2].pos += dir
+ * (_WindPower * wind * _BottomBend)
+ * sin(_Time);
+ o[3].pos += dir
+ * (_WindPower * wind * _BottomBend)
+ * sin(_Time);
+ o[4].pos += dir
+ * (_WindPower * wind * _MiddleBend)
+ * sin(_Time);
+ o[5].pos += dir
+ * (_WindPower * wind * _MiddleBend)
+ * sin(_Time);
+ o[6].pos += dir
+ * (_WindPower * wind * _TopBend)
+ * sin(_Time);
+
+ [unroll]
+ for (int i = 0; i < 7; i++) {
+ o[i].pos = UnityObjectToClipPos(o[i].pos);
+ stream.Append(o[i]);
+ }
+ }
+
+ float4 frag(g2f i) : COLOR
+ {
+ return i.color;
+ }
+ ENDCG
+ }
+ }
+}
+
+If you apply this shader to a Plane mesh with multiple vertical and horizontal arrangements, it will look like Figure 6.4.
+
++Figure 6.4: Grass Shader results +
+I will explain the process of growing grass from this.
+ +This time, we will generate one grass for each primitive. As shown in Fig. 6.5, the shape of the grass is divided into the lower part, the middle part, and the upper part, and a total of 7 vertices are generated. I will.
+
++Figure 6.5: How to make a grass shape +
+Details are described in the comments, but the coefficient that controls the width and height of each part (lower part, middle part, upper part) in one grass, and the coefficient that controls the width and height of the whole grass It is prepared as the main parameter. Also, it doesn't look good if each grass has the same shape, so we use a noise texture to give it randomness.
+ +float height = (i [0] .hei.r + i [1] .hei.r + i [2] .hei.r) / 3.0f; +float rot = (i[0].rot.r + i[1].rot.r + i[2].rot.r) / 3.0f; +float wind = (i[0].wind.r + i[1].wind.r + i[2].wind.r) / 3.0f; + +float4 center = ((p0 + p1 + p2) / 3.0f); +float4 normal = float4(((n0 + n1 + n2) / 3.0f).xyz, 1.0f); ++
In this part, the height and direction of the grass and the numerical values that are the standard of the strength of the wind are calculated. You can calculate in Geometry Shader, but if you give the vertices meta information, you can treat it like the initial value when performing calculation on Geometry Shader, so calculate with Vertex Shader. I am.
+float4 center = ((p0 + p1 + p2) / 3.0f); +float4 normal = float4(((n0 + n1 + n2) / 3.0f).xyz, 1.0f); ++
Here, the central part of the grass and the direction in which the grass grows are calculated. If you decide this part by noise texture etc., you can give randomness in the direction of grass growth.
+float bottomHeight = height * _Height * _BottomHeight; + +... + +o[6].pos += dir * (_WindPower * wind * _TopBend) * sin(_Time); ++
The program is abbreviated because it is long. In this part, the height and width of the lower part, middle part, and upper part are calculated respectively, and the coordinates are calculated based on that.
+[unroll]
+for (int i = 0; i < 7; i++) {
+ o[i].pos = UnityObjectToClipPos(o[i].pos);
+ stream.Append(o[i]);
+}
+
+In this part, the 7 calculated vertices are passed to Append. This time there is no problem even if the triangles are generated as one connected strip, so RestartStrip is not called.
In addition, the [unroll] attribute is applied to the for statement. This is an attribute that expands the processing in the loop as many times as the number of loops at compile time, and although it has the disadvantage of increasing the code size, it has the advantage of operating at high speed.
So far, we have explained from the explanation of Geometry Shader to the basic and applied programs. There are some features that are slightly different from writing a program that runs on the CPU, but you should be able to utilize it if you suppress the basic part.
+In fact, it is generally said that Geometry Shader is slow. I haven't really felt it, but it may be difficult when the range of use is large. If you are going to use Geometry Shader on a large scale, please take a benchmark etc. once.
+Still, being able to dynamically and freely create and delete new meshes on the GPU will greatly expand the range of ideas. Personally, I think the most important thing is not what technology was used, but what is created and expressed by it. We hope that you will learn about and learn about one tool called Geometry Shader in this chapter, and feel some new possibilities.
+ +
|
![]() |
|
The Marching Cubes method is one of the volume rendering methods, and is an algorithm that converts 3D voxel data filled with scalar data into polygon data. The first paper was published in 1987 by William E. Lorensen and Harvey E. Cline.
+The Marching Cubes method was patented, but since it expired in 2005, it is now free to use.
+ +First, divide the volume data space with a 3D grid.
+
++Figure 7.1: 3D volume data and grid partitioning +
+Next, let's take out one of the divided grids. The boundaries of the eight vertices are calculated as 1 if the values of the eight corners of the grid are above the threshold and 0 if they are below the threshold.
The figure below shows the flow when the threshold is set to 0.5.
++Figure 7.2: Determining the boundary according to the value of the angle +
+There are 256 types of combinations of the eight corners, but if you make full use of rotation and inversion, it will fit in 15 types. Assign triangular polygon patterns corresponding to the 15 types of combinations.
+
++Figure 7.3: Combination of corners +
+The sample project described in this chapter can be found in Assets/GPUMarchingCubes under the Unity Graphics Programming Unity project: https://github.com/IndieVisualLab/UnityGraphicsProgramming.
+For implementation, I ported it to Unity by referring to Paul Bourke's Polygonising a scalar field site * 1 .
+[*1] Polygonising a scalar field http://paulbourke.net/geometry/polygonise/
This time, I will explain along with this sample project.
+There are three main implementations.
+First, let's start with the GPUMarchingCubesDrawMesh class, which initializes the mesh and registers it for drawing.
+ +As explained in the previous section, the Marching cubes method is an algorithm that generates polygons by combining the eight corners of the grid. To do that in real time, you need to dynamically create polygons.
However, it is inefficient to generate a mesh vertex array on the CPU side (C # side) every frame.
So we use Geometry Shader. GeometryShader is a Shader located between VertexShader and FragmentShader, which can increase or decrease the number of vertices processed by VertexShader.
For example, you can add 6 vertices around one vertex to generate a plate polygon.
Furthermore, it is very fast because it is processed on the Shader side (GPU side).
This time, I will use Geometry Shader to generate and display Marching Cubes polygons.
First, define the variables used in the GPUMarchingCubesDrawMesh class.
+Listing 7.1: Definition of variables
+using UnityEngine;
+
+public class GPUMarchingCubesDrawMesh : MonoBehaviour {
+
+ #region public
+ public int segmentNum = 32; // Number of divisions on one side of the grid
+
+ [Range(0,1)]
+ public float threashold = 0.5f; // Threshold for the scalar value to mesh
+ public Material mat; // Material for rendering
+
+ public Color DiffuseColor = Color.green; // Diffuse color
+ public Color EmissionColor = Color.black; // Emission color
+ public float EmissionIntensity = 0; // Emission intensity
+
+ [Range(0,1)]
+ public float metallic = 0; // metallic feeling
+ [Range(0, 1)]
+ public float glossiness = 0.5f; // Glossiness
+ #endregion
+
+ #region private
+ int vertexMax = 0; // number of vertices
+ Mesh[] meshs = null; // Mesh array
+ Material [] materials = null; // Material array for each mesh
+ float renderScale = 1f / 32f; // Display scale
+ MarchingCubesDefines mcDefines = null; // Constant array group for MarchingCubes
+ #endregion
+
+}
+
+Next, create a mesh to pass to the Geometry Shader. The vertices of the mesh should be placed one by one in the divided 3D grid. For example, if the number of divisions on one side is 64, 64 * 64 * 64 = 262,144 vertices are required.
+However, in Unity2017.1.1f1, the maximum number of vertices in one mesh is 65,535. Therefore, each mesh is divided so that the number of vertices is within 65,535.
+Listing 7.2: Meshing part
+void Initialize()
+{
+ vertexMax = segmentNum * segmentNum * segmentNum;
+
+ Debug.Log("VertexMax " + vertexMax);
+
+ // Divide the size of 1Cube by segmentNum to determine the size at the time of rendering
+ renderScale = 1f / segmentNum;
+
+ CreateMesh();
+
+ // Initialize constant array for Marching Cubes used in shader
+ mcDefines = new MarchingCubesDefines();
+}
+
+void CreateMesh()
+{
+ // Since the maximum number of vertices of Mesh is 65535, divide Mesh
+ int vertNum = 65535;
+ int meshNum = Mathf.CeilToInt ((float) vertexMax / vertNum); // Number of meshes to split
+ Debug.Log("meshNum " + meshNum );
+
+ meshs = new Mesh[meshNum];
+ materials = new Material[meshNum];
+
+ // Mesh bounce calculation
+ Bounds bounds = new Bounds(
+ transform.position,
+ new Vector3(segmentNum, segmentNum, segmentNum) * renderScale
+ );
+
+ int id = 0;
+ for (int i = 0; i < meshNum; i++)
+ {
+ // Vertex creation
+ Vector3[] vertices = new Vector3[vertNum];
+ int[] indices = new int[vertNum];
+ for(int j = 0; j < vertNum; j++)
+ {
+ vertices[j].x = id % segmentNum;
+ vertices[j].y = (id / segmentNum) % segmentNum;
+ vertices [j] .z = (id / (segmentNum * segmentNum))% segmentNum;
+
+ indices[j] = j;
+ id++;
+ }
+
+ // Mesh creation
+ meshs[i] = new Mesh();
+ meshs[i].vertices = vertices;
+ // Mesh Topology can be Points because polygons are created with Geometry Shader
+ meshs[i].SetIndices(indices, MeshTopology.Points, 0);
+ meshs[i].bounds = bounds;
+
+ materials[i] = new Material(mat);
+ }
+}
+
+The source MarchingCubesDefines.cs defines a constant array used for rendering the Marching Cubes method and a ComputeBuffer for passing the constant array to the shader. ComputeBuffer is a buffer that stores data used by shaders. Since the data is stored in the memory on the GPU side, it is quickly accessible from the shader.
+In fact, the constant array used in the rendering of the Marching Cubes method can be defined on the shader side. However, the reason why the constant array used in the shader is initialized on the C # side is that the shader has a limitation that the number of literal values (directly written values) can only be registered up to 4096. If you define a huge array of constants in your shader, you will quickly reach the upper limit of the number of literal values.
+Therefore, by storing it in a ComputeBuffer and passing it in, it will not be a literal value, so it will not hit the upper limit. Although this adds a little processing, on the C# side the constant array is stored in a ComputeBuffer and passed to the shader.
+Listing 7.3: ComputeBuffer initialization part
+void Initialize()
+{
+ vertexMax = segmentNum * segmentNum * segmentNum;
+
+ Debug.Log("VertexMax " + vertexMax);
+
+ // Divide the size of 1Cube by segmentNum to determine the size at the time of rendering
+ renderScale = 1f / segmentNum;
+
+ CreateMesh();
+
+ // Initialize constant array for Marching Cubes used in shader
+ mcDefines = new MarchingCubesDefines();
+}
+
+In the Initialize () function mentioned earlier, MarchingCubesDefines is initialized.
+ +Next is the function that calls the rendering process.
This time, I'll use Graphics.DrawMesh () to render multiple meshes at once and to be affected by Unity's lighting. The meaning of DiffuseColor etc. defined in the public variable will be explained in the explanation on the shader side.
The ComputeBuffers of the MarchingCubesDefines class in the previous section are passed to the shader with material.setBuffer.
+Listing 7.4: Rendered part
+void RenderMesh()
+{
+ Vector3 halfSize = new Vector3(segmentNum, segmentNum, segmentNum)
+ * renderScale * 0.5f;
+ Matrix4x4 trs = Matrix4x4.TRS(
+ transform.position,
+ transform.rotation,
+ transform.localScale
+ );
+
+ for (int i = 0; i < meshs.Length; i++)
+ {
+ materials[i].SetPass(0);
+ materials[i].SetInt("_SegmentNum", segmentNum);
+ materials[i].SetFloat("_Scale", renderScale);
+ materials[i].SetFloat("_Threashold", threashold);
+ materials[i].SetFloat("_Metallic", metallic);
+ materials[i].SetFloat("_Glossiness", glossiness);
+ materials[i].SetFloat("_EmissionIntensity", EmissionIntensity);
+
+ materials[i].SetVector("_HalfSize", halfSize);
+ materials[i].SetColor("_DiffuseColor", DiffuseColor);
+ materials[i].SetColor("_EmissionColor", EmissionColor);
+ materials[i].SetMatrix("_Matrix", trs);
+
+ Graphics.DrawMesh(meshs[i], Matrix4x4.identity, materials[i], 0);
+ }
+}
+
+Listing 7.5: Calling Part
+// Use this for initialization
+void Start ()
+{
+ Initialize();
+}
+
+void Update()
+{
+ RenderMesh ();
+}
+
+Start () calls Initialize () to generate a mesh, and the Update () function calls RenderMesh () to render.
The reason for calling RenderMesh () with Update () is that Graphics.DrawMesh () does not draw immediately, but it feels like "registering for rendering process once".
By registering, Unity will adapt the lights and shadows. A similar function is Graphics.DrawMeshNow (), but it draws instantly, so Unity lights and shadows are not applied. Also, you need to call it with OnRenderObject () or OnPostRender () instead of Update ().
The shader this time is roughly divided into two parts, the " rendering part of the entity" and the "rendering part of the shadow" . In addition, three shader functions are executed within each, the vertex shader, the geometry shader, and the fragment shader.
+Since the shader source is long, please refer to the sample project for the entire implementation; I will explain only the important points. The shader file described is GPUMarchingCubesRenderMesh.shader.
+ +At the top of the shader, we define the structure used for rendering.
+Listing 7.6: Structure Definition Part
+// Vertex data coming from the mesh
+struct appdata
+{
+ float4 vertex: POSITION; // vertex coordinates
+};
+
+// Data passed from the vertex shader to the geometry shader
+struct v2g
+{
+ float4 pos: SV_POSITION; // Vertex coordinates
+};
+
+// Data passed from the geometry shader to the fragment shader when rendering the entity
+struct g2f_light
+{
+ float4 pos: SV_POSITION; // Local coordinates
+ float3 normal: NORMAL; // normal
+ float4 worldPos: TEXCOORD0; // World coordinates
+ half3 sh : TEXCOORD3; // SH
+};
+
+// Data passed from the geometry shader to the fragment shader when rendering shadows
+struct g2f_shadow
+{
+ float4 pos: SV_POSITION; // coordinate
+ float4 hpos : TEXCOORD1;
+};
+
+Next, we are defining variables.
Listing 7.7: Variable definition part
+int _SegmentNum; + +float _Scale; +float _Threashold; + +float4 _DiffuseColor; +float3 _HalfSize; +float4x4 _Matrix; + +float _EmissionIntensity; +half3 _EmissionColor; + +half _Glossiness; +half _Metallic; + +StructuredBuffer<float3> vertexOffset; +StructuredBuffer<int> cubeEdgeFlags; +StructuredBuffer<int2> edgeConnection; +StructuredBuffer<float3> edgeDirection; +StructuredBuffer<int> triangleConnectionTable; ++
The contents of various variables defined here are passed by the material.Set ○○ function in the RenderMesh () function on the C # side. ComputeBuffers in the MarchingCubesDefines class have changed their type names to StructuredBuffer <○○>.
+ +The vertex shader is very simple, as most of the work is done by the geometry shader. It simply passes the vertex data passed from the mesh to the geometry shader as is.
+Listing 7.8: Vertex shader implementation
+// Vertex data coming from the mesh
+struct appdata
+{
+ float4 vertex: POSITION; // vertex coordinates
+};
+
+// Data passed from the vertex shader to the geometry shader
+struct v2g
+{
+ float4 pos: SV_POSITION; // coordinate
+};
+
+// Vertex shader
+v2g vert(appdata v)
+{
+ v2g o = (v2g)0;
+ o.pos = v.vertex;
+ return o;
+}
+
+By the way, the vertex shader is common to the entity and the shadow.
+ +Since it is long, I will explain it while dividing it.
+Listing 7.9: Function declaration part of the geometry shader
+// Entity geometry shader +[maxvertexcount (15)] // Definition of the maximum number of vertices output from the shader +void geom_light(point v2g input[1], + inout TriangleStream<g2f_light> outStream) ++
First is the declaration part of the geometry shader.
+[maxvertexcount(15)]Is the definition of the maximum number of vertices output from the shader. With the algorithm of the Marching Cubes method this time, a maximum of 5 triangular polygons can be created per grid, so a total of 15 vertices are output in 3 * 5.
Therefore, write 15 in () of maxvertexcount.
Listing 7.10: Scalar value acquisition part of the eight corners of the grid
+float cubeValue [8]; // Array for getting scalar values at the eight corners of the grid
+
+// Get the scalar values for the eight corners of the grid
+for (i = 0; i < 8; i++) {
+ cubeValue[i] = Sample(
+ pos.x + vertexOffset[i].x,
+ pos.y + vertexOffset [i] .y,
+ pos.z + vertexOffset [i] .z
+ );
+}
+
+pos contains the coordinates of the vertices placed in the grid space when creating the mesh. As the name implies, vertexOffset is an array of offset coordinates added to pos.
+This loop gets the scalar values in the volume data of the coordinates of the eight corners of one vertex = one grid. vertexOffset points to the order of the corners of the grid.
+
++Figure 7.4: Order of grid corner coordinates +
+Listing 7.11: Sampling function part
+// sampling function
+float Sample(float x, float y, float z) {
+
+ // Are the coordinates out of the grid space?
+ if ((x <= 1) ||
+ (y <= 1) ||
+ (z <= 1) ||
+ (x >= (_SegmentNum - 1)) ||
+ (y >= (_SegmentNum - 1)) ||
+ (z >= (_SegmentNum - 1))
+ )
+ return 0;
+
+ float3 size = float3(_SegmentNum, _SegmentNum, _SegmentNum);
+
+ float3 pos = float3(x, y, z) / size;
+
+ float3 spPos;
+ float result = 0;
+
+ // Distance function of 3 spheres
+ for (int i = 0; i < 3; i++) {
+ float sp = -sphere(
+ pos - float3(0.5, 0.25 + 0.25 * i, 0.5),
+ 0.1 + (sin (_Time.y * 8.0 + i * 23.365) * 0.5 + 0.5) * 0.025) + 0.5;
+ result = smoothMax(result, sp, 14);
+ }
+
+ return result;
+}
+
+This function fetches the scalar value of the specified coordinates from the volume data. This time, instead of using a huge amount of 3D volume data, we will calculate the scalar value using a simple algorithm that uses a distance function.
+The 3D shape drawn by the Marching Cubes method this time is defined using what is called a "distance function" .
+The distance function here is, roughly speaking, a "function that satisfies the distance condition".
+For example, the distance function of a sphere is:
+Listing 7.12: Sphere Distance Function
+inline float sphere(float3 pos, float radius)
+{
+ return length(pos) - radius;
+}
+
+Coordinates are entered in pos, but consider the case where the center coordinates of the sphere are the origin (0,0,0). radius is the radius.
+The length is calculated by length (pos), but this is the distance between the origin and pos, and it is subtracted by the radius radius, so if the length is less than the radius, it is a natural but negative value.
+In other words, if you pass the coordinates pos and a negative value is returned, you can judge that the coordinates are inside the sphere.
+The advantage of the distance function is that it is easy to make the program small because the figure can be expressed with a simple calculation formula of several lines. You can find a lot of information about other distance functions on Inigo Quilez's site.
+http://iquilezles.org/www/articles/distfunctions/distfunctions.htm
+Listing 7.13: A composite of the distance functions of three spheres
+// Distance function of 3 spheres
+for (int i = 0; i < 3; i++) {
+ float sp = -sphere(
+ pos - float3(0.5, 0.25 + 0.25 * i, 0.5),
+ 0.1 + (sin (_Time.y * 8.0 + i * 23.365) * 0.5 + 0.5) * 0.025) + 0.5;
+ result = smoothMax(result, sp, 14);
+}
+
+This time, 8 corners (vertices) of 1 square of the grid are used as pos. The distance from the center of the sphere is treated as it is as the density of the volume data.
+As will be described later, the sign is inverted because it is polygonized when the threshold value is 0.5 or more. In addition, the coordinates are slightly shifted to obtain the distances to the three spheres.
+Listing 7.14: smoothMax function
+float smoothMax(float d1, float d2, float k)
+{
+ float h = exp(k * d1) + exp(k * d2);
+ return log(h) / k;
+}
+
+smoothMax is a function that blends the results of distance functions nicely. You can use this to fuse the three spheres like a metaball.
+Listing 7.15: Threshold Check
+// Check if the values at the eight corners of the grid exceed the threshold
+for (i = 0; i < 8; i++) {
+ if (cubeValue[i] <= _Threashold) {
+ flagIndex |= (1 << i);
+ }
+}
+
+int edgeFlags = cubeEdgeFlags[flagIndex];
+
+// Do not draw anything if empty or completely filled
+if ((edgeFlags == 0) || (edgeFlags == 255)) {
+ return;
+}
+
+If the scalar value at the corner of the grid exceeds the threshold, set a bit in flagIndex. Using the flagIndex as an index, the information for generating polygons is extracted from the cubeEdgeFlags array and stored in edgeFlags. If all corners of the grid are below or above the threshold, it is completely inside or outside and no polygons are generated.
+Listing 7.16: Polygon Vertex Coordinate Calculation
+float offset = 0.5;
+float3 vertex;
+for (i = 0; i < 12; i++) {
+ if ((edgeFlags & (1 << i)) != 0) {
+ // Get the threshold offset between the corners
+ offset = getOffset(
+ cubeValue[edgeConnection[i].x],
+ cubeValue[edgeConnection[i].y],
+ _Threashold
+ );
+
+ // Complement the coordinates of the vertices based on the offset
+ vertex = vertexOffset[edgeConnection[i].x]
+ + offset * edgeDirection[i];
+
+ edgeVertices[i].x = pos.x + vertex.x * _Scale;
+ edgeVertices [i] .y = pos.y + vertex.y * _Scale;
+ edgeVertices [i] .z = pos.z + vertex.z * _Scale;
+
+ // Normal calculation (requires vertex coordinates before scaling to resample)
+ edgeNormals [i] = getNormal (
+ defpos.x + vertex.x,
+ defpos.y + vertex.y,
+ defpos.z + vertex.z
+ );
+ }
+}
+
+This is where the vertex coordinates of the polygon are calculated. Looking at the bit of edgeFlags earlier, we are calculating the vertex coordinates of the polygon to be placed on the edge of the grid.
+getOffset gives the ratio (offset) from the current corner to the next corner from the scalar values and thresholds of the two corners of the grid. By offsetting the coordinates of the current corner toward the next corner by offset, the polygon will eventually become smooth.
+In getNormal, the normal is calculated by re-sampling and calculating the gradient.
+Listing 7.17: Concatenate vertices to make a polygon
+// Concatenate vertices to create polygons
+int vindex = 0;
+int findex = 0;
+// Create up to 5 triangles
+for (i = 0; i < 5; i++) {
+ findex = flagIndex * 16 + 3 * i;
+ if (triangleConnectionTable[findex] < 0)
+ break;
+
+ // make a triangle
+ for (j = 0; j < 3; j++) {
+ vindex = triangleConnectionTable[findex + j];
+
+ // Multiply the Transform matrix to convert to world coordinates
+ float4 ppos = mul(_Matrix, float4(edgeVertices[vindex], 1));
+ o.pos = UnityObjectToClipPos(ppos);
+
+ float3 norm = UnityObjectToWorldNormal(
+ normalize(edgeNormals[vindex])
+ );
+ o.normal = normalize(mul(_Matrix, float4(norm,0)));
+
+ outStream.Append (o); // Append vertices to strip
+ }
+ outStream.RestartStrip (); // Break once and start the next primitive strip
+}
+
+This is the place where the polygon is made by connecting the vertex coordinate groups obtained earlier. triangleConnectionTable Contains the indexes of the vertices that connect to the array. Multiply the vertex coordinates by the Transform matrix to convert to world coordinates, and then use UnityObjectToClipPos () to convert to screen coordinates.
+Also, UnityObjectToWorldNormal () converts the normals to the world coordinate system. These vertices and normals will be used for lighting in the next fragment shader.
+TriangleStream.Append () and RestartStrip () are special functions for geometry shaders. Append () adds vertex data to the current strip. RestartStrip () creates a new strip. Since it is a Triangle Stream, it is an image to append up to 3 on one strip.
+ +In order to reflect the lighting such as GI (Global Illumination) of Unity, the lighting processing part of Surface Shader after Generate code is ported.
+Listing 7.18: Fragment Shader Definition
+// Entity fragment shader +void frag_light(g2f_light IN, + out half4 outDiffuse : SV_Target0, + out half4 outSpecSmoothness : SV_Target1, + out half4 outNormal : SV_Target2, + out half4 outEmission : SV_Target3) ++
There are 4 outputs (SV_Target) to output to G-Buffer.
+Listing 7.19: Initializing the SurfaceOutputStandard structure
+#ifdef UNITY_COMPILER_HLSL + SurfaceOutputStandard o = (SurfaceOutputStandard)0; +#else + SurfaceOutputStandard o; +#endif + o.Albedo = _DiffuseColor.rgb; + o.Emission = _EmissionColor * _EmissionIntensity; + o.Metallic = _Metallic; + o.Smoothness = _Glossiness; + o.Alpha = 1.0; + o.Occlusion = 1.0; + o.Normal = normal; ++
Set parameters such as color and gloss to the SurfaceOutputStandard structure that will be used later.
+Listing 7.20: GI-related processing
+// Setup lighting environment +UnityGI gi; +UNITY_INITIALIZE_OUTPUT(UnityGI, gi); +gi.indirect.diffuse = 0; +gi.indirect.specular = 0; +gi.light.color = 0; +gi.light.dir = half3 (0, 1, 0); +gi.light.ndotl = LambertTerm(o.Normal, gi.light.dir); + +// Call GI (lightmaps/SH/reflections) lighting function +UnityGIInput giInput; +UNITY_INITIALIZE_OUTPUT(UnityGIInput, giInput); +giInput.light = gi.light; +giInput.worldPos = worldPos; +giInput.worldViewDir = worldViewDir; +giInput.atten = 1.0; + +giInput.ambient = IN.sh; + +giInput.probeHDR[0] = unity_SpecCube0_HDR; +giInput.probeHDR[1] = unity_SpecCube1_HDR; + +#if UNITY_SPECCUBE_BLENDING || UNITY_SPECCUBE_BOX_PROJECTION +// .w holds lerp value for blending +giInput.boxMin[0] = unity_SpecCube0_BoxMin; +#endif + +#if UNITY_SPECCUBE_BOX_PROJECTION +giInput.boxMax[0] = unity_SpecCube0_BoxMax; +giInput.probePosition [0] = unity_SpecCube0_ProbePosition; +giInput.boxMax[1] = unity_SpecCube1_BoxMax; +giInput.boxMin[1] = unity_SpecCube1_BoxMin; +giInput.probePosition [1] = unity_SpecCube1_ProbePosition; +#endif + +LightingStandard_GI (o, giInput, gi); ++
+GI-related processing. The initial values are put into UnityGIInput, and the GI result calculated by LightingStandard_GI() is written to UnityGI.
+Listing 7.21: Calculation of light reflection
+// call lighting function to output g-buffer +outEmission = LightingStandard_Deferred(o, worldViewDir, gi, + outDiffuse, + outSpecSmoothness, + outNormal); +outDiffuse.a = 1.0; + +#ifndef UNITY_HDR_ON +outEmission.rgb = exp2(-outEmission.rgb); +#endif ++
Pass the calculation results to LightingStandard_Deferred () to calculate the degree of light reflection and write it to the Emission buffer. In the case of HDR, write after sandwiching the part compressed by exp.
+ +It's almost the same as the actual geometry shader. I will explain only where there are differences.
+Listing 7.22: Shadow Geometry Shader
+int vindex = 0;
+int findex = 0;
+for (i = 0; i < 5; i++) {
+ findex = flagIndex * 16 + 3 * i;
+ if (triangleConnectionTable[findex] < 0)
+ break;
+
+ for (j = 0; j < 3; j++) {
+ vindex = triangleConnectionTable[findex + j];
+
+ float4 ppos = mul(_Matrix, float4(edgeVertices[vindex], 1));
+
+ float3 norm;
+ norm = UnityObjectToWorldNormal(normalize(edgeNormals[vindex]));
+
+ float4 lpos1 = mul(unity_WorldToObject, ppos);
+ o.pos = UnityClipSpaceShadowCasterPos (lpos1,
+ normalize(
+ mul(_Matrix,
+ float4(norm, 0)
+ )
+ )
+ );
+ o.pos = UnityApplyLinearShadowBias (o.pos);
+ o.hpos = o.pos;
+
+ outStream.Append(o);
+ }
+ outStream.RestartStrip();
+}
+
+Convert the vertex coordinates to the coordinates of the shadow projection destination with UnityClipSpaceShadowCasterPos () and UnityApplyLinearShadowBias ().
+ +Listing 7.23: Shadow Fragment Shader
+// Shadow Fragment Shader
+fixed4 frag_shadow(g2f_shadow i) : SV_Target
+{
+ return i.hpos.z / i.hpos.w;
+}
+
+It's too short to explain. Actually, the shadow is drawn normally even with return 0 ;. Is Unity doing a good job inside?
+ +When you run it, you should see a picture like this.
+
++Figure 7.5: undulation +
+Also, various shapes can be created by combining distance functions.
+
++Figure 7.6: Kaiware daikon +
+This time I used the distance function for simplification, but I think that the Marching cubes method can also be used to use 3D textures with volume data written in them and to visualize various 3D data. ..
+For game use, you may be able to create games like ASTRONEER *2, which allow you to dig and build terrain.
Everyone, please try to find various expressions with the Marching Cubes method!
http://iquilezles.org/www/articles/distfunctions/distfunctions.htm
+[* 2] ASTRONEER http://store.steampowered.com/app/361420/ASTRONEER/?l=japanese
|
![]() |
|
In this chapter, we will explain the sampling method. This time, we will focus on a sampling method called MCMC (Markov Chain Monte Carlo), which samples multiple appropriate values from a certain probability distribution.
+The simplest method for sampling from a certain probability distribution is the rejection method, but sampling in a three-dimensional space has a large rejected area and cannot withstand actual operation. Therefore, the content of this chapter is that by using MCMC, sampling can be performed efficiently even in high dimensions.
+As for the information about MCMC, on the one hand, systematic information such as books is for statisticians, and there is no guide to implementation for programmers, although it is redundant, and on the other hand, the information on the net has more than 10 lines of sample code. The reality is that there is no content that allows you to quickly and comprehensively understand the theory and implementation, as it is only described and there is no care for the theoretical background. I tried to make the concrete explanations in the following sections as such as possible.
+The explanation of the probability that is the background of MCMC is enough to write one book if it is strictly speaking. This time, with the motto of explaining the minimum theoretical background that can be implemented with peace of mind, we aimed for an intuitive expression with moderate strictness of definition. I think that if you have used mathematics in the first year of university or even a little at work, you can read the program without difficulty.
+ +In this chapter, the Unity project of Unity Graphics Programming https://github.com/IndieVisualLab/Assets/ProceduralModeling in UnityGraphicsProgramming is prepared as a sample program.
+ +To understand the theory of MCMC, we first need to understand the basics of probability. However, there are few concepts to keep in mind in order to understand MCMC this time, only the following four. No likelihood or probability density function required!
+Let's look at them in order.
+When an event occurs with probability P(X), the real number X is called a random variable. For example, when "the probability of getting a 5 on a die is 1/6", "5" is a random variable and "1/6" is a probability. In general, the previous sentence can be rephrased as "the probability that an X comes up on the die is P(X)".
+By the way, if you write it a little like a definition, the random variable X is a mapping X that returns a real number X for the element ω (= one event that happened) selected from the sample space Ω (= all the events that can occur). You can write = X (ω).
+ +I added a slightly confusing definition in the latter half of the random variable because it makes it easier to understand the stochastic process on the assumption that the random variable X is represented by the notation X = X (ω). The stochastic process is the one that can be expressed as X = X (ω, t) by adding the time condition to X. In other words, the stochastic process can be thought of as a kind of random variable with a time condition.
+ +The probability distribution shows the correspondence between the random variable X and the probability P (X). It is often represented by a graph with probability P (X) on the vertical axis and X on the horizontal axis.
+ +Each point is a distribution in which the overall distribution does not change even if it transitions. For a transition matrix π with a distribution P, P that satisfies πP = P is called a stationary distribution. This definition alone is confusing, but it is clear from the figure below.
+
++図8.1: stationaryDistribution +
+Now, in this section, we will touch on the concepts that make up MCMC.
As mentioned at the beginning, MCMC is a method of sampling appropriate values from a certain probability distribution; more specifically, it is a sampling method that combines the Monte Carlo method and Markov chains, under the condition that the given distribution is the stationary distribution. Below, we will explain the Monte Carlo method, Markov chains, and the stationary distribution in that order.
The Monte Carlo method is a general term for numerical calculations and simulations that use pseudo-random numbers.
An example that is often used to introduce numerical calculations using the Monte Carlo method is the following calculation of pi.
// Estimate pi with the Monte Carlo method: throw random points into the
// unit square and count how many land inside the quarter circle.
float pi;
float sampleCount = 10000;
float insideCount = 0;

for (int i = 0; i < sampleCount; i++)
{
    float px = Random.value;
    float py = Random.value;
    // point inside the quarter circle of radius 1?
    if (px * px + py * py <= 1) insideCount++;
}

// area ratio (quarter circle / unit square) = pi / 4
pi = 4 * insideCount / sampleCount;
+
+In short, the ratio of the number of trials in a fan-shaped circle in a 1 x 1 square to the total number of trials is the area ratio, so the pi can be calculated from that. As a simple example, this is also the Monte Carlo method.
+ +A Markov chain is a stochastic process that satisfies Markov properties, in which states can be described discretely.
Markov property is the property that the probability distribution of the future state of a stochastic process depends only on the current state and not on the past state.
++Figure 8.2: Markov Chain +
+As shown in the above figure, in the Markov chain, the future state depends only on the current state and does not directly affect the past state.
+ +MCMC needs to use pseudo-random numbers to converge from an arbitrary distribution to a given stationary distribution. This is because if you do not converge to a given distribution, you will sample from a different distribution each time, and if you do not have a stationary distribution, you will not be able to sample well in a chain. In order for an arbitrary distribution to converge to a given distribution, the following two conditions must be met.
+
++図8.3: Irreducibility +
+
++Figure 8.4: Aperiodicity +
+Any distribution that meets these two conditions can converge to a given stationary distribution. This is called the ergodic nature of the Markov process.
+ +Now, it is difficult to check whether the given distribution satisfies the ergodicity mentioned earlier, so in many cases, we will strengthen the conditions and investigate within the range that satisfies the condition of "detailed balance". One of the Markov chain methods that achieves detailed balance is called the Metropolis method.
+The metropolis method samples by taking the following two steps.
+The merit of the metropolis method is that even after the transition to the maximum value of the probability distribution is completed, if the value of r is small, the probability value transitions to the smaller one, so sampling proportional to the probability value can be performed around the maximum value.
+By the way, the Metropolis method is a kind of Metropolis-Hastings method (MH method). The Metropolis method uses a symmetrical distribution for the proposed distribution, but the MH method does not.
+ +Let's take a look at the actual code excerpt and see how to implement MCMC.
+First, prepare a three-dimensional probability distribution. This is called the target distribution. This is the "target" distribution because it is the distribution you actually want to sample.
// Build the target distribution: evaluate simplex noise on an
// lEdge x lEdge x lEdge lattice and store (x, y, z, density) per cell.
void Prepare()
{
    var noise = new SimplexNoiseGenerator();
    for (int x = 0; x < lEdge; x++)
    for (int y = 0; y < lEdge; y++)
    for (int z = 0; z < lEdge; z++)
    {
        // flatten (x, y, z) into a 1D index, x fastest
        var index = x + lEdge * (y + lEdge * z);
        data[index] = new Vector4(x, y, z, noise.noise(x, y, z));
    }
}
+
+This time, we adopted simplex noise as the target distribution.
+Next, actually run MCMC.
// Lazily yields up to `limit` MCMC samples after a burn-in of `nInit`
// transitions. `th` is the density threshold passed to each transition.
public IEnumerable<Vector3> Sequence(int nInit, int limit, float th)
{
    Reset();

    // Burn-in: advance the chain without emitting samples, since the
    // early part of the walk is still far from the target distribution.
    var step = 0;
    while (step < nInit)
    {
        Next(th);
        step++;
    }

    // Sampling phase: emit the current point, then transition.
    for (var emitted = 0; emitted < limit; emitted++)
    {
        yield return _curr;
        Next(th);
    }
}
+
// Re-seed the chain at a uniformly random point inside the Scale box
// whose density is positive, giving up after limitResetLoopCount tries.
public void Reset()
{
    var attempts = 0;
    while (_currDensity <= 0f && attempts < limitResetLoopCount)
    {
        _curr = new Vector3(
            Scale.x * Random.value,
            Scale.y * Random.value,
            Scale.z * Random.value
        );
        _currDensity = Density(_curr);
        attempts++;
    }
}
+
+Run the process using a coroutine. Since MCMC starts processing from a completely different place when one Markov chain ends, it can be conceptually considered as parallel processing. This time, I use the Reset function to run another process after a series of processes. By doing this, you will be able to sample well even if there are many maxima of the probability distribution.
+Since the first part of the transition is likely to be a point away from the target distribution, this section is burn-in without sampling. When the target distribution is sufficiently approached, sampling and transition set are performed a certain number of times, and when finished, another series of processing is started.
+Finally, it is the process of determining the transition.
Since it is three-dimensional, the proposed distribution uses a trivariate standard normal distribution as follows.
// Draws one sample from the trivariate standard normal distribution
// (three independent N(0, 1) components).
public static Vector3 GenerateRandomPointStandard()
{
    return new Vector3(
        RandomGenerator.rand_gaussian(0f, 1f),
        RandomGenerator.rand_gaussian(0f, 1f),
        RandomGenerator.rand_gaussian(0f, 1f)
    );
}
+
// Draws one sample from N(mu, sigma^2) via the Box-Muller transform,
// which turns two independent uniform samples into a standard normal.
public static float rand_gaussian(float mu, float sigma)
{
    // Random.value can return exactly 0, for which Mathf.Log yields
    // -Infinity and the result becomes non-finite; clamp the first
    // uniform away from 0.
    float u1 = Mathf.Max(Random.value, 1e-7f);
    float u2 = Random.value;
    float z = Mathf.Sqrt(-2.0f * Mathf.Log(u1))
        * Mathf.Sin(2.0f * Mathf.PI * u2);
    return mu + sigma * z;
}
+
+In the Metropolis method the proposal distribution must be symmetric, so the mean is kept at 0; however, if the covariance is set to something other than the identity, the sample is derived as follows using the Cholesky decomposition.
// Draws one sample from the trivariate normal N(0, sigma) for a symmetric
// covariance matrix sigma, via the Cholesky decomposition sigma = C * C^T
// (C lower-triangular): multiplying a vector of independent standard
// normals by C yields a sample with covariance sigma.
public static Vector3 GenerateRandomPoint(Matrix4x4 sigma)
{
    var c00 = Mathf.Sqrt(sigma.m00); // simplified from m00 / sqrt(m00)
    var c10 = sigma.m10 / c00;
    // BUG FIX: the (2,0) entry of the Cholesky factor must be derived from
    // sigma.m20, not sigma.m21 (the original read the wrong element).
    var c20 = sigma.m20 / c00;
    var c11 = Mathf.Sqrt(sigma.m11 - c10 * c10);
    var c21 = (sigma.m21 - c20 * c10) / c11;
    var c22 = Mathf.Sqrt(sigma.m22 - (c20 * c20 + c21 * c21));
    var r1 = RandomGenerator.rand_gaussian(0f, 1f);
    var r2 = RandomGenerator.rand_gaussian(0f, 1f);
    var r3 = RandomGenerator.rand_gaussian(0f, 1f);
    // y = C * r, computed row by row
    var x = c00 * r1;
    var y = c10 * r1 + c11 * r2;
    var z = c20 * r1 + c21 * r2 + c22 * r3;
    return new Vector3(x, y, z);
}
+
+To determine the transition destination, take the ratio of the probabilities of the proposed distribution (one point above) next and the immediately preceding point_curr on the target distribution, and if it is larger than a uniform random number, it will transition, otherwise it will not transition. I will.
Since the process of finding the probability value corresponding to the coordinates of the transition destination is heavy (the amount of processing of O (n ^ 3)), the probability value is approximated. Since we are using a distribution in which the target distribution changes continuously this time, the established value is approximately derived by performing a weighted average that is inversely proportional to the distance.
// Performs one Metropolis transition: propose a random-walk step and
// accept or reject it based on the density ratio and a threshold.
void Next(float threshold)
{
    // Symmetric proposal: current point plus a trivariate standard-normal
    // offset (symmetry is what makes plain Metropolis valid here).
    Vector3 next =
        GaussianDistributionCubic.GenerateRandomPointStandard()
        + _curr;

    var densityNext = Density(next);
    // Metropolis acceptance: if the current density is non-positive the
    // move is always accepted (the short-circuit also avoids dividing by
    // a non-positive density); otherwise accept with probability
    // min(1, densityNext / _currDensity).
    bool flag1 =
        _currDensity <= 0f ||
        Mathf.Min(1f, densityNext / _currDensity) >= Random.value;
    // Additional rejection: never move to a point at or below the
    // density threshold.
    bool flag2 = densityNext > threshold;
    if (flag1 && flag2)
    {
        _curr = next;
        _currDensity = densityNext;
    }
}
+
// Approximates the target density at `pos` by Monte Carlo: sample
// weightReferenceloopCount random lattice points from Data and accumulate
// their stored density (w component) weighted by exp(-squaredDistance).
float Density(Vector3 pos)
{
    float weight = 0f;
    for (int i = 0; i < weightReferenceloopCount; i++)
    {
        // NOTE(review): flooring Random.value * (Data.Length - 1) yields an
        // index in [0, Data.Length - 2] almost surely, so the last element
        // is effectively never sampled — presumably acceptable here; confirm.
        int id = (int)Mathf.Floor(Random.value * (Data.Length - 1));
        Vector3 posi = Data[id]; // Vector4 -> Vector3 drops the density w
        float mag = Vector3.SqrMagnitude(pos - posi);
        weight += Mathf.Exp(-mag) * Data[id].w;
    }
    return weight;
}
+
+This time, the repository also contains a sample of the 3D rejection method (a simple Monte Carlo method as shown in the circle example), so it is a good idea to compare them. With the rejection method, sampling cannot be done well if the rejection standard value is set stronger, whereas with MCMC, similar sampling results can be presented more smoothly. Also, in MCMC, if the width of the random walk for each step is reduced, sampling is performed from a close space in a series of chains, so it is possible to easily reproduce a cluster of plants and flowers.
+ +
|
![]() |
|
In this chapter, we will introduce a video projection method that allows you to experience the experience of being in the CG world by projecting images on multiple surfaces such as the walls and floor of a rectangular parallelepiped room. In addition, as the background, we will explain camera processing in CG and its application examples. The sample project can be found in Assets / Room Projection in Unity Project * 1 of Unity Graphics Programming, so please have a look. In addition, this content has been significantly revised and revised based on the content contributed to the "Mathematics Seminar December 2016" * 2 .
+[* 1] Sample project https://github.com/IndieVisualLab/UnityGraphicsProgramming
[*2] https://www.nippyo.co.jp/shop/magazine/7292.html
Camera processing in general CG is processing that projects a 3D model of the visible range onto a 2D image using perspective projection conversion. The perspective projection conversion is a local coordinate system with the center of each model as the origin, a world coordinate system with the origin at a uniquely determined location in the CG world, a view coordinate system centered on the camera, and a clip coordinate system for clipping (this). Is a 4-dimensional coordinate system in which w also has meaning, and the 3-dimensional version is called NDC (Normalized Device Coordinates ), which is a screen coordinate system that represents the 2-dimensional position of the output screen. The coordinates of the vertices are projected in order.
+
++Figure 9.1: Flow of coordinate transformation +
+In addition, since each of these transformations can be represented by one matrix, it is common practice to multiply the matrices in advance so that some coordinate transformations can be done by multiplying the matrix and the vector once.
+ +In a camera in CG, a quadrangular pyramid with the apex at the camera position and the bottom surface at the camera orientation is called the viewing frustum, and can be illustrated as a 3D volume that represents the projection of the camera.
+
++Figure 9.2: Frustum +
+If the viewing frustums of the two cameras share the apex and the sides are in contact, they will be connected visually even if the projection surfaces are facing different directions, and the perspectives when viewed from the apex will be the same. I will.
+
++Figure 9.3: Touching frustum (placed slightly apart for clarity) +
+This can be understood by considering the view frustum as a set of innumerable lines of sight and thinking that the lines of sight are continuous (= images that are consistent in perspective can be projected). This idea was extended to five cameras, and the angle of view was adjusted so that the five view frustums shared the apex and were in contact with the adjacent view frustums, thereby corresponding to each surface of the room. You can generate a video. Theoretically, 6 faces including the ceiling are possible, but this time we consider it as a projector installation space and assume 5 faces excluding the ceiling.
+
++Figure 9.4: Five viewing frustums corresponding to the room +
+By viewing from this apex, that is, the location corresponding to all camera positions, you can view images that are consistent in perspective regardless of the direction of the room.
+ +The projection matrix (hereinafter referred to as Proj ) is a matrix that transforms from the view coordinate system to the clip coordinate system.
+It is expressed as follows by the formula.
+C = Proj * V ++
Furthermore, dividing each element of C by C_{w} yields the position coordinates in NDC.
+NDC = (\frac{C_{x}}{C_{w}},\frac{C_{y}}{C_{w}},\frac{C_{z}}{C_{w}})
+
+In addition, Proj is constructed so that C_{w} = -V_{z}. The sign is negative because the forward direction of the view coordinate system is the negative Z direction. In NDC the display range is -1 \leq x, y, z \leq 1, and this conversion scales V_{x,y} according to V_{z} to obtain a perspective effect.
+Now let's think about how to make Proj . Let N be the coordinate of the upper right point of nearClipPlane and F be the coordinate of the upper right point of farClipPlane in the view coordinate system .
+
++Figure 9.5: N, F +
+First of all, if you pay attention to x ,
+Considering
+Proj[0,0] = \frac{N_{z}}{N_{x}}
+
+If so, this works. Since the ratio of x to z does not change along the right edge of the view frustum, any such pair x, z can be used — for example Proj[0,0] = \frac{F_{z}}{F_{x}} gives the same value.
+Similarly
+Proj[1,1] = \frac{N_{z}}{N_{y}}
+
+Can also be obtained.
+A little ingenuity is required for z . The calculation related to z in Proj * V is as follows.
+C_{z} = Proj[2,2] * V_{z} + Proj[2,3] * V_{w} \quad (\text{where } V_{w} = 1)
+
+NDC_{z} = \frac{C_{z}}{C_{w}} \quad (\text{where } C_{w} = -V_{z})
+
+Here, we want the mapping N_{z} → -1, F_{z} → 1, so let a = Proj[2,2], b = Proj[2,3].
+-1 = \frac{1}{N_{z}} (aN_{z} +b),
+1 = \frac{1}{F_{z}} (aF_{z} +b)
+
+A solution can be obtained from this system of equations.
+Proj[2,2] = a = \frac{F_{z}+N_{z}}{F_{z}-N_{z}},
+Proj[2,3] = b = \frac{-2F_{z}N_{z}}{F_{z}-N_{z}}
+
+Also, since we want C_{w} = -V_{z},
+Proj[3,2] = -1 ++
will do.
+Therefore, the required Proj has the following form.
+Proj = \left(
+\begin{array}{cccc}
+ \frac{N_{z}}{N_{x}} & 0 & 0 & 0\\
+ 0 & \frac{N_{z}}{N_{y}} & 0 & 0\\
+ 0 & 0 & \frac{F_{z}+N_{z}}{F_{z}-N_{z}} & \frac{-2F_{z}N_{z}}{F_{z}-N_{z}} \\
+ 0 & 0 & -1 & 0
+\end{array}
+\right)
+
+Some of the people who have dealt with projection matrices in shaders may feel uncomfortable with the contents so far. Actually, the handling of Unity's projection matrix is complicated, and the contents so far are an explanation of Camera.projectionMatrix. This value is OpenGL compliant regardless of platform * 3 . This is why -1 \leq NDC_{z} \leq 1 and C_{w} = -V_{z}.
+[*3] https://docs.unity3d.com/ScriptReference/GL.GetGPUProjectionMatrix.html
However, Camera.projectionMatrix is not always used for perspective projection conversion as it is converted to a platform-dependent form when it is passed to the shader in Unity. In particular, the range and orientation of NDC_ {z} (that is, the handling of the Z buffer) are diverse and easy to get caught * 4 .
+[*4] https://docs.unity3d.com/Manual/SL-PlatformDifferences.html
The shape of the bottom of the view frustum, or projection plane, depends on the camera's fov (fieldOfView) and aspect (aspect ratio) . In Unity's camera, the angle of view is published in the Inspector, but the aspect ratio is not published, so you need to edit it from the code. The code to calculate the angle of view and aspect ratio from faceSize (the size of the surface of the room) and distance (distance from the viewpoint to the surface) is as follows.
+Listing 9.1: Finding the angle of view and aspect ratio
+camera.aspect = faceSize.x / faceSize.y; +camera.fieldOfView = 2f * Mathf.Atan2(faceSize.y * 0.5f, distance) + * Mathf.Rad2Deg; ++
Note that Mathf.Atan2 () is used to find half the angle of fov with radian, doubled, and corrected to degree for substitution in Camera.fieldOfView.
+ +Consider the case where the viewpoint is not in the center of the room. If the projection plane can be translated vertically and horizontally with respect to the viewpoint, the same effect as moving the viewpoint with respect to the projection plane can be obtained. In the real world, this corresponds to a function called lens shift that adjusts the projection position of the image with a projector or the like .
+
++Figure 9.6: Lens shift +
+Looking back at the mechanism by which the camera performs perspective projection, what part of the lens shift is the process? When projecting to NDC with a projection matrix, it seems good to shift x and y. Let's look at the Projection matrix again.
+Proj = \left(
+\begin{array}{cccc}
+ \frac{N_{z}}{N_{x}} & 0 & 0 & 0\\
+ 0 & \frac{N_{z}}{N_{y}} & 0 & 0\\
+ 0 & 0 & \frac{F_{z}+N_{z}}{F_{z}-N_{z}} & \frac{-2F_{z}N_{z}}{F_{z}-N_{z}} \\
+ 0 & 0 & -1 & 0
+\end{array}
+\right)
+
+To shift C_{x} and C_{y}, one might want to put the offsets into the translation components of the matrix, Proj[0,3] and Proj[1,3], but considering that the result is later divided by C_{w}, the correct answer is to put them in Proj[0,2] and Proj[1,2].
+Proj = \left(
+\begin{array}{cccc}
+ \frac{N_{z}}{N_{x}} & 0 & LensShift_{x} & 0\\
+ 0 & \frac{N_{z}}{N_{y}} & LensShift_{y} & 0\\
+ 0 & 0 & \frac{F_{z}+N_{z}}{F_{z}-N_{z}} & \frac{-2F_{z}N_{z}}{F_{z}-N_{z}} \\
+ 0 & 0 & -1 & 0
+\end{array}
+\right)
+
+Since the unit of LensShift is NDC, the size of the projection plane is normalized to -1 to 1. The code looks like this:
+Listing 9.2: Reflect lens shift in projection matrix
+var shift = new Vector2( + positionOffset.x / faceSize.x, + positionOffset.y / faceSize.y +) * 2f; +var projectionMatrix = camera.projectionMatrix; +projectionMatrix[0,2] = shift.x; +projectionMatrix[1,2] = shift.y; +camera.projectionMatrix = projectionMatrix; ++
+Note that once Camera.projectionMatrix has been set, subsequent changes to Camera.fieldOfView will not be reflected unless Camera.ResetProjectionMatrix() is called. *5
+[*5] https://docs.unity3d.com/ScriptReference/Camera-projectionMatrix.html
It is assumed that the viewer's viewpoint position can be tracked in a rectangular parallelepiped room. Since the size of the projection surface of the viewing frustum can be translated by the method in the previous section, the viewing frustum is moved so that the viewpoint position is the apex of the viewing frustum and the wall surface or floor surface is the projection surface. You can ask for it. By making each camera such a viewing frustum, it is possible to create an image for each projection plane. By projecting this image into an actual room, the viewer will be able to see the CG world with perspective.
+
++Figure 9.7: Room simulation (overhead view) +
+
++Figure 9.8: Room simulation (first person view) +
+In this chapter, we introduced a projection method that matches perspectives on multiple projection planes by applying a projection matrix. I think that it can be said that it is a VR with a non-approach similar to the recent HMD type in that it makes a wide range of the field of view a dynamically responsive image instead of placing the display in front of you. In addition, this method does not deceive binocular parallax or eye focus, so it may not be possible to see stereoscopically as it is, and it may look like a "moving picture projected on the wall". It seems that we need to devise a little more to increase the immersive feeling.
+A mechanism called "CAVE" * 6 that combines the same method with stereoscopic vision is known.
+[*6] https://en.wikipedia.org/wiki/Cave_automatic_virtual_environment
|
![]() |
|
I try to hit all the balls that come in, such as installations, signage, the Web (front end / back end), and smartphone apps.
+ + +While living only with the momentum and atmosphere, I suddenly became an interactive artist / engineer, and it became very difficult. I manage to do it while studying with the help of the people around me.
+ + +Interaction engineer. In the field of video expression such as installation, signage, stage production, music video, concert video, VJ, etc., we are producing content that makes use of real-time and procedural characteristics. I have been active several times in a unit called Aqueduct with sugi-cho and mattatz.
+Former technical artist of a game development company. I like art, design and music, so I turned to interactive art. My hobbies are samplers, synths, musical instruments, records, and equipment. I started Twitter.
+ + +Interactive artist / engineer and student. While studying snow physics simulation at university, he also works in engineering. Recently I'm having an affair with Touch Designer. Let's talk on twitter.
+Interaction engineer, programmer of small fish gale, loose fluffy force, anything shop that makes anything. My favorite school classroom is the drawing room or the library.
+ + +An interactive artist / engineer who works in an atmosphere. I like interactive content more than three meals. I like potatoes and don't eat radish sprouts. I often post Gene videos on Twitter. I do VJ once in a while.
+Interactive engineer. I also do web production and graphic design work individually. Please contact twitter for production requests.
+ + +Former game developer, current interactive artist / engineer. When I tried to eat breakfast to be careful about my health, I lost about 2 kg for some reason.
+ + +A person who makes interactive art in Unity. Freelance. hi@sugi.cc
+ \ No newline at end of file diff --git a/html-translated/vol1/Contributors_files/cleardot.gif b/html-translated/vol1/Contributors_files/cleardot.gif new file mode 100644 index 0000000..1d11fa9 Binary files /dev/null and b/html-translated/vol1/Contributors_files/cleardot.gif differ diff --git a/html-translated/vol1/Contributors_files/element_main.js b/html-translated/vol1/Contributors_files/element_main.js new file mode 100644 index 0000000..4c5de3c --- /dev/null +++ b/html-translated/vol1/Contributors_files/element_main.js @@ -0,0 +1,486 @@ +(function(){/* + + Copyright The Closure Library Authors. + SPDX-License-Identifier: Apache-2.0 +*/ +var aa='" style="background-image:url(',ba="-disabled",ca="-document.getElementById('",da="/translate_a/t",ea="/translate_suggestion?client=",fa='
|
![]() |
|
This book is mainly a book that explains the technology related to graphics programming by Unity. Graphics programming is broad in a nutshell, and many books have been published that cover only Shader techniques. This book also contains articles on various topics that the authors are interested in, but the visual results should be easy to see and useful for creating your own effects. In addition, the source code explained in each chapter is available at https://github.com/IndieVisualLab/UnityGraphicsProgramming, so you can read this manual while executing it at hand.
+The difficulty level varies depending on the article, and depending on the amount of knowledge of the reader, some content may be unsatisfactory or too difficult. Depending on your knowledge, it's a good idea to read articles on the topic you are interested in. For those who usually do graphics programming at work, I hope it will lead to more effect drawers, and students are interested in visual coding, I have touched Processing and openFrameworks, but I still have 3DCG. For those who are feeling a high threshold, I would be happy if it would be an opportunity to introduce Unity and learn about the high expressiveness of 3DCG and the start of development.
+IndieVisualLab is a circle created by colleagues (& former colleagues) in the company. In-house, we use Unity to program the contents of exhibited works in the category generally called media art, and we are using Unity, which is a bit different from the game system. In this book, knowledge that is useful for using Unity in the exhibited works may be scattered.
+ +If you have any impressions, concerns, or other requests regarding this book (such as wanting to read the explanation about 〇〇), please feel free to use the Web form ( https://docs.google.com/forms/d/e/1FAIpQLSdxeansJvQGTWfZTBN_2RTuCK_kRqhA6QHTZKVXHCijQnC8zw/ Please let us know via viewform ) or email (lab.indievisual@gmail.com).
\ No newline at end of file diff --git a/html-translated/vol1/Preface_files/cleardot.gif b/html-translated/vol1/Preface_files/cleardot.gif new file mode 100644 index 0000000..1d11fa9 Binary files /dev/null and b/html-translated/vol1/Preface_files/cleardot.gif differ diff --git a/html-translated/vol1/Preface_files/element_main.js b/html-translated/vol1/Preface_files/element_main.js new file mode 100644 index 0000000..4c5de3c --- /dev/null +++ b/html-translated/vol1/Preface_files/element_main.js @@ -0,0 +1,486 @@ +(function(){/* + + Copyright The Closure Library Authors. + SPDX-License-Identifier: Apache-2.0 +*/ +var aa='" style="background-image:url(',ba="-disabled",ca="-document.getElementById('",da="/translate_a/t",ea="/translate_suggestion?client=",fa='
|
![]() |
|
In this chapter, we will develop GPU Voxelizer, a program that uses GPU to make voxels of meshes in real time.
+The sample in this chapter is "RealTime GPUBasedVoxelizer" from
https://github.com/IndieVisualLab/UnityGraphicsProgramming2
.
First, after confirming the voxelization procedure and the obtained results based on the implementation on the CPU, we will explain the implementation method on the GPU and introduce an example of effects that apply high-speed voxelization.
+ +A voxel represents a basic unit in a three-dimensional reciprocal lattice space. It can be imagined as an increase in the dimension of the pixel (Pixel) used as the basic unit of the two-dimensional normal lattice space, and it is named Voxel in the sense of Pixel with Volume. Voxels can express volume, and each voxel may have a data format that stores values such as concentration, which may be used for visualization and analysis of medical and scientific data.
+Also, in the game, Minecraft * 1 is listed as using voxels.
+It takes time to create a detailed model and stage, but if it is a voxel model, it can be created with relatively little effort, and even if it is free, there are excellent editors such as MagicaVoxel * 2, and the model looks like a 3D pixel art. Can be created.
+[*1] https://minecraft.net
[*2] http://ephtracy.github.io/
I will explain the voxelization algorithm based on the implementation on the CPU. The CPU implementation is described in CPUVoxelizer.cs.
+ +The general flow of voxelization is as follows.
+Voxelization in CPU is a static function of CPUVoxelizer class
+CPUVoxelizer.cs
// Voxelizes a mesh on the CPU. Voxelize() is the static entry point:
// given a mesh and a resolution it returns, via out parameters, the voxel
// center positions and the edge length (unit) of one voxel.
public class CPUVoxelizer
{
    public static void Voxelize (
        Mesh mesh,                // mesh to voxelize
        int resolution,           // voxel count along the longest bounds axis
        out List<Vector3> voxels, // resulting voxel positions
        out float unit,           // edge length of a single voxel
        bool surfaceOnly = false  // true: surface voxels only, skip interior fill
    ) {
        ...
    }
    ...
}
+
+Execute by calling. If you specify the mesh and resolution you want to voxel in the argument and execute it, the voxel array voxels and the unit representing the size of one voxel are returned via the reference argument.
+In the following, I will explain what is done inside the Voxelize function along the general flow.
+ +To make voxels, first set the voxel resolution. The finer the resolution, the smaller the cube will be built, so a detailed voxel model can be generated, but it requires more calculation time.
+
++Figure 1.1: Differences in voxel resolution +
+Specifies the range to voxelize the target mesh model. If you specify the BoundingBox (the smallest rectangular parallelepiped that fits all the vertices of the model) of the mesh model as the voxelization range, you can voxelize the entire mesh model.
+
++Figure 1.2: Mesh BoundingBox +
+It should be noted here that if the BoundingBox of the mesh model is used as it is as the voxelization range, problems will occur when voxelizing a mesh that has a surface that exactly overlaps the BoundingBox, such as a Cube mesh. ..
+As will be described in detail later, when voxels are created, the intersection of the triangles and voxels that make up the mesh is determined. However, if the triangles and voxel surfaces overlap exactly, the intersection may not be determined correctly.
+Therefore, the range in which the BoundingBox of the mesh model is expanded by "half the length of the unit length that constitutes one voxel" is specified as the voxelization range.
+CPUVoxelizer.cs
+mesh.RecalculateBounds(); +var bounds = mesh.bounds; + +// Calculate the unit lengths that make up one voxel from the specified resolution +float maxLength = Mathf.Max( + bounds.size.x, + Mathf.Max(bounds.size.y, bounds.size.z) +); +unit = maxLength / resolution; + +// Half the unit length +var hunit = unit * 0.5f; + +// "Half the length of the unit length that makes up one voxel" Expanded range +// Scope of voxels + +// Minimum value of bounds to voxelize +var start = bounds.min - new Vector3 (unit, unit, unit); + +// Maximum value of bounds to voxelize +var end = bounds.max + new Vector3 (unit, unit, unit); + +// Size of bounds to voxelize +var size = end - start; ++
The sample code provides a Voxel_t structure as a structure that represents voxels.
+Voxel.cs
// One voxel cell. Sequential layout so the struct can be shared with the
// GPU as a structured buffer without field reordering.
[StructLayout(LayoutKind.Sequential)]
public struct Voxel_t {
    public Vector3 position; // voxel center position
    public uint fill;        // flag: whether this voxel should be filled
    public uint front;       // flag: whether the triangle intersecting this voxel faces front as seen from the fill-scan direction
    ...
}
+
+A three-dimensional array of this Voxel_t is generated, and voxel data is stored in it.
+CPUVoxelizer.cs
+// Determine the size of 3D voxel data based on the unit length of voxels and the range of voxelization +var width = Mathf.CeilToInt(size.x / unit); +var height = Mathf.CeilToInt(size.y / unit); +var depth = Mathf.CeilToInt(size.z / unit); +var volume = new Voxel_t[width, height, depth]; ++
Also, in order to refer to the position and size of each voxel in the subsequent processing, generate an AABB array that matches the 3D voxel data in advance.
+CPUVoxelizer.cs
// Precompute an AABB for every voxel cell so later triangle-voxel
// intersection tests can look up each voxel's position and size directly.
var boxes = new Bounds[width, height, depth];
var voxelUnitSize = Vector3.one * unit;
for (int x = 0; x < width; x++)
{
    for (int y = 0; y < height; y++)
    {
        for (int z = 0; z < depth; z++)
        {
            // cell center in mesh space, offset from the padded bounds origin
            var center = start + new Vector3(x, y, z) * unit;
            boxes[x, y, z] = new Bounds(center, voxelUnitSize);
        }
    }
}
+
+AABB (Axis-Aligned Bounding Box) is a rectangular parallelepiped boundary figure whose sides are parallel to the XYZ axes in 3D space.
+AABB is often used for collision detection, and in some cases it is used for collision detection between two meshes or for simple collision detection between a certain mesh and a light beam.
+If you want to make a strict collision detection for a mesh, you have to make a judgment for all the triangles that make up the mesh, but if you only use AABB including the mesh, you can calculate at high speed, which is convenient.
+
++Figure 1.3: Collision detection between AABBs of two polygonal objects +
+Generate voxels located on the surface of the mesh as shown in the figure below.
+
++Figure 1.4: First, generate voxels located on the surface of the mesh, and then generate voxels based on it so as to fill the contents of the mesh. +
+To find the voxels located on the surface of the mesh, it is necessary to determine the intersection of each of the triangles that make up the mesh and the voxels.
+ +SAT (Separating Axis Theorem) is used to determine the intersection of a triangle and a voxel. The intersection determination algorithm using SAT is not limited to triangles and voxels, but can be used for general purposes as intersection determination between convex surfaces.
+The SAT has proved that:
+If you want a straight line with the entire object A on one side and the entire object B on the other, object A and object B will not intersect. Such a straight line that separates two objects is called a separation straight line, and the separation line is always orthogonal to the separation axis.
+If the SAT finds an axis (separation axis) where the projections of two convex surfaces do not overlap, it can be derived that the two convex surfaces do not intersect because there is a straight line that separates the two convex surfaces. Conversely, if no separation axis is found, it can be determined that the two convex surfaces intersect. (If the shape is concave, it may not intersect even if the separation axis is not found.)
+When a convex shape is projected onto an axis, the shadow of that shape appears as if it were projected onto a line that represents that axis. This can be represented as a line segment on the axis and can be represented by the range interval [min, max].
+
++Figure 1.5: Convex shape projected onto a certain axis and the range of the convex shape projected on the axis (min, max) +
+As shown in the figure below, when there are two convex separation straight lines, the projection sections of the convex shape with respect to the separation axis orthogonal to the straight lines do not overlap.
+
++Figure 1.6: If there is a straight line that separates the two convex shapes, the projection sections on the axes orthogonal to the straight line do not overlap. +
+However, even with the same two convex surfaces, projections on other non-separable axes may overlap, as shown in the figure below.
+
++Figure 1.7: When projecting on an axis orthogonal to a straight line that does not separate the two convex shapes, the projections may overlap. +
+For some shapes, the possible separating axes are obvious. To determine the intersection of two such shapes A and B, project both shapes onto each of the possible separating axes and check whether the two projection intervals [Amin, Amax] and [Bmin, Bmax] overlap. Expressed as a formula, if Amax < Bmin or Bmax < Amin, the two intervals do not overlap.
+The axis that can be the separation axis between the convex surfaces is
+From this, the axis that can be the separation axis between the triangle and the voxel (AABB) is
+Therefore, for each of these 13 axes, the intersection of the triangle and the voxel is determined by determining whether or not the projections overlap.
+Since it would be wasteful to test one triangle against all of the voxel data, we compute the AABB containing the triangle and test for intersection only against the voxels contained in it.
+CPUVoxelizer.cs
+// Calculate the triangle AABB
+var min = tri.bounds.min - start;
+var max = tri.bounds.max - start;
+int iminX = Mathf.RoundToInt(min.x / unit);
+int iminY = Mathf.RoundToInt(min.y / unit);
+int iminZ = Mathf.RoundToInt(min.z / unit);
+int imaxX = Mathf.RoundToInt(max.x / unit);
+int imaxY = Mathf.RoundToInt(max.y / unit);
+int imaxZ = Mathf.RoundToInt(max.z / unit);
+iminX = Mathf.Clamp(iminX, 0, width - 1);
+iminY = Mathf.Clamp(iminY, 0, height - 1);
+iminZ = Mathf.Clamp(iminZ, 0, depth - 1);
+imaxX = Mathf.Clamp(imaxX, 0, width - 1);
+imaxY = Mathf.Clamp(imaxY, 0, height - 1);
+imaxZ = Mathf.Clamp(imaxZ, 0, depth - 1);
+
+// Judge the intersection with voxels in the triangular AABB
+for(int x = iminX; x <= imaxX; x++) {
+ for(int y = iminY; y <= imaxY; y++) {
+ for(int z = iminZ; z <= imaxZ; z++) {
+ if(Intersects(tri, boxes[x, y, z])) {
+ ...
+ }
+ }
+ }
+}
+
+The Intersects (Triangle, Bounds) function is used to determine the intersection of a triangle and a voxel.
+CPUVoxelizer.cs
+public static bool Intersects(Triangle tri, Bounds aabb)
+{
+ ...
+}
+
+In this function, the intersection judgment is performed for the above 13 axes, but the three normals of AABB are known (since they have sides along the XYZ axes, they are simply the X axis (1, 0, 0), Y-axis (0, 1, 0), Z-axis (0, 0, 1) normals), or so that the center of AABB is at the origin (0, 0, 0) The intersection judgment is optimized by translating the coordinates of the triangle and AABB.
+CPUVoxelizer.cs
+// Get the center coordinates of AABB and the extents of each side +Vector3 center = aabb.center, extents = aabb.max - center; + +// Translate the coordinates of the triangle so that the center of AABB is at the origin (0, 0, 0) +Vector3 v0 = tri.a - center, + v1 = tri.b - center, + v2 = tri.c - center; + +// Get the vector representing the three sides of the triangle +Vector3 f0 = v1 - v0, + f1 = v2 - v1, + f2 = v0 - v2; + ++
First, we perform the intersection test for the nine cross products obtained from combinations of the three edges of the triangle and the three orthogonal edges of the AABB. Since the edges of the AABB are parallel to the XYZ axes, we can take advantage of that and omit the calculations needed to obtain the cross products.
+CPUVoxelizer.cs
+// Since the sides of AABB are the direction vectors x (1, 0, 0), y (0, 1, 0), z (0, 0, 1), respectively,
+// You can get 9 different cross products without doing any calculations
+Vector3
+ a00 = new Vector3 (0, -f0.z, f0.y), // Cross product of X axis and f0
+ a01 = new Vector3 (0, -f1.z, f1.y), // X and f1
+ a02 = new Vector3(0, -f2.z, f2.y), // X and f2
+ a10 = new Vector3(f0.z, 0, -f0.x), // Y and f0
+ a11 = new Vector3(f1.z, 0, -f1.x), // Y and f1
+ a12 = new Vector3(f2.z, 0, -f2.x), // Y and f2
+ a20 = new Vector3(-f0.y, f0.x, 0), // Z and f0
+ a21 = new Vector3(-f1.y, f1.x, 0), // Z and f1
+ a22 = new Vector3(-f2.y, f2.x, 0); // Z and f2
+
+// Perform intersection judgment for 9 axes (described later)
+// (If any one of the axes does not intersect, the triangle and AABB do not intersect, so false is returned)
+if (
+ !Intersects(v0, v1, v2, extents, a00) ||
+ !Intersects(v0, v1, v2, extents, a01) ||
+ !Intersects(v0, v1, v2, extents, a02) ||
+ !Intersects(v0, v1, v2, extents, a10) ||
+ !Intersects(v0, v1, v2, extents, a11) ||
+ !Intersects(v0, v1, v2, extents, a12) ||
+ !Intersects(v0, v1, v2, extents, a20) ||
+ !Intersects(v0, v1, v2, extents, a21) ||
+ !Intersects(v0, v1, v2, extents, a22)
+)
+{
+ return false;
+}
+
+
+The following function projects the triangle and AABB on these axes to determine the intersection.
+CPUVoxelizer.cs
+protected static bool Intersects(
+ Vector3 v0,
+ Vector3 v1,
+ Vector3 v2,
+ Vector3 extents,
+ Vector3 axis
+)
+{
+ ...
+}
+
+The point to note here is the optimization achieved by moving the center of the AABB to the origin. It is not necessary to project all of the AABB's vertices onto the axis; the interval on the axis can be obtained simply by projecting the vertex with the maximum value along the XYZ axes of the AABB — that is, the extents (half the length of each side).
+The value r obtained by projecting extents represents the interval [-r, r] on the projection axis of AABB, which means that the projection can be calculated only once for AABB.
+CPUVoxelizer.cs
+// Project the vertices of the triangle on the axis +float p0 = Vector3.Dot(v0, axis); +float p1 = Vector3.Dot(v1, axis); +float p2 = Vector3.Dot(v2, axis); + +// Project the extents with the maximum value for the XYZ axes of AABB on the axis to get the value r +// Since the section of AABB is [-r, r], it is not necessary to project all vertices for AABB. +float r = + extents.x * Mathf.Abs(axis.x) + + extents.y * Mathf.Abs(axis.y) + + extents.z * Mathf.Abs(axis.z); + +// Triangular projection section +float minP = Mathf.Min(p0, p1, p2); +float maxP = Mathf.Max(p0, p1, p2); + +// Determine if the triangular section and the AABB section overlap +return !((maxP < -r) || (r < minP)); + ++
Next to the discrimination based on the nine cross products, the discrimination is performed based on the three normals of AABB.
+Using the fact that the normals of the AABB are parallel to the XYZ axes, and that the coordinates have been translated so the center of the AABB sits at the origin, the intersection test can be performed simply by comparing the minimum and maximum XYZ components of the triangle's vertices against the extents.
+CPUVoxelizer.cs
+// X axis
+if (
+ Mathf.Max(v0.x, v1.x, v2.x) < -extents.x ||
+ Mathf.Min(v0.x, v1.x, v2.x) > extents.x
+)
+{
+ return false;
+}
+
+// Y axis
+if (
+ Mathf.Max(v0.y, v1.y, v2.y) < -extents.y ||
+ Mathf.Min(v0.y, v1.y, v2.y) > extents.y
+)
+{
+ return false;
+}
+
+// Z axis
+if (
+ Mathf.Max(v0.z, v1.z, v2.z) < -extents.z ||
+ Mathf.Min(v0.z, v1.z, v2.z) > extents.z
+)
+{
+ return false;
+}
+
+Lastly, regarding the triangular normal, we are making a judgment about the intersection of the Plane with the triangular normal and AABB.
+CPUVoxelizer.cs
+var normal = Vector3.Cross(f1, f0).normalized; +var pl = new Plane(normal, Vector3.Dot(normal, tri.a)); +return Intersects(pl, aabb); ++
The Intersects (Plane, Bounds) function determines the intersection of Plane and AABB.
+CPUVoxelizer.cs
+public static bool Intersects(Plane pl, Bounds aabb)
+{
+ Vector3 center = aabb.center;
+ var extents = aabb.max - center;
+
+ // Project the extents on the normal of Plane
+ var r =
+ extents.x * Mathf.Abs(pl.normal.x) +
+ extents.y * Mathf.Abs (pl.normal.y) +
+ extents.z * Mathf.Abs(pl.normal.z);
+
+ // Calculate the distance between Plane and the center of AABB
+ var s = Vector3.Dot(pl.normal, center) - pl.distance;
+
+ // Determine if s is in the range [-r, r]
+ return Mathf.Abs(s) <= r;
+}
+
+If the intersecting voxels can be determined for one triangle, the fill flag of the voxel data is set, and the front flag is set to indicate whether the triangle is front or back when viewed from the determined direction. (The front flag will be described later)
+Some voxels may intersect both front-facing and back-facing triangles, in which case the front flag should prioritize the back.
+CPUVoxelizer.cs
+if(Intersects(tri, boxes[x, y, z])) {
+ // Get voxels at intersecting (x, y, z)
+ var voxel = volume[x, y, z];
+
+ // Set the voxel position
+ voxel.position = boxes[x, y, z].center;
+
+ if(voxel.fill & 1 == 0) {
+ // If the voxels are not yet filled
+ // Flag the triangle that intersects the voxel for the front
+ voxel.front = front;
+ } else {
+ // If the voxel is already filled with other triangles
+ // Give priority to the flag on the back
+ voxel.front = voxel.front & front;
+ }
+
+ // Flag to fill voxels
+ voxel.fill = 1;
+ volume[x, y, z] = voxel;
+}
+
+The front flag is required for the "process to fill the contents of the mesh" described later, and sets whether it is the front or the back when viewed from the "direction to fill the contents".
+In the sample code, the contents of the mesh are filled in the forward (0, 0, 1) direction, so it is determined whether the triangle is in front when viewed from forward (0, 0, 1).
+If the inner product of the normal of the triangle and the direction to fill the voxels is 0 or less, it means that the triangle is the front when viewed from that direction.
+CPUVoxelizer.cs
+public class Triangle {
+ public Vector3 a, b, c; // 3 points that make up a triangle
+ public bool frontFacing; // Flag whether the triangle is a surface when viewed from the direction of filling the voxels
+ public Bounds bounds; // Triangular AABB
+
+ public Triangle (Vector3 a, Vector3 b, Vector3 c, Vector3 dir) {
+ this.a = a;
+ this.b = b;
+ this.c = c;
+
+ // Determine if the triangle is front when viewed from the direction of filling the voxels
+ var normal = Vector3.Cross(b - a, c - a);
+ this.frontFacing = (Vector3.Dot(normal, dir) <= 0f);
+
+ ...
+ }
+}
+
+Now that we have calculated the voxel data located on the mesh surface, we will fill in the inside.
+
++Figure 1.8: State after generating voxel data located on the mesh surface +
+Search for voxels that are facing forward when viewed from the direction of filling the voxels.
+Empty voxels will pass through as shown in the figure below.
+
++Figure 1.9: Search for voxels facing forward when viewed from the voxel-filling direction Empty voxels pass through (arrows fill voxels and frames represent voxel positions being searched) +
+Once you find a voxel that is facing the front, proceed through the voxel that is facing the front.
+
++Figure 1.10: Finding a voxel facing the front (the line coming out of the mesh surface is the mesh normal, and in the figure the mesh normal and the voxel filling direction are opposite, so the position of the frame You can see that the voxel is located in the front) +
+
++Figure 1.11: Go through a voxel facing the front +
+After passing through the voxels facing the front, you will reach the inside of the mesh.
+
++Figure 1.12: Passing through a voxel facing the front and reaching the inside of the mesh +
+Proceed through the inside of the mesh and fill the voxels that have arrived.
+
++Figure 1.13: Fill the reached voxels as they fill the inside of the mesh +
+Then, when you reach the voxel facing the back when viewed from the direction of filling the voxel, you can see that the inside of the mesh has been filled. Go through the voxels facing the back, and when you reach the outside of the mesh, you will start searching for the voxels facing the front again.
+
++Figure 1.14: Proceed through the voxel facing backwards from the direction of filling the voxel and then out of the mesh +
+As determined in the previous section, the inside is filled in the forward (0, 0, 1) direction, so in a 3D voxel array, the inside is filled in the z direction.
+The process of filling the contents starts from volume [x, y, 0] on the front side in the z direction and proceeds to volume [x, y, depth -1].
+CPUVoxelizer.cs
+// Fill the inside of the mesh
+for(int x = 0; x < width; x++)
+{
+ for(int y = 0; y < height; y++)
+ {
+ // Fill the inside of the mesh from the front side in the z direction to the back side
+ for(int z = 0; z < depth; z++)
+ {
+ ...
+ }
+ }
+}
+
+Based on the front flag (front or back in the z direction) already written in the voxel data, the process proceeds according to the above-mentioned flow of filling voxels.
+CPUVoxelizer.cs
+...
+// Fill the inside of the mesh from the front side in the z direction to the back side
+for(int z = 0; z < depth; z++)
+{
+ // Ignore if (x, y, z) is empty
+ if (volume[x, y, z].IsEmpty()) continue;
+
+ // Go through the voxels located in front
+ int ifront = z;
+ for(; ifront < depth && volume[x, y, ifront].IsFrontFace(); ifront++) {}
+
+ // If you go to the end, it's over
+ if(ifront >= depth) break;
+
+ // Find the voxels located on the back
+ int iback = ifront;
+
+ // Go inside the mesh
+ for (; iback < depth && volume[x, y, iback].IsEmpty(); iback++) {}
+
+ // If you go to the end, it's over
+ if (iback >= depth) break;
+
+ // Determine if (x, y, iback) is on the back
+ if(volume[x, y, iback].IsBackFace()) {
+ // Follow the voxels located on the back
+ for (; iback < depth && volume[x, y, iback].IsBackFace(); iback++) {}
+ }
+
+ // Fill voxels from (x, y, ifront) to (x, y, iback)
+ for(int z2 = ifront; z2 < iback; z2++)
+ {
+ var p = boxes[x, y, z2].center;
+ var voxel = volume[x, y, z2];
+ voxel.position = p;
+ voxel.fill = 1;
+ volume[x, y, z2] = voxel;
+ }
+
+ // Proceed through the loop until it finishes processing (x, y, iback)
+ z = iback;
+}
+
+Up to this point, we have obtained voxel data filled with the contents of the mesh.
+Since the processed 3D voxel data contains empty voxels, CPUVoxelizer.Voxelize returns only the voxels that make up the surface of the mesh and the filled contents.
+CPUVoxelizer.cs
+// Get non-empty voxels
+voxels = new List<Voxel_t>();
+for(int x = 0; x < width; x++) {
+ for(int y = 0; y < height; y++) {
+ for(int z = 0; z < depth; z++) {
+ if(!volume[x, y, z].IsEmpty())
+ {
+ voxels.Add(volume[x, y, z]);
+ }
+ }
+ }
+}
+
+In CPUVoxelizerTest.cs, a mesh is constructed using the voxel data obtained by CPUVoxelizer, and the voxels are visualized.
+
++Figure 1.15: Demo of voxel data obtained by CPUVoxelizer.Voxelize visualized as a mesh (CPUVoxelizerTest.scene) +
+The VoxelMesh class describes the process of constructing a mesh based on the voxel data array Voxel_t [] and the unit length information of one voxel.
+CPUVoxelizerTest.cs in the previous section uses this class to generate voxel mesh.
+VoxelMesh.cs
+public class VoxelMesh {
+
+ public static Mesh Build (Voxel_t[] voxels, float size)
+ {
+ var hsize = size * 0.5f;
+ var forward = Vector3.forward * hsize;
+ var back = -forward;
+ var up = Vector3.up * hsize;
+ var down = -up;
+ var right = Vector3.right * hsize;
+ var left = -right;
+
+ var vertices = new List<Vector3>();
+ var normals = new List<Vector3>();
+ var triangles = new List<int>();
+
+ for(int i = 0, n = voxels.Length; i < n; i++)
+ {
+ if(voxels[i].fill == 0) continue;
+
+ var p = voxels[i].position;
+
+ // The vertices of the eight corners that make up the Cube that represents one voxel
+ var corners = new Vector3[8] {
+ p + forward + left + up,
+ p + back + left + up,
+ p + back + right + up,
+ p + forward + right + up,
+
+ p + forward + left + down,
+ p + back + left + down,
+ p + back + right + down,
+ p + forward + right + down,
+ };
+
+ // Build the 6 faces that make up the Cube
+
+ // up
+ AddTriangle(
+ corners[0], corners[3], corners[1],
+ up, vertices, normals, triangles
+ );
+ AddTriangle(
+ corners[2], corners[1], corners[3],
+ up, vertices, normals, triangles
+ );
+
+ // down
+ AddTriangle(
+ corners[4], corners[5], corners[7],
+ down, vertices, normals, triangles
+ );
+ AddTriangle(
+ corners[6], corners[7], corners[5],
+ down, vertices, normals, triangles
+ );
+
+ // right
+ AddTriangle(
+ corners[7], corners[6], corners[3],
+ right, vertices, normals, triangles
+ );
+ AddTriangle(
+ corners[2], corners[3], corners[6],
+ right, vertices, normals, triangles
+ );
+
+ // left
+ AddTriangle(
+ corners[5], corners[4], corners[1],
+ left, vertices, normals, triangles
+ );
+ AddTriangle(
+ corners[0], corners[1], corners[4],
+ left, vertices, normals, triangles
+ );
+
+ // forward
+ AddTriangle(
+ corners[4], corners[7], corners[0],
+ forward, vertices, normals, triangles
+ );
+ AddTriangle(
+ corners[3], corners[0], corners[7],
+ forward, vertices, normals, triangles
+ );
+
+ // back
+ AddTriangle(
+ corners[6], corners[5], corners[2],
+ back, vertices, normals, triangles
+ );
+ AddTriangle(
+ corners[1], corners[2], corners[5],
+ back, vertices, normals, triangles
+ );
+ }
+
+ var mesh = new Mesh ();
+ mesh.SetVertices (vertices);
+
+ // Apply 32bit index format if the number of vertices exceeds the number that can be supported by 16bit
+ mesh.indexFormat =
+ (vertices.Count <= 65535)
+ ? IndexFormat.UInt16 : IndexFormat.UInt32;
+ mesh.SetNormals(normals);
+ mesh.SetIndices(triangles.ToArray(), MeshTopology.Triangles, 0);
+ mesh.RecalculateBounds();
+ return mesh;
+ }
+}
+
+From here, I will explain how to execute the voxelization implemented in CPUVoxelizer faster by using the GPU.
+The voxelization algorithm implemented by CPUVoxelizer can be parallelized for each coordinate in the lattice space separated by the unit length of voxels on the XY plane.
+
++Figure 1.16: Lattice space delimited by unit length of voxels on the XY plane. Voxelization can be parallelized for each lattice, so GPU implementation is possible. +
+If you allocate each process that can be parallelized to the GPU thread, you can execute the process at high speed thanks to the high-speed parallel computing of the GPU.
+The GPU implementation of voxelization is described in GPUVoxelizer.cs and Voxelizer.compute.
+(The basics of Compute Shader, which appears in this section and is indispensable for GPGPU programming in Unity, are explained in Unity Graphics Programming vol.1 "Introduction to Compute Shader")
+Voxelization on GPU is a static function of GPUVoxelizer class
+GPUVoxelizer.cs
+public class GPUVoxelizer
+{
+ public static GPUVoxelData Voxelize (
+ ComputeShader voxelizer,
+ Mesh mesh,
+ int resolution
+ ) {
+ ...
+ }
+}
+
+When called with Voxelizer.compute, the mesh to be voxelized, and the resolution as arguments, a GPUVoxelData object containing the voxel data is returned.
+ +Follow the general flow of voxelization (1) to (3) to set up the data required for voxel generation.
+GPUVoxelizer.cs
+public static GPUVoxelData Voxelize (
+ ComputeShader voxelizer,
+ Mesh mesh,
+ int resolution
+) {
+ // Same process as CPUVoxelizer.Voxelize -------
+ mesh.RecalculateBounds();
+ var bounds = mesh.bounds;
+
+ float maxLength = Mathf.Max(
+ bounds.size.x,
+ Mathf.Max(bounds.size.y, bounds.size.z)
+ );
+ var unit = maxLength / resolution;
+
+ var hunit = unit * 0.5f;
+
+ var start = bounds.min - new Vector3 (unit, unit, unit);
+ var end = bounds.max + new Vector3 (unit, unit, unit);
+ var size = end - start;
+
+ int width = Mathf.CeilToInt(size.x / unit);
+ int height = Mathf.CeilToInt(size.y / unit);
+ int depth = Mathf.CeilToInt(size.z / unit);
+ // ------- So far the same as CPUVoxelizer.Voxelize
+ ...
+}
+
+The array of Voxel_t is defined as ComputeBuffer so that it can be handled on the GPU. The point to note here is that the Voxel_t array generated as a 3D array is defined as a 1D array in the CPU implementation.
+This is because it is difficult for GPUs to handle multidimensional arrays, so define it as a one-dimensional array and get the index on the one-dimensional array from the three-dimensional position (x, y, z) in Compute Shader. By doing so, the one-dimensional array is processed like a three-dimensional array.
+GPUVoxelizer.cs
+// Generate a ComputeBuffer representing a Voxel_t array +var voxelBuffer = new ComputeBuffer( + width * height * depth, + Marshal.SizeOf(typeof(Voxel_t)) +); +var voxels = new Voxel_t[voxelBuffer.count]; +voxelBuffer.SetData (voxels); // Initialize ++
Transfer these set up data to the GPU side.
+GPUVoxelizer.cs
+// Transfer voxel data to GPU side
+voxelizer.SetVector("_Start", start);
+voxelizer.SetVector("_End", end);
+voxelizer.SetVector("_Size", size);
+
+voxelizer.SetFloat("_Unit", unit);
+voxelizer.SetFloat("_InvUnit", 1f / unit);
+voxelizer.SetFloat("_HalfUnit", hunit);
+voxelizer.SetInt("_Width", width);
+voxelizer.SetInt("_Height", height);
+voxelizer.SetInt("_Depth", depth);
+
+Generate a Compute Buffer that represents the mesh in order to determine the intersection of the triangles and voxels that make up the mesh.
+GPUVoxelizer.cs
+// Generate a ComputeBuffer that represents the vertex array of the mesh +var vertices = mesh.vertices; +var vertBuffer = new ComputeBuffer( + vertices.Length, + Marshal.SizeOf(typeof(Vector3)) +); +vertBuffer.SetData(vertices); + +// Generate a ComputeBuffer that represents a triangular array of meshes +var triangles = mesh.triangles; +var triBuffer = new ComputeBuffer( + triangles.Length, + Marshal.SizeOf(typeof(int)) +); +triBuffer.SetData(triangles); ++
In the process of generating voxels located on the surface of the mesh on the GPU, after generating voxels that intersect the front-facing triangle, the voxels that intersect the back-facing triangle are generated.
+This is because the value of the front flag written to the voxel may not be uniquely determined when multiple triangles intersect for the voxel at the same position.
+One thing to keep in mind when using GPU parallel computing is the indefiniteness of the results due to multiple threads accessing the same data at the same time.
+In the process of generating this surface, priority is given to the value of the front flag being the back (false), and voxel generation is executed in the order of front → back to prevent indeterminacy of the result.
+Transfer the mesh data you just generated to the GPU kernel SurfaceFront, which creates voxels that intersect the front-facing triangles.
+GPUVoxelizer.cs
+// Transfer mesh data to GPU kernel SurfaceFront
+var surfaceFrontKer = new Kernel(voxelizer, "SurfaceFront");
+voxelizer.SetBuffer(surfaceFrontKer.Index, "_VoxelBuffer", voxelBuffer);
+voxelizer.SetBuffer(surfaceFrontKer.Index, "_VertBuffer", vertBuffer);
+voxelizer.SetBuffer(surfaceFrontKer.Index, "_TriBuffer", triBuffer);
+
+// Set the number of triangles that make up the mesh
+var triangleCount = triBuffer.count / 3; // (the number of vertex indexes that make up the triangle / 3) is the number of triangles
+voxelizer.SetInt("_TriangleCount", triangleCount);
+
+This process is performed in parallel for each triangle that makes up the mesh. Set the kernel thread group to (triangleCount / number of kernel threads + 1, 1, 1) so that all triangles are processed and run the kernel.
+GPUVoxelizer.cs
+// Build a voxel that intersects a front-facing triangle +voxelizer.Dispatch( + surfaceFrontKer.Index, + triangleCount / (int)surfaceFrontKer.ThreadX + 1, + (int)surfaceFrontKer.ThreadY, + (int)surfaceFrontKer.ThreadZ +); ++
+Since the SurfaceFront kernel only processes front-facing triangles, it checks whether the triangle is front- or back-facing, returns early if it faces the back, and builds the mesh surface if it faces the front.
+Voxelizer.compute
+[numthreads(8, 1, 1)]
+void SurfaceFront (uint3 id : SV_DispatchThreadID)
+{
+ // return if the number of triangles is exceeded
+ int idx = (int)id.x;
+ if(idx >= _TriangleCount) return;
+
+ // Get the vertex position of the triangle and the front and back flags
+ float3 va, vb, vc;
+ bool front;
+ get_triangle(idx, va, vb, vc, front);
+
+ // return if it is on the back
+ if (!front) return;
+
+ // Build a mesh surface
+ surface(va, vb, vc, front);
+}
+
+The get_triangle function gets the vertex position and front / back flag of the triangle based on the mesh data (_TriBuffer representing the vertex index that constitutes the triangle and _VertBuffer representing the vertex) passed from the CPU to the GPU side.
+Voxelizer.compute
+void get_triangle(
+ int idx,
+ out float3 va, out float3 vb, out float3 vc,
+ out bool front
+)
+{
+ int ia = _TriBuffer[idx * 3];
+ int ib = _TriBuffer[idx * 3 + 1];
+ int ic = _TriBuffer[idx * 3 + 2];
+
+ va = _VertBuffer [ia];
+ vb = _VertBuffer[ib];
+ vc = _VertBuffer[ic];
+
+ // Determine if the triangle is front or back when viewed from the forward (0, 0, 1) direction
+ float3 normal = cross ((vb - va), (vc - vb));
+ front = dot(normal, float3(0, 0, 1)) < 0;
+}
+
+The surface function, which determines the intersection of a voxel and a triangle and writes the result into the voxel data, is almost the same as the CPUVoxelizer implementation, apart from the extra step of computing the index into the voxel data, which is stored as a one-dimensional array.
+Voxelizer.compute
+void surface (float3 va, float3 vb, float3 vc, bool front)
+{
+ // Calculate the triangle AABB
+ float3 tbmin = min (min (va, vb), vc);
+ float3 tbmax = max(max(va, vb), vc);
+
+ float3 bmin = tbmin - _Start;
+ float3 bmax = tbmax - _Start;
+ int iminX = round(bmin.x / _Unit);
+ int iminY = round(bmin.y / _Unit);
+ int iminZ = round(bmin.z / _Unit);
+ int imaxX = round(bmax.x / _Unit);
+ int imaxY = round(bmax.y / _Unit);
+ int imaxZ = round(bmax.z / _Unit);
+ iminX = clamp(iminX, 0, _Width - 1);
+ iminY = clamp(iminY, 0, _Height - 1);
+ iminZ = clamp(iminZ, 0, _Depth - 1);
+ imaxX = clamp(imaxX, 0, _Width - 1);
+ imaxY = clamp(imaxY, 0, _Height - 1);
+ imaxZ = clamp(imaxZ, 0, _Depth - 1);
+
+ // Judge the intersection with voxels in the triangular AABB
+ for(int x = iminX; x <= imaxX; x++) {
+ for(int y = iminY; y <= imaxY; y++) {
+ for(int z = iminZ; z <= imaxZ; z++) {
+ // Generate AABB for voxels located at (x, y, z)
+ float3 center = float3(x, y, z) * _Unit + _Start;
+ AABB aabb;
+ aabb.min = center - _HalfUnit;
+ aabb.center = center;
+ aabb.max = center + _HalfUnit;
+ if(intersects_tri_aabb(va, vb, vc, aabb))
+ {
+ // Get the index of a one-dimensional voxel array from the position of (x, y, z)
+ uint vid = get_voxel_index(x, y, z);
+ Voxel voxel = _VoxelBuffer[vid];
+ voxel.position = get_voxel_position(x, y, z);
+ voxel.front = front;
+ voxel.fill = true;
+ _VoxelBuffer[vid] = voxel;
+ }
+ }
+ }
+ }
+}
+
+Now that we have generated voxels for the front-facing triangles, let's move on to the back-facing triangles.
+Transfer the mesh data to the GPU kernel SurfaceBack, which generates voxels that intersect the triangle facing the back, and execute it as before.
+GPUVoxelizer.cs
+var surfaceBackKer = new Kernel(voxelizer, "SurfaceBack"); +voxelizer.SetBuffer(surfaceBackKer.Index, "_VoxelBuffer", voxelBuffer); +voxelizer.SetBuffer(surfaceBackKer.Index, "_VertBuffer", vertBuffer); +voxelizer.SetBuffer(surfaceBackKer.Index, "_TriBuffer", triBuffer); +voxelizer.Dispatch( + surfaceBackKer.Index, + triangleCount / (int)surfaceBackKer.ThreadX + 1, + (int)surfaceBackKer.ThreadY, + (int)surfaceBackKer.ThreadZ +); ++
+The processing of SurfaceBack is the same as SurfaceFront, except that it returns early when the triangle faces the front. By running SurfaceBack after SurfaceFront, the voxel's front flag is overwritten by SurfaceBack even when a voxel intersects both a front-facing and a back-facing triangle, so back-facing takes priority.
+Voxelizer.compute
+[numthreads(8, 1, 1)]
+void SurfaceBack (uint3 id : SV_DispatchThreadID)
+{
+ int idx = (int)id.x;
+ if(idx >= _TriangleCount) return;
+
+ float3 va, vb, vc;
+ bool front;
+ get_triangle(idx, va, vb, vc, front);
+
+ // return if front
+ if (front) return;
+
+ surface(va, vb, vc, front);
+}
+
+The volume kernel is used to fill the inside of the mesh.
+The Volume kernel prepares and executes a thread for each coordinate in the lattice space separated by the unit length of voxels on the XY plane. In other words, in the case of CPU implementation, the place where the double loop was executed for XY coordinates is parallelized by GPU and speeded up.
+GPUVoxelizer.cs
+// Transfer voxel data to Volume kernel +var volumeKer = new Kernel(voxelizer, "Volume"); +voxelizer.SetBuffer(volumeKer.Index, "_VoxelBuffer", voxelBuffer); + +// Fill the inside of the mesh +voxelizer.Dispatch( + volumeKer.Index, + width / (int)volumeKer.ThreadX + 1, + height / (int)volumeKer.ThreadY + 1, + (int)volumeKer.ThreadZ +); ++
+The Volume kernel implementation is similar to the corresponding process implemented in CPUVoxelizer.
+Voxelizer.compute
+[numthreads(8, 8, 1)]
+void Volume (uint3 id : SV_DispatchThreadID)
+{
+ int x = (int)id.x;
+ int y = (int)id.y;
+ if(x >= _Width) return;
+ if(y >= _Height) return;
+
+ for (int z = 0; z < _Depth; z++)
+ {
+ Voxel voxel = _VoxelBuffer[get_voxel_index(x, y, z)];
+ // Almost the same processing as in CPUVoxelizer.Voxelize continues
+ ...
+ }
+}
+
+Once the voxel data has been obtained in this way, the mesh data that is no longer needed is discarded, and a GPUVoxelData object is created with the data needed to build the visual representation of the voxels.
+GPUVoxelizer.cs
+// Discard mesh data that is no longer needed +vertBuffer.Release(); +triBuffer.Release(); + +return new GPUVoxelData(voxelBuffer, width, height, depth, unit); ++
+This completes voxelization with the GPU implementation. The voxel data is actually visualized using GPUVoxelData in GPUVoxelizerTest.cs.
+ +In the test scene, Voxelizer is executed at the time of Play, so it is difficult to understand the speed difference between CPU implementation and GPU implementation, but GPU implementation has achieved considerable speedup.
+Performance depends greatly on the execution environment, the number of polygons in the mesh to be voxelized, and the resolution of voxelization.
+Under these conditions, the GPU implementation is running 50 times faster than the CPU implementation.
+ +Introducing an application example (GPUVoxelParticleSystem) using the GPU-implemented ParticleSystem.
+The GPU Voxel Particle System uses the Compute Buffer, which represents the voxel data obtained from the GPU Voxelizer, to calculate the position of particles in the Compute Shader.
+I am creating an effect in the flow.
+
++Figure 1.17: Application example using ParticleSystem of GPU implementation (GPUVoxelParticleSystem) +
+By making a large number of particles appear from the voxel position, a visual like an animation model composed of particles is realized.
+Voxelizing the animated model frame by frame is only possible because of the speedup from the GPU implementation; to expand the range of visual expressions usable in real time, this kind of GPU acceleration has become indispensable.
+ +In this chapter, we introduced the algorithm for voxelizing the mesh model using CPU implementation as an example, and even speeded up voxelization by GPU implementation.
+We took the approach of generating voxels using the intersection judgment of triangles and voxels, but there is also a method of constructing voxel data by rendering the model from the XYZ directions into a 3D texture by parallel projection.
+The method introduced in this chapter has difficulty applying textures to the voxelized model, but with the method that renders the model into a 3D texture, coloring the voxels may be easier and more accurate.
+ +
|
![]() |
|
This chapter describes the theory and implementation of Screen Space Reflection as an application of image effects. When constructing a three-dimensional space, reflections — along with shadows — are useful for expressing realism. However, despite being a phenomenon we casually observe in daily life, reflection requires an enormous amount of computation in the world of 3DCG when the physical phenomenon is faithfully reproduced with ray tracing (described later). Recently, Octane Render has become available in Unity, making it possible to produce quite photorealistic results when producing video works, but in real-time rendering it is still necessary to devise pseudo reproductions.
+There are several techniques for expressing reflections with real-time rendering, but in this chapter we will introduce a technique called Screen Space Reflection (SSR) that belongs to the post-effects.
+As for the structure of this chapter, we will first explain the blur processing used in the sample program, as a warm-up exercise for post effects. After that, we will explain SSR while breaking it down into the smallest possible processing units.
+In addition, the sample of this chapter is in "SSR" of
https://github.com/IndieVisualLab/UnityGraphicsProgramming2
.
In this section, we will explain blur processing. Blur can become quite involved once you include techniques such as anti-aliasing, but here we will stick to the basics, since this is just a warm-up. The basic idea of blur processing is to homogenize the color of each texel (the pixels after rasterization *4) of the image being processed by applying a matrix that references the surrounding texels. The matrix that references the surrounding texels is called the kernel. The kernel determines the proportions in which texel colors are mixed.
+Gaussian blur is the most commonly used blur treatment. As the name implies, this refers to the process of using a Gaussian distribution in the kernel. Skim through the Gaussian blur implementation below to get a feel for how post-effects work.
+The Gaussian kernel mixes the brightness around the pixel to be processed at a rate that follows a Gaussian distribution. By doing this, it is possible to suppress the blurring of the contour part where the brightness changes non-linearly.
+As a review of mathematics, the Gaussian distribution can be expressed by the following formula.
+G\left( x\right) =\dfrac {1}{\sqrt {2 \pi \sigma ^{2}}}\exp \left( -\dfrac {x^{2}}{2\sigma ^{2}}\right)
+
+Since the Gaussian distribution can be approximated to the binomial distribution here, the Gaussian distribution can be substituted by the combination of weighting according to the binomial distribution as shown below (see footnote * 2 for the approximation of the Gaussian and binomial distributions ).
+GaussianBlur.shader
+float4 x_blur (v2f i) : SV_Target
+{
+ float weight [5] = { 0.2270270, 0.1945945, 0.1216216, 0.0540540, 0.0162162 };
+ float offset [5] = { 0.0, 1.0, 2.0, 3.0, 4.0 };
+ float2 size = _MainTex_TexelSize;
+ fixed4 col = tex2D(_MainTex, i.uv) * weight[0];
+ for(int j=1; j<5; j++)
+ {
+ col += tex2D(_MainTex, i.uv + float2(offset[j], 0) * size) * weight[j];
+ col += tex2D(_MainTex, i.uv - float2(offset[j], 0) * size) * weight[j];
+ }
+ return col;
+}
+
+The above code handles only the x direction, but the processing is almost the same for the y direction. The blur is split into separate x- and y-direction passes because doing so reduces the number of brightness samples per pixel from n * n = n^2 to n * 2 + 1 = 2n + 1.
+
++Figure 10.1: Confirmation that Blur composition in each direction correctly blurs +
+On the script side, OnRenderImage Blits alternately between src and a temporary RenderTexture for each of the x and y directions, and finally Blits from src to dst to output the result. On macOS it was possible to Blit using only src, but on Windows the result was not output, so RenderTexture.GetTemporary is used. (For OnRenderImage and Blit, refer to the introduction to ImageEffect in the previous chapter.)
GaussianBlur.cs
+void OnRenderImage (RenderTexture src, RenderTexture dst)
+{
+ var rt = RenderTexture.GetTemporary(src.width, src.height, 0, src.format);
+
+ for (int i = 0; i < blurNum; i++)
+ {
+ Graphics.Blit(src, rt, mat, 0);
+ Graphics.Blit(rt, src, mat, 1);
+ }
+ Graphics.Blit(src, dst);
+
+ RenderTexture.ReleaseTemporary(rt);
+}
+
+This is the end of the explanation of Gaussian blur. Now that you have a sense of how post-effects are performed, I will explain SSR from the next section.
+ +SSR is a technique that attempts to reproduce reflections and reflections within the range of post effects. All that is required for SSR is the image itself taken by the camera, the depth buffer in which the depth information is written, and the normal buffer in which the normal information is written. Depth buffer and normal buffer are collectively called G-buffer and are indispensable for Deferred rendering such as SSR. (For Deferred Rendering, there is a great explanation in the introduction to ImageEffect in the previous chapter, so please refer to that.)
+As a premise for reading this section: we will proceed on the assumption of basic knowledge of ray tracing. Ray tracing is a big topic that could fill a chapter of its own even at the introductory level, so unfortunately the explanation is omitted here. However, the following content cannot be understood without knowing what ray tracing is, so if you are not familiar with it, Peter Shirley's excellent introductory book "Ray Tracing in One Weekend" *3 is recommended reading first.
+In addition, kode80's "Screen Space Reflections in Unity 5 *6" is a well-known commentary on the Unity implementation of SSR. In Japanese, there is also "I tried implementing Screen Space Reflection in Unity *8". In this section, what is explained in those texts is simplified as much as possible, and explanations of minor peripheral techniques are omitted. If questions come up while reading the source code, try consulting those references.
+ +The basic idea of SSR is to use ray tracing techniques to simulate the relationship between a camera, a reflective surface, and an object (light source).
+Unlike ordinary optics, SSR reproduces reflections on the reflecting surface by fetching the color on the reflecting surface after identifying the light source by calculating back from the path of light incident on the camera.
+
++Figure 10.2: Differences between real-life optics and SSR light thinking +
+SSR does this for each pixel of the camera.
+The outline of the process can be summarized as follows.
+The procedure is difficult to explain in the figure, but it is complicated to explain in words. Let's disassemble it.
+ +First, pass the matrix for converting the screen coordinate system and the world coordinate system to the shader. _ViewProjIs the transformation matrix from the world coordinate system to the screen coordinate system, and _InvViewProjis the inverse matrix.
SSR.cs
+void OnRenderImage (RenderTexture src, RenderTexture dst)
+{
+
+ ....
+
+ // world <-> screen matrix
+ var view = cam.worldToCameraMatrix;
+ var proj = GL.GetGPUProjectionMatrix(cam.projectionMatrix, false);
+ var viewProj = proj * view;
+ mat.SetMatrix("_ViewProj", viewProj);
+ mat.SetMatrix("_InvViewProj", viewProj.inverse);
+
+ ....
+
+}
+
+Now, using the transformation matrix passed, the normal vector and the reflection vector can be obtained. Let's take a look at the processing of the corresponding shader.
+SSR.shader
+float4 reflection (v2f i) : SV_Target
+{
+ float2 uv = i.screen.xy / i.screen.w;
+ float depth = SAMPLE_DEPTH_TEXTURE(_CameraDepthTexture, uv);
+
+ ...
+
+ float2 screenpos = 2.0 * uv - 1.0;
+ float4 pos = mul(_InvViewProj, float4(screenpos, depth, 1.0));
+ pos /= pos.w;
+ float3 camDir = normalize(pos - _WorldSpaceCameraPos);
+ float3 normal = tex2D(_CameraGBufferTexture2, uv) * 2.0 - 1.0;
+ float3 refDir = reflect(camDir, normal);
+
+ ....
+
+ if (_ViewMode == 1) col = float4((normal.xyz * 0.5 + 0.5), 1);
+ if (_ViewMode == 2) col = float4((refDir.xyz * 0.5 + 0.5), 1);
+
+ ....
+
+ return col;
+ }
+
+First, the depth of the corresponding pixel is written in _CameraDepthTexture, and this is used. Next, from the on-screen position information and the depth information, the position of the polygon at the corresponding pixel in the world coordinate system can be found, so we hold it in pos. Then, since pos and _WorldSpaceCameraPos give us the vector toward the camera, the reflection vector can be obtained from this and the normal information.
From the script attached to the main camera, you can see where the normal and reflection vectors are facing. Since each vector is standardized between -1 and 1, color information with a value less than or equal to 0 is not displayed. When the x-axis component is large, the vector is displayed in reddish, when the y-axis component is large, it is displayed in greenish, and when the z-axis component is large, it is displayed in bluish. Please set ViewMode to Normalor Reflectionand check.
Now let's look at the process of performing ray tracing.
+SSR.shader
+float4 reflection(v2f i) : SV_Target
+{
+
+ ...
+
+ [loop]
+ for (int n = 1; n <= _MaxLoop; n++)
+ {
+ float3 step = refDir * _RayLenCoeff * (lod + 1);
+ ray += step * (1 + rand(uv + _Time.x) * (1 - smooth));
+
+ float4 rayScreen = mul (_ViewProj, float4 (ray, 1.0));
+ float2 rayUV = rayScreen.xy / rayScreen.w * 0.5 + 0.5;
+ float rayDepth = ComputeDepth(rayScreen);
+ float worldDepth = (lod == 0)?
+ SAMPLE_DEPTH_TEXTURE(_CameraDepthTexture, rayUV) :
+ tex2Dlod (_CameraDepthMipmap, float4 (rayUV, 0, lod))
+ + _BaseRaise * lod;
+
+ ...
+
+ if(rayDepth < worldDepth)
+ {
+
+ ....
+
+ return outcol;
+ }
+ }
+}
+
+Variables related to processing explained later are also mixed in, but please read on without worrying about them. Inside the loop, we first extend the ray by one step and then transform it back into the screen coordinate system. We compare the depth of the ray in the screen coordinate system with the depth written in the depth buffer, and return the color if the ray is deeper. (Depth is 1.0 at the nearest point and gets smaller with distance, so when rayDepth becomes smaller than worldDepth, we judge that the ray has gone behind the surface.)
Also, if the number of loops is undecided, HLSL will throw an error, so if you want to pass the number of loops from the script [loop], you need to write the attribute at the beginning.
+The skeleton of the ray tracing is now complete. The basic processing is not so difficult once you have a mental image of it. However, in order to reproduce beautiful reflections, some additional processing is needed from here on. The points that need to be improved are the following four.
+For post-effects, including antialiasing, techniques for efficient processing are rather essential. Now that you understand the gist of the process, let's look at the technique for establishing SSR as a video.
+ +Below, we will explain how to improve processing efficiency by using Mipmap, referring to the article * 7 of Chalmers University of Technology . (See footnote for what Mipmap is * 9 ) Ray tracing basically determines the step width of the ray and gradually advances the ray, but by using Mipmap, the step of the ray until the intersection with the object is judged. The width can be variable. By doing this, you will be able to fly rays far away even with a limited number of loops, and processing efficiency will also increase.
+We have prepared a demoscene that uses Mipmap from RenderTexture, so let's check it from there.
+Mipmap.cs
+public class Mipmap : MonoBehaviour
+{
+ Material mat;
+ RenderTexture rt;
+ [SerializeField] Shader shader;
+ [SerializeField] int lod;
+
+ void OnEnable()
+ {
+ mat = new Material(shader);
+ rt = new RenderTexture(Screen.width, Screen.height, 24);
+ rt.useMipMap = true;
+ }
+
+ void OnDisable()
+ {
+ Destroy(mat);
+ rt.Release();
+ }
+
+ void OnRenderImage (RenderTexture src, RenderTexture dst)
+ {
+ mat.SetInt("_LOD", lod);
+ Graphics.Blit(src, rt);
+ Graphics.Blit(rt, dst, mat);
+ }
+}
+
+Since mipmap cannot be set for ready-made RenderTexture, here, srcafter creating a new RenderTexture and copying it, processing is added.
Mipmap.shader
+sampler2D _MainTex;
+float4 _MainTex_ST;
+int _LOD;
+
+....
+
+fixed4 frag (v2f i) : SV_Target
+{
+ return tex2Dlod(_MainTex, float4(i.uv, 0, _LOD));
+}
+
+tex2Dlod(_MainTex, float4(i.uv, 0, _LOD))You can get the Mipmap according to the LOD with.
If you raise the LOD from the script attached to the camera on the scene, you can see that the image becomes grainy.
+
++Figure 10.3: Comparison of increased LOD and Mipmap image quality +
+Now that you have confirmed how to use Mipmap, let's see how Mipmap is used in the SSR scene.
+SSR.shader
+[loop]
+for (int n = 1; n <= _MaxLoop; n++)
+{
+ float3 step = refDir * _RayLenCoeff * (lod + 1);
+ ray += step;
+
+ ....
+
+ if(rayDepth < worldDepth)
+ {
+ if(lod == 0)
+ {
+ if (rayDepth + _Thickness > worldDepth)
+ {
+ float sign = -1.0;
+ for (int m = 1; m <= 8; ++m)
+ {
+ ray += sign * pow(0.5, m) * step;
+ rayScreen = mul (_ViewProj, float4 (ray, 1.0));
+ rayUV = rayScreen.xy / rayScreen.w * 0.5 + 0.5;
+ rayDepth = ComputeDepth(rayScreen);
+ worldDepth = SAMPLE_DEPTH_TEXTURE(_CameraDepthTexture, rayUV);
+ sign = (rayDepth < worldDepth) ? -1 : 1;
+ }
+ refcol = tex2D(_MainTex, rayUV);
+ }
+ break;
+ }
+ else
+ {
+ ray -= step;
+ lod--;
+ }
+ }
+ else if(n <= _MaxLOD)
+ {
+ lod ++;
+ }
+ calcTimes = n;
+}
+if (_ViewMode == 3) return float4(1, 1, 1, 1) * calc / _MaxLoop;
+
+....
+
+
+I will proceed with the explanation using the figure in the article of Chalmers.
+
++Figure 10.4: Calculation method using Mipmap +
+As shown in the figure, the LOD is raised while carefully judging the intersection for the first few times. As long as there is no intersection with other meshes, we will proceed with large steps. If there is an intersection, Unity's MipMap will roughen the pixels while taking the average value, so unlike the case of the article, the ray may go too far. Therefore, move back by one unit step and advance the ray again with one smaller LOD. Finally, by making an intersection judgment on the image with LOD = 0, the moving distance of the ray can be extended and the processing can be made more efficient.
+From the script attached to the main camera, you can see how much the amount of computation changes when you raise the LOD. The larger the amount of computation, the whiter the pixel appears; the smaller, the darker. Set ViewMode to CalcCount and change the LOD to check how the amount of computation changes.
++Figure 10.5: Difference in computational complexity due to changes in LOD (closer to black, smaller computational complexity) +
+Let's see how to improve the accuracy near the intersection by binary tree search. Check from the code immediately.
+SSR.shader
+if (lod == 0)
+{
+ if (rayDepth + _Thickness > worldDepth)
+ {
+ float sign = -1.0;
+ for (int m = 1; m <= 8; ++m)
+ {
+ ray += sign * pow(0.5, m) * step;
+ rayScreen = mul (_ViewProj, float4 (ray, 1.0));
+ rayUV = rayScreen.xy / rayScreen.w * 0.5 + 0.5;
+ rayDepth = ComputeDepth(rayScreen);
+ worldDepth = SAMPLE_DEPTH_TEXTURE(_CameraDepthTexture, rayUV);
+ sign = (rayDepth < worldDepth) ? -1 : 1;
+ }
+ refcol = tex2D(_MainTex, rayUV);
+ }
+ break;
+}
+
+Immediately after the intersection, it is behind the intersected object, so first retract the ray. After that, while checking the context of the ray and the mesh, change the direction of travel of the ray either forward or backward. At the same time, by shortening the step width of the ray, it is possible to identify the intersection with the mesh with less error.
+ +The methods so far have not taken into account the differences in the materials of the objects in the screen. Therefore, there is a problem that all objects are reflected to the same extent. Therefore, use G-buffer again. _CameraGBufferTexture1.wSince the smoothness of the material is stored in, use this.
SSR.shader
+if (_ViewMode == 8) + return float4(1, 1, 1, 1) * tex2D(_CameraGBufferTexture1, uv).w; + +.... + +return + (col * (1 - smooth) + refcol * smooth) * _ReflectionRate + + col * (1 - _ReflectionRate); ++
If you change the smoothness value of the material attached to an object in the scene, you can see that only that object changes its degree of reflection. You can also visualize the smoothness across the scene by setting the ViewMode of the script attached to the main camera to Smoothness. The whiter a surface appears, the greater its smoothness.
This is the part using the Gaussian blur explained in the first section. If the step width of the ray is not small enough, you may not be able to get the reflection well even if you perform a binary tree search. If the step width of the ray is reduced, the total length of the ray will be shortened and the amount of calculation will increase, so it is not enough to just reduce the step width, but it should be kept to an appropriate size. The part where the reflection could not be obtained well is blurred to make it look like it.
+SSR.shader
+float4 xblur(v2f i) : SV_Target
+{
+ float2 uv = i.screen.xy / i.screen.w;
+ float2 size = _ReflectionTexture_TexelSize;
+ float smooth = tex2D(_CameraGBufferTexture1, uv).w;
+
+ // compare depth
+ float depth = SAMPLE_DEPTH_TEXTURE(_CameraDepthTexture, uv);
+ float depthR =
+ SAMPLE_DEPTH_TEXTURE(_CameraDepthTexture, uv + float2(1, 0) * size);
+ float depthL =
+ SAMPLE_DEPTH_TEXTURE(_CameraDepthTexture, uv - float2(1, 0) * size);
+
+ if (depth <= 0) return tex2D(_ReflectionTexture, uv);
+
+ float weight[5] = { 0.2270270, 0.1945945, 0.1216216, 0.0540540, 0.0162162 };
+ float offset[5] = { 0.0, 1.0, 2.0, 3.0, 4.0 };
+
+ float4 originalColor = tex2D(_ReflectionTexture, uv);
+ float4 blurredColor = tex2D(_ReflectionTexture, uv) * weight[0];
+
+ for (int j = 1; j < 5; ++j)
+ {
+ blurredColor
+ += tex2D(_ReflectionTexture, uv + float2(offset[j], 0) * size)
+ * weight[j];
+
+ blurredColor
+ += tex2D(_ReflectionTexture, uv - float2(offset[j], 0) * size)
+ * weight[j];
+ }
+
+ float4 o = (abs(depthR - depthL) > _BlurThreshold) ? originalColor
+ : blurredColor * smooth + originalColor * (1 - smooth);
+ return o;
+}
+
+Again, for the reason described earlier, the processing is divided into xblur and yblur. Also, since we only want to blur within the same reflective surface, we avoid blurring across contours. If the difference between the left and right depths is large, the pixel is judged to be part of a contour. (yblur likewise evaluates the difference between the top and bottom.)
The result of adding the processing up to this point is as follows.
+
++Figure 10.6: Results +
+As a bonus, I will introduce a technique that makes it look as if a non-existent object is reflected using two cameras, a main camera and a sub camera.
+SSRMainCamera.shader
+float4 reflection(v2f i) : SV_Target
+{
+
+ ....
+
+ for (int n = 1; n <= 100; ++n)
+ {
+ float3 ray = n * step;
+ float3 rayPos = pos + ray;
+ float4 vpPos = mul (_ViewProj, float4 (rayPos, 1.0));
+ float2 rayUv = vpPos.xy / vpPos.w * 0.5 + 0.5;
+ float rayDepth = vpPos.z / vpPos.w;
+ float subCameraDepth = SAMPLE_DEPTH_TEXTURE(_SubCameraDepthTex, rayUv);
+
+ if (rayDepth < subCameraDepth && rayDepth + thickness > subCameraDepth)
+ {
+ float sign = -1.0;
+ for (int m = 1; m <= 4; ++m)
+ {
+ rayPos += sign * pow(0.5, m) * step;
+ vpPos = mul (_ViewProj, float4 (rayPos, 1.0));
+ rayUv = vpPos.xy / vpPos.w * 0.5 + 0.5;
+ rayDepth = vpPos.z / vpPos.w;
+ subCameraDepth = SAMPLE_DEPTH_TEXTURE(_SubCameraDepthTex, rayUv);
+ sign = rayDepth - subCameraDepth < 0 ? -1 : 1;
+ }
+ col = tex2D (_SubCameraMainTex, rayUv);
+ }
+ }
+ return col * smooth + tex2D(_MainTex, uv) * (1 - smooth);
+}
+
+It is kept simple, with as little extra processing as possible. The point is that SAMPLE_DEPTH_TEXTURE(_SubCameraDepthTex, rayUv) is used instead of SAMPLE_DEPTH_TEXTURE(_CameraDepthTexture, uv) for the depth evaluation, and that the object information to be referenced is likewise obtained from _SubCameraMainTex. These textures (_SubCameraDepthTex, _SubCameraMainTex) are set as global textures from the sub camera.
The downside is that each camera casts shadows on objects that shouldn't be visible. It may not be very practical, but it's a little interesting effect.
+
++Figure 10.7: Method using two cameras +
+This is the end of the explanation of SSR.
+Since SSR is a technique that requires a large amount of processing capacity, it is not realistic to reflect objects in all positions cleanly. Therefore, the point is to improve the appearance of the reflection of the object of interest and to make the trivial reflection look like it with less processing. In addition, the screen size to be rendered is directly linked to the amount of calculation, so it is important to search for the points that will be established as an image while considering the expected screen size and GPU performance. Check the role and trade-offs of each parameter by adjusting the parameters while moving the objects in the scene.
+In addition, the Mipmap, binary tree search, how to use the camera buffer, and many other detailed techniques mentioned above can be applied not only to SSR but also to various places. I would be happy if there is some content that is helpful to the readers.
+[*1] http://rastergrid.com/blog/2010/09/efficient-gaussian-blur-with-linear-sampling/
[*2] https://ja.wikipedia.org/wiki/%E4%BA%8C%E9%A0%85%E5%88%86%E5%B8%83
[*3] https://www.amazon.co.jp/gp/product/B01B5AODD8
[*4] https://msdn.microsoft.com/ja-jp/library/bb219690(v=vs.85).aspx
[*5] https://www.sciencelearn.org.nz/resources/48-reflection-of-light
[*6] http://www.kode80.com/blog/2015/03/11/screen-space-reflections-in-unity-5/
[*7] http://www.cse.chalmers.se/edu/year/2017/course/TDA361/ Advanced%20Computer%20Graphics/Screen-space%20reflections.pdf
[*8] http://tips.hecomi.com/entry/2016/04/04/022550
[*9] https://answers.unity.com/questions/441984/what-is-mip-maps-pictures.html
[*10] https://docs.unity3d.com/Manual/RenderTech-DeferredShading.html
|
![]() |
|
In this chapter, we will show you how to use GPU to create a trail. The sample in this chapter is "GPU Based Trail" from https://github.com/IndieVisualLab/UnityGraphicsProgramming2 .
+ +The trajectory of a moving object is called a trail. In a broad sense, it includes car ruts, ship tracks, ski spurs, etc., but what is impressive in CG is a light trail expression that draws a curve like a car tail lamp or a homing laser in a shooting game. ..
+ +Two types of trails are provided as standard in Unity.
+[*1] https://docs.unity3d.com/ja/current/Manual/class-TrailRenderer.html
[*2] https://docs.unity3d.com/Manual/PartSysTrailsModule.html
Since this chapter focuses on how to create the Trail itself, we will not use these functions, and by implementing it on the GPU, it will be possible to express more than the Trails module.
+
++Figure 2.1: Sample code execution screen. Show 10000 Trails +
+Now let's create a Trail.
+ +There are three main structures used.
+GPUTrails.cs
+public struct Trail
+{
+ public int currentNodeIdx;
+}
+
+Each Trail structure corresponds to one Trail. currentNodeIdx Stores the index of the last written Node buffer.
+GPUTrails.cs
+public struct Node
+{
+ public float time;
+ public Vector3 pos;
+}
+
+Node structures are control points in the Trail. It stores the location of the Node and the time it was updated.
+GPUTrails.cs
+public struct Input
+{
+ public Vector3 pos;
+}
+
+The Input structure is the input for one frame from the emitter (the one that leaves the trajectory). Here, it's just the position, but I think it would be interesting to add colors and so on.
+ +Initialize the buffer used by GPUTrails.Start ()
+GPUTrails.cs
+trailBuffer = new ComputeBuffer(trailNum, Marshal.SizeOf(typeof(Trail))); +nodeBuffer = new ComputeBuffer(totalNodeNum, Marshal.SizeOf(typeof(Node))); +inputBuffer = new ComputeBuffer(trailNum, Marshal.SizeOf(typeof(Input))); ++
Initializing trailBuffers for trailNum. In other words, this program processes multiple Trails at once. In nodeBuffer, Nodes for all Trails are handled together in one buffer. Indexes 0 to nodeNum-1 are the first, nodeNum to 2 * nodeNum-1 are the second, and so on. The inputBuffer also holds trailNums and manages the input of all trails.
+GPUTrails.cs
+var initTrail = new Trail() { currentNodeIdx = -1 };
+var initNode = new Node() { time = -1 };
+
+trailBuffer.SetData(Enumerable.Repeat(initTrail, trailNum).ToArray());
+nodeBuffer.SetData(Enumerable.Repeat(initNode, totalNodeNum).ToArray());
+
+The initial value is put in each buffer. Set Trail.currentNodeIdx and Node.time to negative numbers, and use them later to determine whether they are unused. Since all values of inputBuffer are written in the first update, there is no need to initialize and there is no touch.
+ +Here's how to use the Node buffer.
+ +
++Figure 2.2: Initial state +
+Nothing has been entered yet.
+ +
++Figure 2.3: Input +
+It will be input one node at a time. I have an unused Node.
+ +
++Figure 2.4: Loop +
+When all the Nodes are exhausted, the returning Nodes will be overwritten at the beginning. It is used like a ring buffer.
+ +From here, it will be called every frame. Enter the position of the emitter to add and update Nodes.
+First, update the inputBuffer externally. This can be any process. At first ComputeBuffer.SetData(), it may be easier and better to calculate with the CPU . The sample code moves particles in a simple GPU implementation and treats them as emitters.
+The particles in the sample code move according to forces obtained from Curl Noise. Curl Noise is very convenient because it makes it easy to create pseudo-fluid-like movement. It is explained in detail in Chapter 6 of this book, the commentary on Curl Noise for pseudo-fluids by @sakope, so please be sure to read it.
+GPUTrailParticles.cs
+void Update()
+{
+ cs.SetInt(CSPARAM.PARTICLE_NUM, particleNum);
+ cs.SetFloat(CSPARAM.TIME, Time.time);
+ cs.SetFloat(CSPARAM.TIME_SCALE, _timeScale);
+ cs.SetFloat(CSPARAM.POSITION_SCALE, _positionScale);
+ cs.SetFloat(CSPARAM.NOISE_SCALE, _noiseScale);
+
+ var kernelUpdate = cs.FindKernel(CSPARAM.UPDATE);
+ cs.SetBuffer(kernelUpdate, CSPARAM.PARTICLE_BUFFER_WRITE, _particleBuffer);
+
+ var updateThureadNum = new Vector3(particleNum, 1f, 1f);
+ ComputeShaderUtil.Dispatch(cs, kernelUpdate, updateThureadNum);
+
+
+ var kernelInput = cs.FindKernel(CSPARAM.WRITE_TO_INPUT);
+ cs.SetBuffer(kernelInput, CSPARAM.PARTICLE_BUFFER_READ, _particleBuffer);
+ cs.SetBuffer(kernelInput, CSPARAM.INPUT_BUFFER, trails.inputBuffer);
+
+ var inputThreadNum = new Vector3(particleNum, 1f, 1f);
+ ComputeShaderUtil.Dispatch(cs, kernelInput, inputThreadNum);
+}
+
+I'm running two kernels.
+Now, let's update nodeBuffer by referring to inputBuffer.
+GPUTrailParticles.cs
+void LateUpdate()
+{
+ cs.SetFloat(CSPARAM.TIME, Time.time);
+ cs.SetFloat(CSPARAM.UPDATE_DISTANCE_MIN, updateDistaceMin);
+ cs.SetInt(CSPARAM.TRAIL_NUM, trailNum);
+ cs.SetInt(CSPARAM.NODE_NUM_PER_TRAIL, nodeNum);
+
+ var kernel = cs.FindKernel(CSPARAM.CALC_INPUT);
+ cs.SetBuffer(kernel, CSPARAM.TRAIL_BUFFER, trailBuffer);
+ cs.SetBuffer(kernel, CSPARAM.NODE_BUFFER, nodeBuffer);
+ cs.SetBuffer(kernel, CSPARAM.INPUT_BUFFER, inputBuffer);
+
+ ComputeShaderUtil.Dispatch(cs, kernel, new Vector3(trailNum, 1f, 1f));
+}
+
+On the CPU side, all you have to do is Dispatch () ComputeShader, passing the required parameters. The processing on the main ComputeShader side is as follows.
+GPUTrail.compute
+[numthreads(256,1,1)]
+void CalcInput (uint3 id : SV_DispatchThreadID)
+{
+ uint trailIdx = id.x;
+ if ( trailIdx < _TrailNum)
+ {
+ Trail trail = _TrailBuffer[trailIdx];
+ Input input = _InputBuffer[trailIdx];
+ int currentNodeIdx = trail.currentNodeIdx;
+
+ bool update = true;
+ if ( trail.currentNodeIdx >= 0 )
+ {
+ Node node = GetNode(trailIdx, currentNodeIdx);
+ float dist = distance(input.position, node.position);
+ update = dist > _UpdateDistanceMin;
+ }
+
+ if ( update )
+ {
+ Node node;
+ node.time = _Time;
+ node.position = input.position;
+
+ currentNodeIdx++;
+ currentNodeIdx %= _NodeNumPerTrail;
+
+ // write new node
+ SetNode(node, trailIdx, currentNodeIdx);
+
+ // update trail
+ trail.currentNodeIdx = currentNodeIdx;
+ _TrailBuffer[trailIdx] = trail;
+ }
+ }
+}
+
+Let's take a closer look.
+uint trailIdx = id.x; +if ( trailIdx < _TrailNum) ++
First, I'm using the argument id as the Trail index. Due to the number of threads, it may be called with ids equal to or greater than the number of Trails, so I play something outside the range with an if statement.
+int currentNodeIdx = trail.currentNodeIdx;
+
+bool update = true;
+if ( trail.currentNodeIdx >= 0 )
+{
+ Node node = GetNode(trailIdx, currentNodeIdx);
+ update = distance(input.position, node.position) > _UpdateDistanceMin;
+}
+
+Trail.currentNodeIdxI am checking next . If it is negative, it is an unused Trail.
GetNode() Is a function that gets the specified Node from _NodeBuffer. Since the index calculation is the source of mistakes, it is functionalized.
+A Trail that is already in use compares the distance between its latest Node and the input position: it is updated if the distance is greater than _UpdateDistanceMin, and not updated if it is closer. Although it depends on the behavior of the emitter, inputs at almost the same position as the previous Node usually occur when the emitter is nearly stopped and moving only by slight error; if such inputs were faithfully converted into Nodes, the direction between consecutive Nodes would vary wildly and the Trail would often look quite messy. Therefore, at very short distances, we deliberately skip adding a Node.
GPUTrail.compute
+if ( update )
+{
+ Node node;
+ node.time = _Time;
+ node.position = input.position;
+
+ currentNodeIdx++;
+ currentNodeIdx %= _NodeNumPerTrail;
+
+ // write new node
+ SetNode(node, trailIdx, currentNodeIdx);
+
+ // update trail
+ trail.currentNodeIdx = currentNodeIdx;
+ _TrailBuffer[trailIdx] = trail;
+}
+
+Finally, I'm updating _NodeBuffer and _TrailBuffer. The Trail stores the index of the entered Node as currentNodeIdx. When the number of Nodes per Trail is exceeded, it is returned to zero so that it becomes a ring buffer. Node stores the time and position of the input.
+Well, this completes the logical processing of Trail. Next, let's look at the process of drawing from this information.
+ +Drawing a Trail is basically a process of connecting Nodes with a line. Here, I will try to keep the individual trails as simple as possible and focus on quantity. Therefore, we want to reduce the number of polygons as much as possible, so we will generate the line as a plate polygon facing the camera.
+ +The method to generate the plate polygon facing the camera is as follows.
+
++Figure 2.5: Node column +
+From a Node column like this
+
++Figure 2.6: Vertices generated from Node +
+Finds the vertices that are moved from each node by the specified width in the direction perpendicular to the line of sight.
+
++Figure 2.7: Polygonization +
+Connect the generated vertices to make a polygon. Let's take a look at the actual code.
+ +On the CPU side, the process is simply to pass the parameters to the material and perform DrawProcedual ().
+GPUTrailRenderer.cs
+void OnRenderObject ()
+{
+ _material.SetInt(GPUTrails.CSPARAM.NODE_NUM_PER_TRAIL, trails.nodeNum);
+ _material.SetFloat(GPUTrails.CSPARAM.LIFE, trails._life);
+ _material.SetBuffer(GPUTrails.CSPARAM.TRAIL_BUFFER, trails.trailBuffer);
+ _material.SetBuffer(GPUTrails.CSPARAM.NODE_BUFFER, trails.nodeBuffer);
+ _material.SetPass(0);
+
+ var nodeNum = trails.nodeNum;
+ var trailNum = trails.trailNum;
+ Graphics.DrawProcedural(MeshTopology.Points, nodeNum, trailNum);
+}
+
+Parameters trails._lifethat have not appeared until now have appeared. This is used for processing that compares the lifetime of the Node with the generation time that the Node itself has, and makes it transparent after this amount of time. By doing this, you can express that the end of the trail disappears smoothly.
Since there are no meshes or polygons to input, Graphics.DrawProcedural()we issue a command to draw a model with trails.nodeNum vertices in batches of trails.trailNum instances.
GPUTrails.shader
+vs_out vert (uint id : SV_VertexID, uint instanceId : SV_InstanceID)
+{
+ vs_out Out;
+ Trail trail = _TrailBuffer[instanceId];
+ int currentNodeIdx = trail.currentNodeIdx;
+
+ Node node0 = GetNode(instanceId, id-1);
+ Node node1 = GetNode(instanceId, id); // current
+ Node node2 = GetNode(instanceId, id+1);
+ Node node3 = GetNode(instanceId, id+2);
+
+ bool isLastNode = (currentNodeIdx == (int)id);
+
+ if ( isLastNode || !IsValid(node1))
+ {
+ node0 = node1 = node2 = node3 = GetNode(instanceId, currentNodeIdx);
+ }
+
+ float3 pos1 = node1.position;
+ float3 pos0 = IsValid(node0) ? node0.position : pos1;
+ float3 pos2 = IsValid(node2) ? node2.position : pos1;
+ float3 pos3 = IsValid(node3) ? node3.position : pos2;
+
+ Out.pos = float4(pos1, 1);
+ Out.posNext = float4(pos2, 1);
+
+ Out.dir = normalize(pos2 - pos0);
+ Out.dirNext = normalize(pos3 - pos1);
+
+ float ageRate = saturate((_Time.y - node1.time) / _Life);
+ float ageRateNext = saturate((_Time.y - node2.time) / _Life);
+ Out.col = lerp(_StartColor, _EndColor, ageRate);
+ Out.colNext = lerp(_StartColor, _EndColor, ageRateNext);
+
+ return Out;
+}
+
+First is the processing of vertex shader. Outputs information about the current Node and the next Node corresponding to this thread.
+GPUTrails.shader
+Node node0 = GetNode(instanceId, id-1);
+Node node1 = GetNode(instanceId, id); // current
+Node node2 = GetNode(instanceId, id+1);
+Node node3 = GetNode(instanceId, id+2);
+
The current node is set to node1, and a total of four nodes are referenced: the previous one, node0, the next one, node2, and the one after that, node3.
+GPUTrails.shader
+bool isLastNode = (currentNodeIdx == (int)id);
+
+if ( isLastNode || !IsValid(node1))
+{
+ node0 = node1 = node2 = node3 = GetNode(instanceId, currentNodeIdx);
+}
+
+If the current Node is terminal or has not yet been entered, treat nodes 0-3 as a copy of the terminal Node. In other words, all Nodes beyond the end that have no information yet are treated as "folded" to the end. By doing this, it can be sent as it is to the subsequent polygon generation processing.
+GPUTrails.shader
+float3 pos1 = node1.position;
+float3 pos0 = IsValid(node0) ? node0.position : pos1;
+float3 pos2 = IsValid(node2) ? node2.position : pos1;
+float3 pos3 = IsValid(node3) ? node3.position : pos2;
+
+Out.pos = float4(pos1, 1);
+Out.posNext = float4(pos2, 1);
+
Now, extract the location information from the four Nodes. Please note that all but the current Node (node1) may be blank. It may be a little surprising that node0 is not entered, but this is possible because node0 points to the last node in the buffer going back in the ring buffer when currentNodeIdx == 0. Again, copy the location of node1 to fold it to the same location. The same applies to nodes2 and 3. Of these, pos1 and pos2 are output toward the geometry shader.
+GPUTrails.shader
+Out.dir = normalize(pos2 - pos0);
+Out.dirNext = normalize(pos3 - pos1);
+
Furthermore, the direction vector of pos0 → pos2 is output as the tangent at pos1, and the direction vector of pos1 → pos3 is output as the tangent at pos2.
+GPUTrails.shader
+float ageRate = saturate((_Time.y - node1.time) / _Life);
+float ageRateNext = saturate((_Time.y - node2.time) / _Life);
+Out.col = lerp(_StartColor, _EndColor, ageRate);
+Out.colNext = lerp(_StartColor, _EndColor, ageRateNext);
+
Finally, the color is calculated by comparing the write time of node1 and node2 with the current time.
+ +GPUTrails.shader
+[maxvertexcount(4)]
+void geom (point vs_out input[1], inout TriangleStream<gs_out> outStream)
+{
+ gs_out output0, output1, output2, output3;
+ float3 pos = input[0].pos;
+ float3 dir = input[0].dir;
+ float3 posNext = input[0].posNext;
+ float3 dirNext = input[0].dirNext;
+
+ float3 camPos = _WorldSpaceCameraPos;
+ float3 toCamDir = normalize(camPos - pos);
+ float3 sideDir = normalize(cross(toCamDir, dir));
+
+ float3 toCamDirNext = normalize(camPos - posNext);
+ float3 sideDirNext = normalize(cross(toCamDirNext, dirNext));
+ float width = _Width * 0.5;
+
+ output0.pos = UnityWorldToClipPos(pos + (sideDir * width));
+ output1.pos = UnityWorldToClipPos(pos - (sideDir * width));
+ output2.pos = UnityWorldToClipPos(posNext + (sideDirNext * width));
+ output3.pos = UnityWorldToClipPos(posNext - (sideDirNext * width));
+
+ output0.col =
+ output1.col = input[0].col;
+ output2.col =
+ output3.col = input[0].colNext;
+
+ outStream.Append (output0);
+ outStream.Append (output1);
+ outStream.Append (output2);
+ outStream.Append (output3);
+
+ outStream.RestartStrip();
+}
+
+Next is the processing of geometry shader. The polygon is finally generated from the information for two Nodes passed from the vertex shader. From 2 pos and dir, 4 positions = quadrangle are obtained and output as TriangleStream.
+GPUTrails.shader
+float3 camPos = _WorldSpaceCameraPos;
+float3 toCamDir = normalize(camPos - pos);
+float3 sideDir = normalize(cross(toCamDir, dir));
+
The outer product of the direction vector (toCameraDir) from pos to the camera and the tangent vector (dir) is obtained, and this is set as the width of the line (sideDir).
+GPUTrails.shader
+output0.pos = UnityWorldToClipPos(pos + (sideDir * width));
+output1.pos = UnityWorldToClipPos(pos - (sideDir * width));
+
Find the vertices that have moved in the positive and negative sideDir directions. Here, we have completed the coordinate transformation to make it a Clip coordinate system and pass it to the fragment shader. By performing the same processing for posNext, a total of four vertices were obtained.
+GPUTrails.shader
+output0.col =
+output1.col = input[0].col;
+output2.col =
+output3.col = input[0].colNext;
+
Add color to each vertex to complete.
+ +GPUTrails.shader
+fixed4 frag (gs_out In) : COLOR
+{
+ return In.col;
+}
+
+Finally, the fragment shader. It's as simple as it gets. It just outputs the color (laughs)
+I think that the Trail has been generated. This time, the processing was only for colors, but I think that it can be applied in various ways, such as adding textures and changing the width. Also, as the source code is separated into GPUTrails.cs and GPUTrailsRenderer.cs, the GPUTrails.shader side is just a process of drawing by looking at the buffer, so if you prepare _TrailBuffer and _NodeBuffer, it can actually be used to display anything line-shaped, not just trails. This time it was just a trail added to _NodeBuffer, but I think that by updating all Nodes every frame, it is possible to express something like a tentacle.
+ +This chapter has provided the simplest possible example of Trail's GPU implementation. While debugging becomes difficult with the GPU, it enables overwhelming physical expression that cannot be done with the CPU. I hope that as many people as possible can experience that "Uhyo!" Feeling through this book. Also, I think Trail is an expression of an interesting area with a wide range of applications, such as "displaying a model" and "drawing with an algorithm in screen space". I think that the understanding gained in this process will be useful when programming various video expressions, not limited to Trail.
\ No newline at end of file diff --git a/html-translated/vol2/Chapter 2 _ GPU-Based Trail_files/cleardot.gif b/html-translated/vol2/Chapter 2 _ GPU-Based Trail_files/cleardot.gif new file mode 100644 index 0000000..1d11fa9 Binary files /dev/null and b/html-translated/vol2/Chapter 2 _ GPU-Based Trail_files/cleardot.gif differ diff --git a/html-translated/vol2/Chapter 2 _ GPU-Based Trail_files/element_main.js b/html-translated/vol2/Chapter 2 _ GPU-Based Trail_files/element_main.js new file mode 100644 index 0000000..4c5de3c --- /dev/null +++ b/html-translated/vol2/Chapter 2 _ GPU-Based Trail_files/element_main.js @@ -0,0 +1,486 @@ +(function(){/* + + Copyright The Closure Library Authors. + SPDX-License-Identifier: Apache-2.0 +*/ +var aa='" style="background-image:url(',ba="-disabled",ca="-document.getElementById('",da="/translate_a/t",ea="/translate_suggestion?client=",fa='
|
![]() |
|
This year, I participated in a hackathon called Art Hack Day 2018 * 1 where I personally created a visual work using Unity.
++Figure 3.1: Visual part of Already There +
+In my work, I used the technique of drawing a wireframe polygon using the Geometry Shader. In this chapter, we will explain the method. The sample in this chapter is "Geometry Wireframe" from https://github.com/IndieVisualLab/UnityGraphicsProgramming2 .
+[*1] Art Hack Day 2018 http://arthackday.jp/
I think that LineRenderer and GL are often used to draw lines in Unity, but this time I will use Graphics.DrawProcedural assuming that the amount of drawing will increase later.
First of all, let's draw a simple sine wave. Take a look at the sample SampleWaveLine scene .
++Figure 3.2: SampleWaveLine scene +
+For now, press the play button and run it, and you should see an orange sine wave in the Game view. Select the WaveLine object in the Hierarchy window and move the Vertex Num slider on the RenderWaveLine component in the Inspector window to change the smoothness of the sine wave. The implementation of the RenderWaveLine class looks like this:
+Listing 3.1: RenderWaveLine.cs
+using UnityEngine;
+
+[ExecuteInEditMode]
+public class RenderWaveLine : MonoBehaviour {
+ [Range(2,50)]
+ public int vertexNum = 4;
+
+ public Material material;
+
+ private void OnRenderObject ()
+ {
+ material.SetInt("_VertexNum", vertexNum - 1);
+ material.SetPass(0);
+ Graphics.DrawProcedural(MeshTopology.LineStrip, vertexNum);
+ }
+}
+
+Graphics.DrawProcedural runs immediately after the call, so it must be called inside the OnRenderObject. OnRenderObject is called after all cameras have rendered the scene. The first argument of Graphics.DrawProcedural is MeshTopology . MeshTopology is a specification of how to configure the mesh. There are five configurations that can be specified: Triangles (triangle polygon), Quads (square polygon), Lines (line connecting two points), LineStrip (connecting all points continuously), and Points (independent points). The second argument is the number of vertices .
This time, I want to place the vertices on the line of the sine wave and connect the lines, so I use MeshTopology.LineStrip . The second argument, vertexNum, specifies the number of vertices used to draw the sine wave. As you may have noticed here, I haven't passed an array of vertex coordinates to Shader anywhere. The vertex coordinates are calculated in the following Shader Vertex Shader (vertex shader). Next is WaveLine.shader.
Listing 3.2: WaveLine.shader
+Shader "Custom/WaveLine"
+{
+ Properties
+ {
+ _Color ("Color", Color) = (1,1,1,1)
+ _ScaleX ("Scale X", Float) = 1
+ _ScaleY ("Scale Y", Float) = 1
+ _Speed ("Speed",Float) = 1
+ }
+ SubShader
+ {
+ Tags { "RenderType"="Opaque" }
+ LOD 100
+
+ Pass
+ {
+ CGPROGRAM
+ #pragma vertex vert
+ #pragma fragment frag
+ #pragma target 3.5
+
+ #include "UnityCG.cginc"
+
+ #define PI 3.14159265359
+
+ struct v2f
+ {
+ float4 vertex : SV_POSITION;
+ };
+
+ float4 _Color;
+ int _VertexNum;
+ float _ScaleX;
+ float _ScaleY;
+ float _Speed;
+
+ v2f vert (uint id : SV_VertexID)
+ {
+ float div = (float)id / _VertexNum;
+ float4 pos = float4((div - 0.5) * _ScaleX,
+ sin(div * 2 * PI + _Time.y * _Speed) * _ScaleY, 0, 1);
+
+ v2f o;
+ o.vertex = UnityObjectToClipPos(pos);
+ return o;
+ }
+
+ fixed4 frag (v2f i) : SV_Target
+ {
+ return _Color;
+ }
+ ENDCG
+ }
+ }
+}
+
+SV_VertexID (vertex ID) is passed to the argument of the Vertex Shader function vert. The vertex ID is a serial number unique to the vertex. If you pass the number of vertices to be used as the second argument of Graphics.DrawProcedural, the Vertex Shader will be called once for each vertex, and the vertex ID argument will take a value from 0 to (the number of vertices - 1).
In Vertex Shader, the ratio from 0 to 1 is calculated by dividing the vertex ID by the number of vertices. The vertex coordinates (pos) are calculated based on the calculated ratio. The coordinates on the sine wave are obtained by giving the ratio obtained earlier in the calculation of the Y coordinate to the sin function. By adding _Time.y, we also animate the change in height as time progresses. Since the vertex coordinates are calculated in Vertex Shader, there is no need to pass the vertex coordinates from the C # side. Then, UnityObjectToClipPos is passing the coordinates converted from the object space to the clip space of the camera to the Fragment Shader.
Next, let's draw a polygon. To draw a polygon, you need vertices for each corner. It can be done by connecting vertices and closing as in the previous section, but this time I will draw a polygon from one vertex using Geometry Shader. For details on Geometry Shader, refer to "Chapter 6 Growing Grass with Geometry Shader" in UnityGraphicsProgramming vol.1 * 2 . Roughly speaking, the Geometry Shader is a shader that can increase the number of vertices, located between the Vertex Shader and the Fragment Shader.
+Take a look at the sample SamplePolygonLine scene .
+
++Figure 3.3: SamplePolygonLine scene +
+When you press the play button and run it, the triangle should rotate in the Game view. You can increase or decrease the number of triangle angles by selecting the PolygonLine object in the Hierarchy window and moving the Vertex Num slider on the SinglePolygon2D component in the Inspector window. The implementation of the SinglePolygon2D class looks like this:
+Listing 3.3: SinglePolygon2D.cs
+ using UnityEngine;
+
+[ExecuteInEditMode]
+public class SinglePolygon2D : MonoBehaviour {
+
+ [Range(2, 64)]
+ public int vertexNum = 3;
+
+ public Material material;
+
+ private void OnRenderObject ()
+ {
+ material.SetInt("_VertexNum", vertexNum);
+ material.SetMatrix("_TRS", transform.localToWorldMatrix);
+ material.SetPass(0);
+ Graphics.DrawProcedural(MeshTopology.Points, 1);
+ }
+}
+
+It has almost the same implementation as the RenderWaveLine class.
There are two major differences. The first is that the first argument of Graphics.DrawProcedural is changed from MeshTopology.LineStrip to MeshTopology.Points . The other is that the second argument of Graphics.DrawProcedural is fixed at 1 . In the RenderWaveLine class in the previous section, MeshTopology.LineStrip was specified because the lines were drawn by connecting the vertices, but this time I want to pass only one vertex and draw a polygon, so MeshTopology.Points is specified. This is because the minimum number of vertices required for drawing changes depending on the MeshTopology specification, and if it is less than that, nothing is drawn. MeshTopology.Lines and MeshTopology.LineStrip are 2 because they are lines, MeshTopology.Triangles are 3 because they are triangles, and MeshTopology.Points are 1 because they are points. By the way, in the part of material.SetMatrix("_TRS", transform.localToWorldMatrix);, the matrix converted from the local coordinate system of the GameObject to which the SinglePolygon2D component is assigned to the world coordinate system is passed to the shader. By multiplying this by the vertex coordinates in the shader, the transform of the GameObject, that is, the coordinates (position), orientation (rotation), and size (scale) will be reflected in the drawn figure.
Next, let's take a look at the implementation of SinglePolygonLine.Shader.
+Listing 3.4: SinglePolygonLine.shader
+Shader "Custom/Single Polygon Line"
+{
+ Properties
+ {
+ _Color ("Color", Color) = (1,1,1,1)
+ _Scale ("Scale", Float) = 1
+ _Speed ("Speed",Float) = 1
+ }
+ SubShader
+ {
+ Tags { "RenderType"="Opaque" }
+ LOD 100
+
+ Pass
+ {
+ CGPROGRAM
+ #pragma vertex vert
+ #pragma geometry geom // Declaration of Geometry Shader
+ #pragma fragment frag
+ #pragma target 4.0
+
+ #include "UnityCG.cginc"
+
+ #define PI 3.14159265359
+
+ // Output structure
+ struct Output
+ {
+ float4 pos : SV_POSITION;
+ };
+
+ float4 _Color;
+ int _VertexNum;
+ float _Scale;
+ float _Speed;
+ float4x4 _TRS;
+
+ Output vert (uint id : SV_VertexID)
+ {
+ Output o;
+ o.pos = mul (_TRS, float4 (0, 0, 0, 1));
+ return o;
+ }
+
+ // Geometry shader
+ [maxvertexcount(65)]
+ void geom(point Output input[1], inout LineStream<Output> outStream)
+ {
+ Output o;
+ float rad = 2.0 * PI / (float)_VertexNum;
+ float time = _Time.y * _Speed;
+
+ float4 pos;
+
+ for (int i = 0; i <= _VertexNum; i++) {
+ pos.x = cos(i * rad + time) * _Scale;
+ pos.y = sin (i * rad + time) * _Scale;
+ pos.z = 0;
+ pos.w = 1;
+ o.pos = UnityObjectToClipPos (pos);
+
+ outStream.Append(o);
+ }
+ outStream.RestartStrip();
+ }
+
+ fixed4 frag (Output i) : SV_Target
+ {
+ return _Color;
+ }
+ ENDCG
+ }
+ }
+}
+
+A new #pragma geometry geom declaration has been added between the #pragma vertex vert and the #pragma fragment frag . This means declaring a Geometry Shader function named geom. Vertex Shader's vert sets the coordinates of the vertices to the origin (0,0,0,1) for the time being, and multiplies it by the _TRS matrix (the matrix that converts from the local coordinate system to the world coordinate system) passed from C #. It has become like. The coordinates of each vertex of the polygon are calculated in the following Geometry Shader.
+Definition of Geometry Shader
+// Geometry shader
+[maxvertexcount(65)]
+void geom(point Output input[1], inout LineStream<Output> outStream)
+
The maximum number of vertices output from the Geometry Shader. This time, VertexNum of the SinglePolygonLine class is used to increase the number to 64 vertices, but since a line connecting the 64th vertex to the 0th vertex is required, 65 is specified.
+ +Represents the input information from Vertex Shader. point is a primitive type and means that one vertex is received, Output is a structure name, and input [1] is an array of length 1. Since only one vertex is used this time, I specified point and input [1], but when I want to mess with the vertices of a triangular polygon such as a mesh, I use triangle and input [3].
+ +Represents the output information from the Geometry Shader. LineStream <Output> means to output the line of the Output structure. There are also PointStream and TriangleStream. Next is the explanation inside the function.
+Implementation in function
+Output o;
+float rad = 2.0 * PI / (float)_VertexNum;
+float time = _Time.y * _Speed;
+
+float4 pos;
+
+for (int i = 0; i <= _VertexNum; i++) {
+ pos.x = cos(i * rad + time) * _Scale;
+ pos.y = sin (i * rad + time) * _Scale;
+ pos.z = 0;
+ pos.w = 1;
+ o.pos = UnityObjectToClipPos (pos);
+
+ outStream.Append(o);
+}
+
+outStream.RestartStrip();
+
+In order to calculate the coordinates of each vertex of the polygon, 2π (360 degrees) is divided by the number of vertices to obtain the angle of one corner. The vertex coordinates are calculated using trigonometric functions (sin, cos) in the loop. Output the calculated coordinates as vertices with outStream.Append (o). After looping as many times as _VertexNum to output the vertices, outStream.RestartStrip () ends the current strip and starts the next strip. As long as you add it with Append (), the lines will be connected as LineStream. Execute RestartStrip () to end the current line once. The next time Append () is called, it will not connect to the previous line and a new line will start.
+[* 2] UnityGraphicsProgramming vol.1 https://indievisuallab.stores.jp/items/59edf11ac8f22c0152002588
A regular octahedron is a polyhedron composed of eight equilateral triangles , as shown in Fig. 3.4 . Octahedron Sphere is a sphere created by dividing the three vertices of an equilateral triangle that make up a regular octahedron by spherical linear interpolation * 3 . Whereas normal linear interpolation interpolates so that two points are connected by a straight line, spherical linear interpolation interpolates so that two points pass on a spherical surface as shown in Fig. 3.5 .
+
++Figure 3.4: Octahedron +
+
+Figure 3.5: Spherical linear interpolation
+Take a look at the sample SampleOctahedron scene.
+
+Figure 3.6: SampleOctahedron scene
+When you press the run button, you should see a slowly rotating octahedron in the center of the Game view. Also, if you change the Level slider of the Geometry Octahedron Sphere component of the Single Octahedron Sphere object in the Hierarchy window, the sides of the octahedron will be split and gradually approach the sphere.
+[* 3] spherical linear interpolation, slerp for short
Next, let's take a look at the implementation. The implementation on the C # side is almost the same as SinplePolygon2D.cs in the previous section, so it will be omitted. OctahedronSphere.shader has a long source, so I will explain only in Geometry Shader.
+Listing 3.5: The beginning of the Geometry Shader in OctahedronSphere.shader
+// Geometry shader + float4 init_vectors[24]; + // 0 : the triangle vertical to (1,1,1) + init_vectors[0] = float4(0, 1, 0, 0); + init_vectors[1] = float4(0, 0, 1, 0); + init_vectors[2] = float4(1, 0, 0, 0); + // 1 : to (1,-1,1) + init_vectors[3] = float4(0, -1, 0, 0); + init_vectors[4] = float4(1, 0, 0, 0); + init_vectors[5] = float4(0, 0, 1, 0); + // 2 : to (-1,1,1) + init_vectors[6] = float4(0, 1, 0, 0); + init_vectors[7] = float4(-1, 0, 0, 0); + init_vectors[8] = float4(0, 0, 1, 0); + // 3 : to (-1,-1,1) + init_vectors[9] = float4(0, -1, 0, 0); + init_vectors[10] = float4(0, 0, 1, 0); + init_vectors[11] = float4(-1, 0, 0, 0); + // 4 : to (1,1,-1) + init_vectors[12] = float4(0, 1, 0, 0); + init_vectors[13] = float4(1, 0, 0, 0); + init_vectors[14] = float4(0, 0, -1, 0); + // 5 : to (-1,1,-1) + init_vectors[15] = float4(0, 1, 0, 0); + init_vectors[16] = float4(0, 0, -1, 0); + init_vectors[17] = float4(-1, 0, 0, 0); + // 6 : to (-1,-1,-1) + init_vectors[18] = float4(0, -1, 0, 0); + init_vectors[19] = float4(-1, 0, 0, 0); + init_vectors[20] = float4(0, 0, -1, 0); + // 7 : to (1,-1,-1) + init_vectors[21] = float4(0, -1, 0, 0); + init_vectors[22] = float4(0, 0, -1, 0); + init_vectors[23] = float4(1, 0, 0, 0); ++
First, as shown in Fig. 3.7, we define the triangles of a "normalized" octahedron as initial values.
+
++Figure 3.7: Octahedron vertex coordinates and triangles +
+It is defined in float4 because it is defined as a quaternion.
+Listing 3.6: OctahedronSphere.shader Triangle Spherical Linear Interpolation Split Processing Part
+for (int i = 0; i < 24; i += 3)
+{
+ for (int p = 0; p < n; p++)
+ {
+ // edge index 1
+ float4 edge_p1 = qslerp(init_vectors[i],
+ init_vectors[i + 2], (float)p / n);
+ float4 edge_p2 = qslerp(init_vectors[i + 1],
+ init_vectors[i + 2], (float)p / n);
+ float4 edge_p3 = qslerp(init_vectors[i],
+ init_vectors[i + 2], (float)(p + 1) / n);
+ float4 edge_p4 = qslerp(init_vectors[i + 1],
+ init_vectors[i + 2], (float)(p + 1) / n);
+
+ for (int q = 0; q < (n - p); q++)
+ {
+ // edge index 2
+ float4 a = qslerp(edge_p1, edge_p2, (float)q / (n - p));
+ float4 b = qslerp(edge_p1, edge_p2, (float)(q + 1) / (n - p));
+ float4 c, d;
+
+ if(distance(edge_p3, edge_p4) < 0.00001)
+ {
+ c = edge_p3;
+ d = edge_p3;
+ }
+ else {
+ c = qslerp(edge_p3, edge_p4, (float)q / (n - p - 1));
+ d = qslerp(edge_p3, edge_p4, (float)(q + 1) / (n - p - 1));
+ }
+
+ output1.pos = UnityObjectToClipPos(input[0].pos + mul(_TRS, a));
+ output2.pos = UnityObjectToClipPos(input[0].pos + mul(_TRS, b));
+ output3.pos = UnityObjectToClipPos(input[0].pos + mul(_TRS, c));
+
+ outStream.Append(output1);
+ outStream.Append(output2);
+ outStream.Append(output3);
+ outStream.RestartStrip();
+
+ if (q < (n - p - 1))
+ {
+ output1.pos = UnityObjectToClipPos(input[0].pos + mul(_TRS, c));
+ output2.pos = UnityObjectToClipPos(input[0].pos + mul(_TRS, b));
+ output3.pos = UnityObjectToClipPos(input[0].pos + mul(_TRS, d));
+
+ outStream.Append(output1);
+ outStream.Append(output2);
+ outStream.Append(output3);
+ outStream.RestartStrip();
+ }
+ }
+ }
+}
+
+This is the part where the triangle is divided by spherical linear interpolation. n is the number of triangle divisions. edge_p1 and edge_p2 find the starting point of the triangle, and edge_p3 and edge_p4 find the midpoint of the split edge. The qslerp function is a function that finds spherical linear interpolation. The definition of qslerp is as follows:
+Listing 3.7: Definition of qslerp in Quaternion.cginc
+// a: start Quaternion b: target Quaternion t: ratio
+float4 qslerp(float4 a, float4 b, float t)
+{
+ float4 r;
+ float t_ = 1 - t;
+ float wa, wb;
+ float theta = acos(a.x * b.x + a.y * b.y + a.z * b.z + a.w * b.w);
+ float sn = sin(theta);
+ wa = sin (t_ * theta) / sn;
+ wb = sin(t * theta) / sn;
+ r.x = wa * a.x + wb * b.x;
+ r.y = wa * a.y + wb * b.y;
+ r.z = wa * a.z + wb * b.z;
+ r.w = wa * a.w + wb * b.w;
+ normalize(r);
+ return r;
+}
+
+Next, I will explain the flow of the triangle division process. As an example, it is the flow when the number of divisions is 2 (n = 2).
+
++Figure 3.8: Triangle division process flow 1, calculation of edges_p1 to p4 +
+Figure 3.8 shows the following code.
+Listing 3.8: Calculation of edge_p1 to p4
+for (int p = 0; p < n; p++)
+{
+ // edge index 1
+ float4 edge_p1 = qslerp(init_vectors[i],
+ init_vectors[i + 2], (float)p / n);
+ float4 edge_p2 = qslerp(init_vectors[i + 1],
+ init_vectors[i + 2], (float)p / n);
+ float4 edge_p3 = qslerp(init_vectors[i],
+ init_vectors[i + 2], (float)(p + 1) / n);
+ float4 edge_p4 = qslerp(init_vectors[i + 1],
+ init_vectors[i + 2], (float)(p + 1) / n);
+
+The coordinates of edge_p1 to edge_p4 are obtained from the three points in the init_vectors array. When p = 0, p / n = 0/2 = 0 and edge_p1 = init_vectors [0], edge_p2 = init_vectors [1]. edge_p3 and edge_p4 are between init_vectors [0] and init_vectors [2] and between init_vectors [1] and init_vectors [2] at (p + 1) / n = (0 + 1) / 2 = 0.5, respectively. .. It is a flow that mainly divides the right side of the triangle.
+ +
++Figure 3.9: Triangle division process flow 2, abcd calculation +
+Figure 3.9 shows the following code.
+Listing 3.9: Calculation of coordinates a, b, c, d
+for (int q = 0; q < (n - p); q++)
+{
+ // edge index 2
+ float4 a = qslerp(edge_p1, edge_p2, (float)q / (n - p));
+ float4 b = qslerp(edge_p1, edge_p2, (float)(q + 1) / (n - p));
+ float4 c, d;
+
+ if(distance(edge_p3, edge_p4) < 0.00001)
+ {
+ c = edge_p3;
+ d = edge_p3;
+ }
+ else {
+ c = qslerp(edge_p3, edge_p4, (float)q / (n - p - 1));
+ d = qslerp(edge_p3, edge_p4, (float)(q + 1) / (n - p - 1));
+ }
+
+The coordinates of the vertex abcd are calculated using edge_p1 to p4 obtained in the previous section. It is a flow that mainly divides the left side of the triangle. Depending on the conditions, the coordinates of edge_p3 and edge_p4 will be the same. This happens when the right side of the triangle reaches a stage where it can no longer be divided. In that case, both c and d take the lower right coordinates of the triangle.
+ +
++Figure 3.10: Flow of triangle division processing 3, output triangle abc, triangle cbd +
+Figure 3.10 shows the following code.
+Listing 3.10: Output the triangle connecting the coordinates a, b, c & the triangle connecting the coordinates c, b, d
+output1.pos = UnityObjectToClipPos(input[0].pos + mul(_TRS, a));
+output2.pos = UnityObjectToClipPos(input[0].pos + mul(_TRS, b));
+output3.pos = UnityObjectToClipPos(input[0].pos + mul(_TRS, c));
+
+outStream.Append(output1);
+outStream.Append(output2);
+outStream.Append(output3);
+outStream.RestartStrip();
+
+if (q < (n - p - 1))
+{
+ output1.pos = UnityObjectToClipPos(input[0].pos + mul(_TRS, c));
+ output2.pos = UnityObjectToClipPos(input[0].pos + mul(_TRS, b));
+ output3.pos = UnityObjectToClipPos(input[0].pos + mul(_TRS, d));
+ outStream.Append(output1);
+ outStream.Append(output2);
+ outStream.Append(output3);
+ outStream.RestartStrip();
+}
+
+Convert the calculated coordinates of a, b, c, d to the coordinates for the screen by multiplying by UnityObjectToClipPos or the world coordinate transformation matrix. After that, outStream.Append and outStream.RestartStrip output two triangles connecting a, b, c and c, b, d.
+ +
++Figure 3.11: Flow of triangle division processing 4, when q = 1 +
+When q = 1, a is 1/2 = 0.5, so it is in the middle of edge_p1 and edge_p2, and b is 1/1 = 1, so it is in the position of edge_p2. Since c is 1/1 = 1, edge_p4 is calculated, and d is calculated for the time being, but it is not used because it does not fall under the condition of if (q < (n - p - 1)). Outputs a triangle connecting a, b, and c.
+ +
++Figure 3.12: Triangle division process flow 5, when p = 1 +
+This is the flow when the for statement of q ends and p = 1. Since p / n = 1/2 = 0.5, edge_p1 is between init_vectors [0] and init_vectors [2], and edge_p2 is between init_vectors [1] and init_vectors [2]. The subsequent coordinate calculation of a, b, c, d and the output of the triangles a, b, c are the same as the above processing. You have now divided one triangle into four. All the triangles of the octahedron are processed up to the above.
+ +In addition to this, we have prepared three samples that cannot be introduced due to space limitations, so please take a look if you are interested.
+
+Figure 3.13: SampleOctahedronSphereMultiVertexInstancing scene
+In this chapter, we explained the application of Geometry Shader for line representation. Geometry Shader usually divides polygons and creates plate polygons of particles, but you should also try to find interesting expressions by using the property of dynamically increasing the number of vertices.
\ No newline at end of file diff --git a/html-translated/vol2/Chapter 3 _ Application of Geometry Shader for Line Representation_files/cleardot.gif b/html-translated/vol2/Chapter 3 _ Application of Geometry Shader for Line Representation_files/cleardot.gif new file mode 100644 index 0000000..1d11fa9 Binary files /dev/null and b/html-translated/vol2/Chapter 3 _ Application of Geometry Shader for Line Representation_files/cleardot.gif differ diff --git a/html-translated/vol2/Chapter 3 _ Application of Geometry Shader for Line Representation_files/element_main.js b/html-translated/vol2/Chapter 3 _ Application of Geometry Shader for Line Representation_files/element_main.js new file mode 100644 index 0000000..4c5de3c --- /dev/null +++ b/html-translated/vol2/Chapter 3 _ Application of Geometry Shader for Line Representation_files/element_main.js @@ -0,0 +1,486 @@ +(function(){/* + + Copyright The Closure Library Authors. + SPDX-License-Identifier: Apache-2.0 +*/ +var aa='" style="background-image:url(',ba="-disabled",ca="-document.getElementById('",da="/translate_a/t",ea="/translate_suggestion?client=",fa='
|
![]() |
|
Hello! It's Sugihiro Nori! I'm sorry I couldn't write an article in the previous "UnityGraphicsPrograming Vol1"! Thanks to Oishi-kun for writing on behalf of me (._.)
+Here, I would like to explain the spray program that I could not write last time. The code of Unity is a little better than the one at the time of Vol1!
+First, let's implement a simple lighting effect by ourselves without using Unity's Built-in lights. Then, as an application, I will explain the development of the process of painting 3D objects with a spray. The concept of this chapter is to follow the flow of creating your own functions by referring to UnityCG.cginc and built-in processing and applying them to new functions. I think.
+ +The samples in this chapter are in the ProjectionSpray folder at
https://github.com/IndieVisualLab/UnityGraphicsProgramming2 .
Light in Unity is extremely useful. Just install a light object and the world will be brighter. When you select a shadow from the Inspector, a shadow map is automatically created and the shadow is cast from the object.
+First of all, we will implement the light independently while watching how Unity implements the light.
+ +In Unity, you can download the material shaders included by default and the internally used CGINC files from the Unity Download Archive.
+It will be helpful when writing your own shader, and you can learn more about CG depiction, so I recommend you to download and see it!
+
++Figure 4.1: https://unity3d.com/jp/get-unity/download/archive +
+The writing-related files that may be relevant in this chapter are:
+Let's take a look at basic Lambert lighting ( Listing 4.1 ), the reflection model devised by Lambert.
+Listing 4.1: Lighting.cginc
+ 1: struct SurfaceOutput {
+ 2: fixed3 Albedo;
+ 3: fixed3 Normal;
+ 4: fixed3 Emission;
+ 5: half Specular;
+ 6: fixed Gloss;
+ 7: fixed Alpha;
+ 8: };
+ 9: ~~
+10: inline fixed4 UnityLambertLight (SurfaceOutput s, UnityLight light)
+11: {
+12: fixed diff = max (0, dot (s.Normal, light.dir));
+13:
+14: fixed4 c;
+15: c.rgb = s.Albedo * light.color * diff;
+16: c.a = s.Alpha;
+17: return c;
+18: }
+
+In the actual lighting calculation, the diffuse value is calculated from the inner product of the direction from the light to the mesh and the normal direction of the mesh. Listing 4.1
+fixed diff = max (0, dot (s.Normal, light.dir));
For Unity Lights that are undefined in Lighting.cginc, they are defined in UnityLightingCommon.cginc and contain information about the color and direction of the lights. Listing 4.2
+Listing 4.2: UnityLightingCommon.cginc
+ 1: struct UnityLight
+ 2: {
+ 3: half3 color;
+ 4: half3 dir;
+ 5:
+ 6: // Deprecated: Ndotl is now calculated on the fly
+ 7: // and is no longer stored. Do not used it.
+ 8: half ndotl;
+ 9: };
+
+Looking at the actual lighting process, I found that the calculation of lighting requires the mesh's normal information, so let's take a quick look at how to display mesh normal information in a shader.
+See the scene in 00_viewNormal.unity in the sample project .
+The object has a material that outputs normal information as a color, and its shader is shown in Listing 4.3 .
+Listing 4.3: simple-showNormal.shader
+ 1: struct appdata
+ 2: {
+ 3: float4 vertex : POSITION;
+ 4: float3 normal: NORMAL;
+ 5: };
+ 6:
+ 7: struct v2f
+ 8: {
+ 9: float3 worldPos : TEXCOORD0;
+10: float3 normal : TEXCOORD1;
+11: float4 vertex : SV_POSITION;
+12: };
+13:
+14: v2f vert (appdata v)
+15: {
+16: v2f o;
+17: o.vertex = UnityObjectToClipPos(v.vertex);
+18: o.normal = UnityObjectToWorldNormal(v.normal);
+19: return o;
+20: }
+21:
+22: half4 frag (v2f i) : SV_Target
+23: {
+24: fixed4 col = half4(i.normal,1);
+25: return col;
+26: }
+
+The vertex shader (v2f vert) calculates the normal direction of the mesh in the world coordinate system and passes it to the fragment shader (half4 frag). In the fragment shader, the passed normal information is converted to color with the x component as R, the y component as G, and the z component as B, and output as it is. Listing 4.3
+Even if the part looks black on the image, the normal x may actually have a negative value. Figure 4.2
+
++図4.2: 00_viewNormal.unity +
+The mesh is now ready for lighting.
+There is a built-in utility function in UnityCG.cginc that makes it easy to write shaders. For example, the vertex position used in Listing 4.3UnityObjectToClipPos transforms from the object (local) coordinate system to the clip coordinate system. Also, UnityObjectToWorldNormalthe function of is converting the normal direction from the object coordinate system to the world coordinate system.
For other functions, please refer to UnityCG.cginc or the official manual as it is convenient for writing shaders. https://docs.unity3d.com/ja/current/Manual/SL-BuiltinFunctions.html
+Also, if you want to know more about coordinate transformation and each coordinate system, you may be able to learn more by referring to Unity Graphics Programming vol.1 and Mr. Fukunaga's "Chapter 9 Multi Plane Perspective Projection".
+See the scene at 01_pointLight.unity in the sample project .
+
++図4.3: 01_pointLight.unity +
+A point light source is a light source that illuminates all directions from a certain point. The scene has a Buddha mesh object and a PointLight object. The PointLight object has a script ( Listing 4.4 ) for sending light information to the mesh, and based on that light information, the lighting result is displayed as a material ( Listing 4.5 ).
+Listing 4.4: PointLightComponent.cs
+ 1: using UnityEngine;
+ 2:
+ 3: [ExecuteInEditMode]
+ 4: public class PointLightComponent : MonoBehaviour
+ 5: {
+ 6: static MaterialPropertyBlock mpb;
+ 7:
+ 8: public Renderer targetRenderer;
+ 9: public float intensity = 1f;
+10: public Color color = Color.white;
+11:
+12: void Update()
+13: {
+14: if (targetRenderer == null)
+15: return;
+16: if (mpb == null)
+17: mpb = new MaterialPropertyBlock();
+18:
+19: targetRenderer.GetPropertyBlock(mpb);
+20: mpb.SetVector("_LitPos", transform.position);
+21: mpb.SetFloat("_Intensity", intensity);
+22: mpb.SetColor("_LitCol", color);
+23: targetRenderer.SetPropertyBlock(mpb);
+24: }
+25:
+26: private void OnDrawGizmos()
+27: {
+28: Gizmos.color = color;
+29: Gizmos.DrawWireSphere(transform.position, intensity);
+30: }
+31: }
+
+This component passes the position, intensity, and color of the light to the target mesh. Then the material that performs the lighting process is set up based on the received information.
+Values are set from CSharp for each property of "_LitPos", "_LitCol"and "_Intensity"of the material .
Listing 4.5: simple-pointLight.shader
+ 1: Shader "Unlit/Simple/PointLight-Reciever"
+ 2: {
+ 3: Properties
+ 4: {
+ 5: _LitPos("light position", Vector) = (0,0,0,0)
+ 6: _LitCol("light color", Color) = (1,1,1,1)
+ 7: _Intensity("light intensity", Float) = 1
+ 8: }
+ 9: SubShader
+10: {
+11: Tags { "RenderType"="Opaque" }
+12: LOD 100
+13:
+14: Pass
+15: {
+16: CGPROGRAM
+17: #pragma vertex vert
+18: #pragma fragment frag
+19:
+20: #include "UnityCG.cginc"
+21:
+22: struct appdata
+23: {
+24: float4 vertex : POSITION;
+25: float3 normal: NORMAL;
+26: };
+27:
+28: struct v2f
+29: {
+30: float3 worldPos : TEXCOORD0;
+31: float3 normal : TEXCOORD1;
+32: float4 vertex : SV_POSITION;
+33: };
+34:
+35: half4 _LitPos, _LitCol;
+36: half _Intensity;
+37:
+38: v2f vert (appdata v)
+39: {
+40: v2f o;
+41: o.vertex = UnityObjectToClipPos(v.vertex);
+42: o.worldPos = mul(unity_ObjectToWorld, v.vertex).xyz;
+43: // Pass the position of the mesh in the world coordinate system to the fragment shader
+44: o.normal = UnityObjectToWorldNormal(v.normal);
+45: return o;
+46: }
+47:
+48: fixed4 frag (v2f i) : SV_Target
+49: {
+50: half3 to = i.worldPos - _LitPos;
+51: // Vector from light position to mesh position
+52: half3 lightDir = normalize(to);
+53: half dist = length(to);
+54: half atten =
+55: _Intensity * dot(-lightDir, i.normal) / (dist * dist);
+56:
+57: half4 col = max(0.0, atten) * _LitCol;
+58: return col;
+59: }
+60: ENDCG
+61: }
+62: }
+63: }
+
+The lighting calculation is based on the basic Lambert Lighting ( Listing 4.1 ) calculation, and the intensity is attenuated in inverse proportion to the square of the distance. Listing 4.5
+half atten = _Intensity * dot(-lightDir, i.normal) / (dist * dist);
It's a simple system of one light for one model, but I was able to implement the lighting process.
+
++図4.4: 01_pointLight.unity +
+Next, let's implement the spotlight. Unlike point lights, spotlights are directional lights that emit light in one direction.
+
++図4.5: 02_spotLight.unity +
+We are using standard Unity lights here just for the Gizmo display of the spotlights. Figure 4.5
+Since the spotlight is directional, the direction of the light and the spot angle information are added to the point light's position information. This information is passed to the shader as the light's worldToLightMatrix and projectionMatrix, each as a Matrix4x4 (a float4x4 in the shader's properties).
In addition, the spotlight can also set a Light Cookie. (Unity has a default LightCookie, but I couldn't select it from the editor, so I'm using the Default-Particle texture)
+Listing 4.6: SpotLightComponent.cs
+ 1: using UnityEngine;
+ 2:
+ 3: [ExecuteInEditMode]
+ 4: public class SpotLightComponent : MonoBehaviour
+ 5: {
+ 6: static MaterialPropertyBlock mpb;
+ 7:
+ 8: public Renderer targetRenderer;
+ 9: public float intensity = 1f;
+10: public Color color = Color.white;
+11: [Range(0.01f, 90f)] public float angle = 30f;
+12: public float range = 10f;
+13: public Texture cookie;
+14:
+15: void Update()
+16: {
+17: if (targetRenderer == null)
+18: return;
+19: if (mpb == null)
+20: mpb = new MaterialPropertyBlock();
+21:
+22: // Calculating projectionMatrix
+23: var projMatrix = Matrix4x4.Perspective(angle, 1f, 0f, range);
+24: var worldToLightMatrix = transform.worldToLocalMatrix;
+25:
+26: targetRenderer.GetPropertyBlock(mpb);
+27: mpb.SetVector("_LitPos", transform.position);
+28: mpb.SetFloat("_Intensity", intensity);
+29: mpb.SetColor("_LitCol", color);
+30: mpb.SetMatrix("_WorldToLitMatrix", worldToLightMatrix);
+31: mpb.SetMatrix("_ProjMatrix", projMatrix);
+32: mpb.SetTexture("_Cookie", cookie);
+33: targetRenderer.SetPropertyBlock(mpb);
+34: }
+35: }
+
+projectionMatrix is calculated by Matrix4x4.Perspective(angle, 1f, 0f, range).
Shader calculates and displays the lighting process based on the parameter information received from the spotlight. Listing 4.7
+Listing 4.7: simple-spotLight.shader
+ 1: uniform float4x4 _ProjMatrix, _WorldToLitMatrix;
+ 2:
+ 3: sampler2D _Cookie;
+ 4: half4 _LitPos, _LitCol;
+ 5: half _Intensity;
+ 6:
+ 7: ~~
+ 8:
+ 9: fixed4 frag (v2f i) : SV_Target
+10: {
+11: half3 to = i.worldPos - _LitPos.xyz;
+12: half3 lightDir = normalize(to);
+13: half dist = length(to);
+14: half atten = _Intensity * dot(-lightDir, i.normal) / (dist * dist);
+15:
+16: half4 lightSpacePos = mul(_WorldToLitMatrix, half4(i.worldPos, 1.0));
+17: half4 projPos = mul(_ProjMatrix, lightSpacePos);
+18: projPos.z *= -1;
+19: half2 litUv = projPos.xy / projPos.z;
+20: litUv = litUv * 0.5 + 0.5;
+21: half lightCookie = tex2D(_Cookie, litUv);
+22: lightCookie *=
+23: 0<litUv.x && litUv.x<1 && 0<litUv.y && litUv.y<1 && 0<projPos.z;
+24:
+25: half4 col = max(0.0, atten) * _LitCol * lightCookie;
+26: return col;
+27: }
+28:
+
+You can see that it's basically the same as a point light except for the fragment shader. Listing 4.7
+From the 16th line to the 22nd line, the intensity at each point of the spotlight is calculated. When viewed from the position of the light, the light cookie at that point is sampled to determine whether the point is within the range of the light and the intensity of the light.
+
++図4.6: 02_spotLight.unity +
+For the spotlight cookie processing, the UnitySpotCookie() part in the built-in CGINC file AutoLight.cginc will be helpful.
Listing 4.8: AutoLight.cginc
+ 1: #ifdef SPOT
+ 2: sampler2D _LightTexture0;
+ 3: unityShadowCoord4x4 unity_WorldToLight;
+ 4: sampler2D _LightTextureB0;
+ 5: inline fixed UnitySpotCookie(unityShadowCoord4 LightCoord)
+ 6: {
+ 7: return tex2D(
+ 8: _LightTexture0,
+ 9: LightCoord.xy / LightCoord.w + 0.5
+10: ).w;
+11: }
+12: inline fixed UnitySpotAttenuate(unityShadowCoord3 LightCoord)
+13: {
+14: return tex2D(
+15: _LightTextureB0,
+16: dot(LightCoord, LightCoord).xx
+17: ).UNITY_ATTEN_CHANNEL;
+18: }
+19: #define UNITY_LIGHT_ATTENUATION(destName, input, worldPos) \
+20: unityShadowCoord4 lightCoord = mul( \
+21: unity_WorldToLight, \
+22: unityShadowCoord4(worldPos, 1) \
+23: ); \
+24: fixed shadow = UNITY_SHADOW_ATTENUATION(input, worldPos); \
+25: fixed destName = \
+26: (lightCoord.z > 0) * \
+27: UnitySpotCookie(lightCoord) * \
+28: UnitySpotAttenuate(lightCoord.xyz) * shadow;
+29: #endif
+
+Finally, as a lighting implementation, let's implement a shadow.
+Light comes out of the light, the mesh that is directly exposed to light becomes brighter, there is something else between the light and the mesh, and the mesh that is blocked by light becomes darker. This is the shadow.
+As a procedure, roughly
+It will be in the form of. This time we need a depth texture from the position of the light, so we'll add a Camera component to SpotLight to create the depth texture as seen from the light.
++図4.7: 03_spotLight-withShadow.unity +
+Camera (built-in) is attached to SpotLightComponent (self-made). Figure 4.7
+Listing 4.9: SpotLightWithShadow.cs
+ 1: Shader depthRenderShader {
+ 2: get { return Shader.Find("Unlit/depthRender"); }
+ 3: }
+ 4:
+ 5: new Camera camera
+ 6: {
+ 7: get
+ 8: {
+ 9: if (_c == null)
+10: {
+11: _c = GetComponent<Camera>();
+12: if (_c == null)
+13: _c = gameObject.AddComponent<Camera>();
+14: depthOutput = new RenderTexture(
+15: shadowMapResolution,
+16: shadowMapResolution,
+17: 16,
+18: RenderTextureFormat.RFloat
+19: );
+20: depthOutput.wrapMode = TextureWrapMode.Clamp;
+21: depthOutput.Create();
+22: _c.targetTexture = depthOutput;
+23: _c.SetReplacementShader(depthRenderShader, "RenderType");
+24: _c.clearFlags = CameraClearFlags.Nothing;
+25: _c.nearClipPlane = 0.01f;
+26: _c.enabled = false;
+27: }
+28: return _c;
+29: }
+30: }
+31: Camera _c;
+32: RenderTexture depthOutput;
+33:
+34: void Update()
+35: {
+36: if (mpb == null)
+37: mpb = new MaterialPropertyBlock();
+38:
+39: var currentRt = RenderTexture.active;
+40: RenderTexture.active = depthOutput;
+41: GL.Clear(true, true, Color.white * camera.farClipPlane);
+42: camera.fieldOfView = angle;
+43: camera.nearClipPlane = 0.01f;
+44: camera.farClipPlane = range;
+45: camera.Render();
+46: // Camera rendering is done manually in the script
+47: RenderTexture.active = currentRt;
+48:
+49: var projMatrix = camera.projectionMatrix;
+50: // Use the camera's projection matrix
+51: var worldToLightMatrix = transform.worldToLocalMatrix;
+52:
+53: ~~
+54: }
+
+The C# script is almost the same as the shadowless version, but with the camera set up to render the depth texture and a ReplacementShader used to render the depth. Also, since we have a camera this time, we use Camera.projectionMatrix for the projection matrix instead of Matrix4x4.Perspective.
The shader for depth texture generation looks like this:
+Listing 4.10: depthRender.shader
+ 1: v2f vert (float4 pos : POSITION)
+ 2: {
+ 3: v2f o;
+ 4: o.vertex = UnityObjectToClipPos(pos);
+ 5: o.depth = abs(UnityObjectToViewPos(pos).z);
+ 6: return o;
+ 7: }
+ 8:
+ 9: float frag (v2f i) : SV_Target
+10: {
+11: return i.depth;
+12: }
+
+The generated depth texture ( Fig. 4.8 ) outputs the z-coordinate value of the position of the object in the light coordinate system (camera coordinate system).
+
++図4.8: light depthTexture +
+Pass the generated depth texture (depthOutput) to the mesh object and render the object. The part of the shader that calculates the object's shadow looks like this:
Listing 4.11: simple-spotLight-withShadow.shader
+ 1: fixed4 frag (v2f i) : SV_Target
+ 2: {
+ 3: ///diffuse lighting
+ 4: half3 to = i.worldPos - _LitPos.xyz;
+ 5: half3 lightDir = normalize(to);
+ 6: half dist = length(to);
+ 7: half atten = _Intensity * dot(-lightDir, i.normal) / (dist * dist);
+ 8:
+ 9: ///spot-light cookie
+10: half4 lightSpacePos = mul(_WorldToLitMatrix, half4(i.worldPos, 1.0));
+11: half4 projPos = mul(_ProjMatrix, lightSpacePos);
+12: projPos.z *= -1;
+13: half2 litUv = projPos.xy / projPos.z;
+14: litUv = litUv * 0.5 + 0.5;
+15: half lightCookie = tex2D(_Cookie, litUv);
+16: lightCookie *=
+17: 0<litUv.x && litUv.x<1 && 0<litUv.y && litUv.y<1 && 0<projPos.z;
+18:
+19: ///shadow
+20: half lightDepth = tex2D(_LitDepth, litUv).r;
+21: // _LitDepth is passed the depth texture seen from the light
+22: atten *= 1.0 - saturate(10*abs(lightSpacePos.z) - 10*lightDepth);
+23:
+24: half4 col = max(0.0, atten) * _LitCol * lightCookie;
+25: return col;
+26: }
+
+Both the depth texture rendered by the camera, sampled as tex2D(_LitDepth, litUv).r, and lightSpacePos.z store the z value of a position as seen from the light. Since the depth texture holds the surfaces visible from the light — that is, the surfaces that receive light — we determine whether a point is in shadow by comparing lightSpacePos.z with the value (lightDepth) sampled from the depth texture.
atten *= 1.0 - saturate(10*abs(lightSpacePos.z) - 10*lightDepth);
+In this code, the more lightSpacePos.z exceeds lightDepth, the darker the surface becomes.
++図4.9: 03_spotLight-withShadow.unity +
+You can now see the shadow of the object in the spotlight.
+Using this spotlight and shadow implementation, we will implement a spray function that colors objects in real time.
+Unity scene: in Example, compareMatrix.unity
+Listing 4.12: CompareMatrix.cs
+1: float fov = 30f; + 2: float near = 0.01f; + 3: float far = 1000f; + 4: + 5: camera.fieldOfView = fov; + 6: camera.nearClipPlane = near; + 7: camera.farClipPlane = far; + 8: + 9: Matrix4x4 cameraMatrix = camera.projectionMatrix; +10: Matrix4x4 perseMatrix = Matrix4x4.Perspective( +11: fov, +12: 1f, +13: near, +14: far +15: ); ++
From here, we will apply our own SpotLightComponent to implement the spray function that allows you to paint objects.
+Basically, it draws on the texture of the object based on the value of the lighting intensity. Since the Buddha object used this time does not have uv data, it is not possible to paste the texture as it is, but Unity has a function to generate UV for LightMap.
+
++図4.10: buddha Import Setting +
+If you check the "Generate Lightmap UVs" item in the model Import Setting, UVs for the lightmap will be generated. ( v.uv2 : TEXCOORD1) Create a drawable RenderTexture for this Uv2 and draw it.
See 00_showUv2.unity for a sample scene .
+In order to write to the texture that maps to mesh.uv2, we need to generate a texture that is expanded from the mesh to UV2. First, let's create a shader that expands the vertices of the mesh to the coordinates of UV2.
+
++Figure 4.11: 00_showUv2.unity +
+Selecting a Buddha object in the scene and manipulating the material's "slider" parameter will change the object from its original shape to its Uv2-expanded shape. For the coloring, uv2.xy is assigned to color.rg.
Listing 4.13: showUv2.shader
+ 1: float _T;
+ 2:
+ 3: v2f vert(appdata v)
+ 4: {
+ 5: #if UNITY_UV_STARTS_AT_TOP
+ 6: v.uv2.y = 1.0 - v.uv2.y;
+ 7: #endif
+ 8: float4 pos0 = UnityObjectToClipPos(v.vertex);
+ 9: float4 pos1 = float4 (v.uv2 * 2.0 - 1.0, 0.0, 1.0);
+10:
+11: v2f o;
+12: o.vertex = lerp(pos0, pos1, _T);
+13: o.uv2 = v.uv2;
+14: o.worldPos = mul(unity_ObjectToWorld, v.vertex).xyz;
+15: o.normal = UnityObjectToWorldNormal(v.normal);
+16: return o;
+17: }
+18:
+19: half4 frag(v2f i) : SV_Target
+20: {
+21: return half4(i.uv2,0,1);
+22: }
+
+float4 pos1 = float4(v.uv2*2.0 - 1.0, 0.0, 1.0);The value of is the position expanded to Uv2 in the clip coordinate system. Listing 4.13
+Since we are passing the values of worldPos and normal to the fragment shader, we will use them for the lighting in the spotlight calculation.
++Figure 4.12: 00_showUv2.unity +
+You can generate textures expanded from mesh to Uv2!
+ +Now that we're ready, we'll implement the spray functionality. See the scene at 01_projectionSpray.unity .
+
++図4.13: 01_projectionSpray.unity +
+As you run this scene, the black Buddha objects will gradually become colored. Then, when you click on the screen, that part will be sprayed with a colorful color.
+In terms of implementation, it is an application of the self-made spotlight implemented so far. The spotlight lighting calculation is used not for lighting as-is, but for updating a RenderTexture. In this example, the texture being drawn is mapped to mesh.uv2, which was generated for the lightmap.
+Drawable is a component attached to the object to be sprayed; it handles drawing into the texture. The ProjectionSpray component sets properties on the Material used for drawing on the texture, such as the position of the spray. As the processing flow, DrawableController calls projectionSpray.Draw(drawable) in its Update function to draw on the texture.
Material drawMat: Material for drawingUpdateDrawingMat(): Update material settings before drawingDraw(Drawable drawable)Pass : drawMatto drawable.Draw(Material mat)and draw.Listing 4.14: projectionSpray.cs
+ 1: public class ProjectionSpray : MonoBehaviour {
+ 2:
+ 3: public Material drawingMat;
+ 4:
+ 5: public float intensity = 1f;
+ 6: public Color color = Color.white;
+ 7: [Range(0.01f, 90f)] public float angle = 30f;
+ 8: public float range = 10f;
+ 9: public Texture cookie;
+10: public int shadowMapResolution = 1024;
+11:
+12: Shader depthRenderShader {
+13: get { return Shader.Find("Unlit/depthRender"); }
+14: }
+15:
+16: new Camera camera{get{~~}}
+17: Camera _c;
+18: RenderTexture depthOutput;
+19:
+20: public void UpdateDrawingMat()
+21: {
+22: var currentRt = RenderTexture.active;
+23: RenderTexture.active = depthOutput;
+24: GL.Clear(true, true, Color.white * camera.farClipPlane);
+25: camera.fieldOfView = angle;
+26: camera.nearClipPlane = 0.01f;
+27: camera.farClipPlane = range;
+28: camera.Render();
+29: // Update depth texture
+30: RenderTexture.active = currentRt;
+31:
+32: var projMatrix = camera.projectionMatrix;
+33: var worldToDrawerMatrix = transform.worldToLocalMatrix;
+34:
+35: drawingMat.SetVector("_DrawerPos", transform.position);
+36: drawingMat.SetFloat("_Emission", intensity * Time.smoothDeltaTime);
+37: drawingMat.SetColor("_Color", color);
+38: drawingMat.SetMatrix("_WorldToDrawerMatrix", worldToDrawerMatrix);
+39: drawingMat.SetMatrix("_ProjMatrix", projMatrix);
+40: drawingMat.SetTexture("_Cookie", cookie);
+41: drawingMat.SetTexture("_DrawerDepth", depthOutput);
+42: // The property name is different, but the information passed is the same as the spotlight.
+43: }
+44:
+45: public void Draw(Drawable drawable)
+46: {
+47: drawable.Draw(drawingMat);
+48: // The drawing process itself is done with Drawable.
+49: // Projection Spray has the Material to draw.
+50: }
+51: }
+
+The object to be drawn on by spraying. It has a texture for drawing, which is created as a RenderTexture in the Start() function. It uses the classic ping-pong buffer technique.
Let's see the processing of the part that draws on the texture
+Listing 4.15: Drawable.cs
+ 1: // This function is called from projectionSpray.Draw (Drawable drawable)
+ 2: public void Draw(Material drawingMat)
+ 3: {
+ 4: drawingMat.SetTexture("_MainTex", pingPongRts[0]);
+ 5: // Set the current state of the texture to be drawn as the material.
+ 6:
+ 7: var currentActive = RenderTexture.active;
+ 8: RenderTexture.active = pingPongRts[1];
+ 9: // Set the texture to be drawn.
+10: GL.Clear(true, true, Color.clear);
+11: // Clear the texture to be drawn.
+12: drawingMat.SetPass(0);
+13: Graphics.DrawMeshNow(mesh, transform.localToWorldMatrix);
+14: // Updated texture with target mesh and transform values to draw.
+15: RenderTexture.active = currentActive;
+16:
+17: Swap(pingPongRts);
+18:
+19: if(fillCrack!=null)
+20: {
+21: // This is a process to prevent cracks from forming at the joints of Uv.
+22: Graphics.Blit(pingPongRts[0], pingPongRts[1], fillCrack);
+23: Swap(pingPongRts);
+24: }
+25:
+26: Graphics.CopyTexture(pingPongRts[0], output);
+27: // Copy the updated texture to output
+28: }
+
+The point here is that we are updating the RenderTexture using Graphics.DrawMeshNow(mesh, matrix). Since the vertex shader of drawingMat expands the mesh's vertices into the shape of mesh.uv2, the texture can be updated after passing the vertex position, normal, and transform information of the mesh to the fragment shader. ( Listing 4.16 )
Listing 4.16: ProjectionSpray.shader
+ 1: v2f vert (appdata v)
+ 2: {
+ 3: v.uv2.y = 1.0 - v.uv2.y;
+ 4: // Invert the y coordinate!
+ 5:
+ 6: v2f o;
+ 7: o.vertex = float4(v.uv2*2.0 - 1.0, 0.0, 1.0);
+ 8: // Same process as showUv2!
+ 9: o.uv = v.uv2;
+10: o.worldPos = mul(unity_ObjectToWorld, v.vertex).xyz;
+11: o.normal = UnityObjectToWorldNormal(v.normal);
+12: return o;
+13: }
+14:
+15: sampler2D _MainTex;
+16:
+17: uniform float4x4 _ProjMatrix, _WorldToDrawerMatrix;
+18:
+19: sampler2D _Cookie, _DrawerDepth;
+20: half4 _DrawerPos, _Color;
+21: half _Emission;
+22:
+23: half4 frag (v2f i) : SV_Target
+24: {
+25: ///diffuse
+26: half3 to = i.worldPos - _DrawerPos.xyz;
+27: half3 dir = normalize(to);
+28: half dist = length(to);
+29: half atten = _Emission * dot(-dir, i.normal) / (dist * dist);
+30:
+31: ///spot cookie
+32: half4 drawerSpacePos = mul(
+33: _WorldToDrawerMatrix,
+34: half4(i.worldPos, 1.0)
+35: );
+36: half4 projPos = mul(_ProjMatrix, drawerSpacePos);
+37: projPos.z *= -1;
+38: half2 drawerUv = projPos.xy / projPos.z;
+39: drawerUv = drawerUv * 0.5 + 0.5;
+40: half cookie = tex2D(_Cookie, drawerUv);
+41: cookie *=
+42: 0<drawerUv.x && drawerUv.x<1 &&
+43: 0<drawerUv.y && drawerUv.y<1 && 0<projPos.z;
+44:
+45: ///shadow
+46: half drawerDepth = tex2D(_DrawerDepth, drawerUv).r;
+47: atten *= 1.0 - saturate(10 * abs(drawerSpacePos.z) - 10 * drawerDepth);
+48: // So far, it's the same as spotlight processing!
+49:
+50: i.uv.y = 1 - i.uv.y;
+51: half4 col = tex2D(_MainTex, i.uv);
+52: // _MainTex is assigned drawable.pingPongRts [0]
+53: col.rgb = lerp(
+54: col.rgb,
+55: _Color.rgb,
+56: saturate(col.a * _Emission * atten * cookie)
+57: );
+58: // This is the process of drawing!
+59: // Complementing the original texture to the drawn color according to the calculated lighting intensity.
+60:
+61: col.a = 1;
+62: return col;
+63: // The value is output to drawable.pingPongRts [1]
+64: }
+
+You can now spray the 3D model. ( Fig. 4.14 )
+
++図4.14: 01_projectionSpray.unity +
+If you look at UnityCG.cginc, Lighting.cginc, etc., the built-in processing is written, and it will be a reference to implement various processing, so it is good to see it!
\ No newline at end of file diff --git a/html-translated/vol2/Chapter 4 _ Projection Spray_files/buddha-importSetting.png b/html-translated/vol2/Chapter 4 _ Projection Spray_files/buddha-importSetting.png new file mode 100644 index 0000000..8469c11 Binary files /dev/null and b/html-translated/vol2/Chapter 4 _ Projection Spray_files/buddha-importSetting.png differ diff --git a/html-translated/vol2/Chapter 4 _ Projection Spray_files/buddha-spray.png b/html-translated/vol2/Chapter 4 _ Projection Spray_files/buddha-spray.png new file mode 100644 index 0000000..e53f582 Binary files /dev/null and b/html-translated/vol2/Chapter 4 _ Projection Spray_files/buddha-spray.png differ diff --git a/html-translated/vol2/Chapter 4 _ Projection Spray_files/buddha-uv2.png b/html-translated/vol2/Chapter 4 _ Projection Spray_files/buddha-uv2.png new file mode 100644 index 0000000..2d7fa3f Binary files /dev/null and b/html-translated/vol2/Chapter 4 _ Projection Spray_files/buddha-uv2.png differ diff --git a/html-translated/vol2/Chapter 4 _ Projection Spray_files/buddha_normal.png b/html-translated/vol2/Chapter 4 _ Projection Spray_files/buddha_normal.png new file mode 100644 index 0000000..1909d87 Binary files /dev/null and b/html-translated/vol2/Chapter 4 _ Projection Spray_files/buddha_normal.png differ diff --git a/html-translated/vol2/Chapter 4 _ Projection Spray_files/buddha_pointLight.png b/html-translated/vol2/Chapter 4 _ Projection Spray_files/buddha_pointLight.png new file mode 100644 index 0000000..e2e8954 Binary files /dev/null and b/html-translated/vol2/Chapter 4 _ Projection Spray_files/buddha_pointLight.png differ diff --git a/html-translated/vol2/Chapter 4 _ Projection Spray_files/buddha_spotLight.png b/html-translated/vol2/Chapter 4 _ Projection Spray_files/buddha_spotLight.png new file mode 100644 index 0000000..6e25212 Binary files /dev/null and b/html-translated/vol2/Chapter 4 _ Projection Spray_files/buddha_spotLight.png differ diff --git 
a/html-translated/vol2/Chapter 4 _ Projection Spray_files/buddha_spotShadow.png b/html-translated/vol2/Chapter 4 _ Projection Spray_files/buddha_spotShadow.png new file mode 100644 index 0000000..0ff3dff Binary files /dev/null and b/html-translated/vol2/Chapter 4 _ Projection Spray_files/buddha_spotShadow.png differ diff --git a/html-translated/vol2/Chapter 4 _ Projection Spray_files/cleardot.gif b/html-translated/vol2/Chapter 4 _ Projection Spray_files/cleardot.gif new file mode 100644 index 0000000..1d11fa9 Binary files /dev/null and b/html-translated/vol2/Chapter 4 _ Projection Spray_files/cleardot.gif differ diff --git a/html-translated/vol2/Chapter 4 _ Projection Spray_files/depth.png b/html-translated/vol2/Chapter 4 _ Projection Spray_files/depth.png new file mode 100644 index 0000000..05b0058 Binary files /dev/null and b/html-translated/vol2/Chapter 4 _ Projection Spray_files/depth.png differ diff --git a/html-translated/vol2/Chapter 4 _ Projection Spray_files/downloadBuilt-inShader.png b/html-translated/vol2/Chapter 4 _ Projection Spray_files/downloadBuilt-inShader.png new file mode 100644 index 0000000..ea65696 Binary files /dev/null and b/html-translated/vol2/Chapter 4 _ Projection Spray_files/downloadBuilt-inShader.png differ diff --git a/html-translated/vol2/Chapter 4 _ Projection Spray_files/element_main.js b/html-translated/vol2/Chapter 4 _ Projection Spray_files/element_main.js new file mode 100644 index 0000000..4c5de3c --- /dev/null +++ b/html-translated/vol2/Chapter 4 _ Projection Spray_files/element_main.js @@ -0,0 +1,486 @@ +(function(){/* + + Copyright The Closure Library Authors. + SPDX-License-Identifier: Apache-2.0 +*/ +var aa='" style="background-image:url(',ba="-disabled",ca="-document.getElementById('",da="/translate_a/t",ea="/translate_suggestion?client=",fa='
|
![]() |
|
In this chapter, we will explain the noise used in computer graphics. Noise was developed in the 1980s as a new method of image generation for texture mapping. Texture mapping, which attaches an image to an object to create its complexity, is a well-known technique in today's CG, but computers at that time had very limited storage space. Using image data for texture mapping was not compatible with the hardware. Therefore, a method for procedurally generating this noise pattern was devised. Naturally occurring substances and phenomena such as mountains, desert-like terrain, clouds, water surfaces, flames, marble, grain, rocks, crystals, and foam films have visual complexity and regular patterns. .. Noise can generate the best texture patterns for expressing such naturally occurring substances and phenomena, and has become an indispensable technique when procedurally wanting to generate graphics. Typical noise algorithms are Ken Perlin 's achievements, Perlin Noise and Simplex Noise . Here, as a stepping stone to many applications of noise, I would like to explain mainly the algorithms of these noises and the implementation by shaders.
+The sample data in this chapter is from the Common Unity Sample Project.
+Assets/TheStudyOfProceduralNoise
+It is in. Please also refer to it.
+ +The word noise means a noisy sound that can be translated as noise in the field of audio, and also in the field of video, it usually refers to general unnecessary information for the content to be processed or to show image roughness. Also used. Noise in computer graphics is a function that takes an N-dimensional vector as an input and returns a scalar value (one-dimensional value) of a random pattern with the following characteristics.
+
++Figure 5.1: Noise characteristics +
+Noise can be used for the following purposes by receiving an N-dimensional vector as an input.
+
++Figure 5.2: Noise application +
+We will explain the algorithms for Value Noise , Perlin Noise , Improved Perlin Noise , and Simplex Noise .
+ +Although it does not strictly meet the conditions and accuracy of a noise function, we will introduce a noise algorithm called Value Noise , which is the easiest to implement and helps you understand noise.
+ +For two dimensions, define an evenly spaced grid on each of the x and y axes. The grid has a square shape, and at each of these grid points, the value of the pseudo-random number is calculated with reference to the coordinate values of the grid points. In the case of 3D, a grid is defined at equal intervals on each of the x, y, and z axes, and the shape of the grid is a cube.
+
++Figure 5.3: Lattice (2D), Lattice (3D) +
+A random number is a sequence of numbers that are randomly arranged so that they have the same probability of appearing. There are also random numbers called true random numbers and pseudo-random numbers. For example, when rolling a dice, it is impossible to predict the next roll from the previous roll, and such a random number is a true random number. Is called. On the other hand, those with regularity and reproducibility are called pseudo-random numbers (Pseudo Random) . (When a computer generates a random number sequence, it is calculated by a deterministic calculation, so most of the generated random numbers can be said to be pseudo-random numbers.) When calculating noise, the same result can be obtained by using common parameters. Use the pseudo-random number that gives.
+
++Figure 5.4: Pseudo-random numbers +
+By giving the coordinate values of each grid point to the argument of the function that generates this pseudo-random number, the value of the pseudo-random number unique to each grid point can be obtained.
+
++Figure 5.5: Pseudo-random numbers on each grid point +
+There are values A and B, and the value of P between them changes linearly from A to B, and finding that value approximately is called linear interpolation . This is the simplest interpolation method, but if you use it to find the value between the grid points, the change in the value will be sharp at the start and end points of the interpolation (near the grid point).
+Therefore, we use a cubic Hermitian curve as the interpolation factor so that the values change smoothly .
+f\left( t\right) =3t^{2}-2t^{3}
+
+When this is changed t=0from t=1to, the value will be as shown in the lower right figure.
++Figure 5.6: Linear interpolation in a two-dimensional plane (left), cubic Hermitian curve +
+* The cubic Hermitian curve is implemented as a smoothstep function in GLSL and HLSL .
+This interpolation function is used to interpolate the values obtained at each grid point on each axis. In the case of 2D, first interpolate for x at both ends of the grid, then interpolate those values for the y-axis, and perform a total of 3 calculations. In the case of 3D, as shown in the figure below, 4 interpolations are performed for the z-axis, 2 for the y-axis, and 1 for the x-axis, for a total of 7 interpolations.
+
++Figure 5.7: Interpolation (2D space), Interpolation (3D space) +
+I will explain about 2D. Find the coordinates of each grid point.
+floor() ++
The integer part is calculated using the floor() function. floor() is a function that returns the largest integer less than or equal to the input real number. When a real number of 1.0 or more is given as the input value, the values 1, 2, 3 ... are obtained at equal intervals, so these can be used as the coordinate values of the grid points.
Use the frac() function to find the decimal part.
frac() ++
frac() returns the decimal part of the given real number, taking a value greater than or equal to 0 and less than 1. This allows you to get the coordinate values inside each grid cell.
// Coordinate values of grid points +float2 i00 = i; +float2 i10 = i + float2 (1.0, 0.0); +float2 i01 = i + float2(0.0, 1.0); +float2 i11 = i + float2 (1.0, 1.0); ++
If you assign the coordinate values obtained above to the fragment colors R and G, you will get the following image. (For the integer part, since it can take a value of 1 or more, it is scaled so that the result does not exceed 1 for visualization.)
+
++Figure 5.8: Integer and fractional parts drawn as RG +
+Searching the internet for the random function often returns this function as a result.
+float rand(float2 co)
+{
+ return frac(sin(dot(co.xy, float2(12.9898,78.233))) * 43758.5453);
+}
+
+Looking at the processing one by one, first, the input two-dimensional vector is rounded to one dimension by the inner product to make it easier to handle, and it is given as an argument of the sin function, multiplied by a large number, and the decimal part is obtained. So, this gives us regular and reproducible, but chaotically continuous values.
+The origin of this function is uncertain,
+https://stackoverflow.com/questions/12964279/whats-the-origin-of-this-glsl-rand-one-liner
+According to the report, it originated from a treatise called "On generating random numbers, with help of y = [(a + x) sin (bx)] mod 1" published in 1998 .
+Although it is simple and easy to handle, the cycle in which the same random number sequence appears is short, and if the texture has a large resolution, a pattern that can be visually confirmed occurs, so it is not a very good pseudo-random number.
+// Pseudo-random value on the coordinates of the grid points +float n00 = pseudoRandom(i00); +float n10 = pseudoRandom(i10); +float n01 = pseudoRandom(i01); +float n11 = pseudoRandom(i11); ++
By giving the coordinate value (integer) of each grid point to the argument of the pseudo-random number, the noise value on each grid point is obtained.
+ +// Interpolation function (3rd order Hermitian curve) = smoothstep
+float2 interpolate(float2 t)
+{
+ return t * t * (3.0 - 2.0 * t);
+}
+
+
+// Find the interpolation factor +float2 u = interpolate(f); +// Interpolation of 2D grid +return lerp(lerp(n00, n10, u.x), lerp(n01, n11, u.x), u.y); + ++
interpolate()Calculate the interpolation factor with a predefined function. By using the decimal part of the grid as an argument, you can obtain a curve that changes smoothly near the start and end points of the grid.
lerp()Is a function that performs linear interpolation and stands for Linear Interpolate . It is possible to calculate the linearly interpolated value of the values given to the first and second arguments, and by substituting u obtained as the interpolation coefficient into the third argument, the values between the grids can be connected smoothly.
++Figure 5.9: Interpolation of grid points (two-dimensional space) +
+In the sample project
+TheStudyOfProceduralNoise/Scenes/ShaderExampleList
+When you open the scene, you can see the implementation result of Value Noise . For the code,
+There is an implementation in.
+
++Figure 5.10: Value Noise (2D, 3D, 4D) Drawing result +
+If you look at the result image, you can see that the shape of the grid can be seen to some extent. As you can see, Value Noise is easy to implement, but its isotropic property that its characteristics are invariant when a certain area is rotated is not guaranteed, and it is not enough to be called noise. However, the process of "interpolating the values of pseudo-random numbers obtained from regularly arranged grid points to obtain continuous and smooth values of all points in space" performed in the implementation of Value Noise is , Has the basic algorithmic structure of the noise function.
+ +Perlin Noise is a traditional and representative method of procedural noise, developed by its namesake, Ken Perlin . It originally grew out of experiments in texture generation for the visual effects of the 1982 American science fiction movie "Tron" , known as the world's first movie to fully introduce computer graphics, and the results were published in a 1985 SIGGRAPH paper entitled "An Image Synthesizer" .
+ +The difference from Value Noise is that the value of the grid point noise is not defined as a one-dimensional value, but as a gradient with a slope (Gradient) . Define a 2D gradient for 2D and a 3D gradient for 3D.
+
++Figure 5.11: Perlin Noise Gradient Vector +
+Inner product is
+\overrightarrow {a}\cdot \overrightarrow {b}=\left| a\right| \left| b\right| \cos \theta
+= \left( a.x\ast b.x\right) +\left( a.y\ast b.y\right)
+
+In this vector operation, the geometric meaning is the degree to which the two vectors are oriented in the same direction: the inner product is 1 when they point in the same direction, 0 when they are orthogonal, and -1 when they point in opposite directions. In other words, taking the inner product of the gradient at each grid point and the vector from that grid point toward the point P (where we want the noise value) means that a high noise value is returned when those vectors point in the same direction, and a low value is returned when they point in opposite directions.
+
++Figure 5.12: Dot Product (Left) Perlin Noise Gradient and Interpolation Vector (Right) +
+Here, the cubic Hermitian curve is used as the function for interpolation, but Ken Perlin later changed it to a fifth-order Hermitian curve. We 'll talk about that in the Improved Perlin Noise section.
+ +In the sample project
+TheStudyOfProceduralNoise/Scenes/ShaderExampleList
+If you open the scene, you can see the implementation result of Perlin Noise . For the code,
+There is an implementation in.
+I will post the implementation for 2D.
+// Original Perlin Noise 2D
+float originalPerlinNoise(float2 v)
+{
+ // Coordinates of the integer part of the grid
+ float2 i = floor (v);
+ // Coordinates of the decimal part of the grid
+ float2 f = frac(v);
+
+ // Coordinate values of the four corners of the grid
+ float2 i00 = i;
+ float2 i10 = i + float2 (1.0, 0.0);
+ float2 i01 = i + float2(0.0, 1.0);
+ float2 i11 = i + float2 (1.0, 1.0);
+
+ // Vectors from each grid point inside the grid
+ float2 p00 = f;
+ float2 p10 = f - float2(1.0, 0.0);
+ float2 p01 = f - float2(0.0, 1.0);
+ float2 p11 = f - float2(1.0, 1.0);
+
+ // Gradient of each grid point
+ float2 g00 = pseudoRandom(i00);
+ float2 g10 = pseudoRandom(i10);
+ float2 g01 = pseudoRandom(i01);
+ float2 g11 = pseudoRandom(i11);
+
+ // Normalization (set the magnitude of the vector to 1)
+ g00 = normalize(g00);
+ g10 = normalize(g10);
+ g01 = normalize(g01);
+ g11 = normalize(g11);
+
+ // Calculate the noise value at each grid point
+ float n00 = dot(g00, p00);
+ float n10 = dot(g10, p10);
+ float n01 = dot(g01, p01);
+ float n11 = dot(g11, p11);
+
+ // Interpolation
+ float2 u_xy = interpolate(f.xy);
+ float2 n_x = lerp(float2(n00, n01), float2(n10, n11), u_xy.x);
+ float n_xy = lerp(n_x.x, n_x.y, u_xy.y);
+ return n_xy;
+}
+
+There is no unnatural grid shape as seen in Value Noise , and isotropic noise is obtained. Perlin Noise is also called Gradient Noise because it uses a gradient as opposed to Value Noise .
+
++Figure 5.13: Perlin Noise (2D, 3D, 4D) results +
+Improved Perlin Noise was announced in 2001 by Ken Perlin as an improvement over the shortcomings of Perlin Noise . More details can be found here.
+http://mrl.nyu.edu/~perlin/paper445.pdf
+Currently, most Perlin Noise is implemented based on this Improved Perlin Noise .
+There are two main improvements Ken Perlin has made:
+For Hermite curve interpolation, the original Perlin Noise used the cubic Hermitian curve. However, the second derivative of this cubic equation (the result of differentiating twice), 6-12t, is not 0 at t=0 and t=1. Differentiating the curve gives the slope of the tangent; differentiating again gives its curvature, and a non-zero curvature means there is still some change. As a result, when the noise is used as a normal for bump mapping, values on adjacent grids are not exactly continuous, resulting in visual artifacts.
It is a comparison figure.
+
++Figure 5.14: Interpolation with a cubic Hermitian curve (left) Interpolation with a fifth-order Hermitian curve (right) +
+Sample project
+TheStudyOfProceduralNoise/Scenes/CompareBumpmap
+You can see this by opening the scene.
+Looking at the figure, the person who interpolated by the cubic Hermitian curve on the left shows a visually unnatural normal discontinuity at the boundary of the lattice. To avoid this, use the following fifth-order Hermitian curve .
+f\left( t\right) =6t^{5}-15t^{4}+10t^{3}
+
+Each curve diagram is shown. ① is a cubic Hermitian curve and ② is a 5th order Hermitian curve .
+
++Figure 5.15: 3rd and 5th order Hermitian curves +
+You can see that the change is smooth around t=0 and t=1. Since both the first derivative and the second derivative are 0 at t=0 and t=1, continuity is maintained.
Think about 3D. The gradient G is evenly distributed in a spherical shape, but the cubic lattice is short about its axis, long about its diagonal, and has a directional bias in itself. If the gradient is close to parallel to the axis, aligning it with the ones in close proximity can result in unusually high values in those areas due to the close distance, which can result in a spotty noise distribution. In order to remove this gradient bias, we will limit it to the following 12 vectors, with those parallel to the axes and those on the diagonal removed.
+(1,1,0),(-1,1,0),(1,-1,0),(-1,-1,0), +(1,0,1),(-1,0,1),(1,0,-1),(-1,0,-1), +(0,1,1),(0,-1,1),(0,1,-1),(0,-1,-1) ++
++Figure 5.16: Improved Perlin Noise Gradient (3D) +
+From a cognitive psychological point of view, Ken Perlin states that in practice the point P in the grid already provides enough randomness, and the gradient G does not have to be random in all directions. In addition, the inner product of, for example, (1, 1, 0) with (x, y, z) can be calculated simply as x + y, which simplifies the inner product calculations performed later and avoids many multiplications. This removes 24 multiplications from the calculation and keeps the calculation cost down.
In the sample project
+TheStudyOfProceduralNoise/Scenes/ShaderExampleList
+If you open the scene, you can see the implementation result of Improved Perlin Noise . For the code,
+This improved Perlin Noise implementation is based on the one published in the paper "Efficient computational noise in GLSL" , which will also be introduced in the next Simplex Noise section. (There it is called Classic Perlin Noise , so it's a bit confusing, but I'm using that name.) This implementation differs from what Ken Perlin described in the paper for the gradient calculations, but it gives quite similar results.
+You can check the original implementation of Ken Perlin from the URL below.
+http://mrl.nyu.edu/~perlin/noise/
+
++Figure 5.17: Improved Perlin Noise (2D, 3D, 4D) +
+The figure below compares the noise gradient with the results. The left is the original Perlin Noise and the right is the Improved Perlin Noise .
+
++Figure 5.18: Perlin Noise, Improved Perlin Noise Gradients and Results Comparison +
+Simplex Noise was introduced by Ken Perlin in 2001 as a better algorithm than traditional Perlin Noise .
+Simplex Noise has the following advantages over traditional Perlin Noise .
+Here, "Simplex Noise Demystify"
+http://staffwww.itn.liu.se/~stegu/simplexnoise/simplexnoise.pdf
+I will explain based on the contents of.
+ +Simplex is called a simple substance in the topology of mathematics. A simple substance is the smallest unit that makes a figure. A 0-dimensional simplex is a point , a 1-dimensional simplex is a line segment , a 2-dimensional simplex is a triangle , a 3-dimensional simplex is a tetrahedron , and a 4-dimensional simplex is a 5-cell .
+
++Figure 5.19: Simple substance in each dimension +
+Perlin Noise used a square grid for 2D and a cubic grid for 3D, but Simplex Noise uses this simple substance for the grid.
+In one dimension, the simplest shape that fills the space is evenly spaced lines. In two dimensions, the simplest shape that fills the space is a triangle.
+Two of the tiles made up of these triangles can be thought of as crushed squares along their main diagonal.
+
++Figure 5.20: Two-dimensional simple substance grid +
+In three dimensions, the single shape is a slightly distorted tetrahedron. These six tetrahedra form a cube that is crushed along the main diagonal.
+
++Figure 5.21: Three-dimensional simple substance grid +
+In 4D, single shapes are very difficult to visualize. Its single shape has five corners, and these 24 shapes form a four-dimensional hypercube that collapses along the main diagonal.
+It can be said that N-dimensional simplex shapes have N + 1 corners, and N! of them (3! is 3 × 2 × 1 = 6) fill the N-dimensional hypercube collapsed along the main diagonal.
+The advantage of using a simple substance shape for a grid is that you can define a grid with as few angles as possible with respect to the dimension, so when finding the values of points inside the grid, you will interpolate from the values of the surrounding grid points. It is in a place where the number of calculations can be suppressed. The N-dimensional hypercube has 2 ^ {N} corners, while the N-dimensional elemental shape has only N + 1 corners.
+When trying to find higher dimensional noise values, traditional Perlin Noise requires O (2 ^ {N}) the complexity of the calculations at each corner of the hypercube and the amount of interpolation for each principal axis . ) It's a problem and quickly becomes awkward. On the other hand, with Simplex Noise , the number of vertices of the simplex shape with respect to the dimension is small, so the amount of calculation is limited to O (N ^ {2}) .
+ +With Perlin Noise, the integer part of the coordinates floor()could be used to calculate which grid the point P you want to find is in. For Simplex Noise, follow the two steps below.
For a visual understanding, let's look at a diagram of the two-dimensional case.
+
++Figure 5.22: Deformation of a single grid in two dimensions +
+A single grid of two-dimensional triangles can be distorted into a grid of isosceles triangles by scaling. Two isosceles triangles form a quadrangle with one side length (a single unit refers to this quadrangle). (x, y)By looking at the integer part of the coordinates after moving , you can determine which single unit square the point P for which you want to find the noise value is. Also, by comparing the sizes of x and y from the origin of a single unit, it is possible to know which of the units is the single unit including the point P, and the coordinates of the three single points surrounding the point P are determined.
In the case of 3D, the 3D single lattice is regularly arranged by scaling along its main diagonal so that the 2D equilateral triangle single lattice can be transformed into an isosceles triangular lattice. It can be transformed into a cubic grid. As in the case of two dimensions, you can determine which six units belong to a single unit by looking at the integer part of the coordinates of the moved point P. Furthermore, which unit of the unit belongs to can be determined by comparing the relative size of each axis from the origin of the unit.
+
++Figure 5.23: Rules for determining which single unit the point P belongs to in the 3D case +
+The figure above shows a cube formed by a three-dimensional unit along the main diagonal, and belongs to which unit depending on the size of the coordinate values of point P on the x, y, and z axes. It shows the rules of.
+In the case of 4D, it is difficult to visualize, but it can be thought of as a rule in 2D and 3D. Coordinates of a four-dimensional hypercube that fills space There are (x, y, z, w)4! = 24 combinations of sizes for each axis, which are unique to each of the 24 units in the hypercube, and the point P belongs to which unit. Can be determined.
The figure below is a two-dimensional single grid visualized in fragment color.
+
++Figure 5.24: Simplex (2D) integer and fractional parts +
+In conventional Perlin Noise , the values of points inside the grid are calculated from the values of the surrounding grid points by interpolation. However, with Simplex Noise , instead, the degree of influence of the values of the vertices of each simple substance is calculated by a simple sum calculation. Specifically, the extrapolation of the slope of each corner of a single unit and the product of the functions that decay in a radial circle depending on the distance from each vertex are added.
+Think about two dimensions.
+
++Figure 5.25: Radial circular decay function and its range of influence +
+The value of the point P inside a single unit only affects the values from each of the three vertices of the single unit that surrounds it. The values of the distant vertices have no effect because they decay to 0 before crossing the single boundary containing the point P. In this way, the noise value at point P can be calculated as the sum of the values of the three vertices and their degree of influence.
+
++Figure 5.26: Contribution rate and sum of each vertex +
+The implementation is shown in the manner of "Efficient computational noise in GLSL" , published by Ian McEwan, David Sheets, Stefan Gustavson and Mark Richardson in 2012.
+https://pdfs.semanticscholar.org/8e58/ad9f2cc98d87d978f2bd85713d6c909c8a85.pdf
+
+Currently, if you want to implement noise with a shader, it is an easy-to-use algorithm that is less hardware-dependent, efficient in calculation, and does not require reference to textures. (Probably)
+As of April 2018, the source code is managed at https://github.com/stegu/webgl-noise/ . The original was here ( https://github.com/ashima/webgl-noise ), but Ashima Arts, which currently manages it, doesn't seem to be functioning as a company, so it was cloned by Stefan Gustavson.
+There are three features of the implementation:
+Previously announced noise implementations used tables containing pre-computed index values or bit-swapped hashes for index generation during gradient calculations, but both approaches are shaders. It cannot be said that it is suitable for implementation by. So, for index sorting,
+\left( Ax^{2}+Bx\right) mod\ M
+
+They propose a method that uses a polynomial of this simple form. (mod = modulo, the remainder when one number is divided by another.) For example, \left( 6x^{2}+x\right) mod\ 9 maps the inputs (0 1 2 3 4 5 6 7 8) to (0 7 8 3 1 2 6 4 5), returning 9 unique numbers from 0 to 8.
To generate an index that distributes the gradients well enough, we need to permute at least a few hundred numbers, so \left( 34x^{2}+x\right) mod\ 289 is chosen.
+This permutation polynomial runs into the precision limits of shader-language variables: truncation occurs when 34x^{2}+x > 2^{24} , that is, when |x| > 702 in the integer domain. So, in order to calculate the permutation polynomial without the risk of overflow, we take x modulo 289 before doing the polynomial calculation, limiting x to the range 0-288.
+Specifically, it is implemented as follows.
+// Find the remainder of 289
+float3 mod289(float3 x)
+{
+ return x - floor(x * (1.0 / 289.0)) * 289.0;
+}
+
+// Permute using the permutation polynomial
+float3 permute(float3 x)
+{
+ return fmod(((x * 34.0) + 1.0) * x, 289.0);
+}
+
+The treatise admits that in 2D and 3D, there is no problem, but in 4D, this polynomial has generated visual artifacts. For 4 dimensions, an index of 289 seems to be inadequate.
+ +Traditional implementations used pseudo-random numbers for gradient calculations, referencing the table containing the indexes and performing bit operations to calculate the pre-calculated gradient indexes. Here, we use a cross-polytope for gradient calculations to get a more efficiently distributed gradient in different dimensions, which is more suitable for shader implementation . A cross-polytope is a generalized shape of a two-dimensional square , a three-dimensional regular octahedron , and a four-dimensional regular six-cell body in each dimension. Each dimension takes a geometric shape as shown in the figure below.
+
++Figure 5.27: Cross-polytope in each dimension +
+Gradient vector at each dimension, if the two-dimensional square , if a three-dimensional regular octahedral , if four-dimensional (truncated portion) 16-cell of the surface and distributed.
+Each dimension and equation are as follows.
+2-D: x0 ∈ [−2, 2], y = 1 − |x0| +if y > 0 then x = x0 else x = x0 − sign(x0) + +3-D: x0, y0 ∈ [−1, 1], z = 1 − |x0| − |y0| +if z > 0 then x = x0, y = y0 +else x = x0 − sign(x0), y = y0 − sign(y0) + +4-D: x0, y0, z0 ∈ [−1, 1], w = 1.5 - | x0 | - | y0 | - | z0 | +if w > 0 then x = x0, y = y0, z = z0 +else x = x0 − sign(x0), y = y0 − sign(y0), z = z0 − sign(z0) ++
Most Perlin Noise implementations used gradient vectors of equal magnitude. However, on the surface of the N-dimensional cross-polytope there is a difference between the shortest and longest vectors by a factor of \sqrt{N} . This does not cause strong artifacts, but in higher dimensions the noise pattern becomes less isotropic without explicit normalization of these vectors. Normalization is the process of scaling a vector to length 1 by dividing it by its magnitude. Assuming the magnitude of the gradient vector is r , normalization can be achieved by multiplying the gradient vector by the inverse square root of r , \dfrac{1}{\sqrt{r}} . Here, to improve performance, this inverse square root is approximately calculated using the Taylor expansion. The Taylor expansion says that, for an infinitely differentiable function, if x is in the vicinity of a, the function can be approximately calculated by the following formula.
+\sum ^{\infty }_{n=0}\dfrac {f^{\left( n\right) }\left( a\right) }{n!}\left( x-a\right) ^{n}
+
+Finding the first derivative of \ dfrac {1} {\ sqrt {a}}
+\begin{array}{l}
+f\left( a\right) =\dfrac {1}{\sqrt {a}}=a^{-\frac{1}{2}}\\
+f'\left( a\right) =-\dfrac {1}{2}a^{-\frac{3}{2}}\\
+\end{array}
+
+Therefore, the approximate expression in the vicinity of a by Taylor expansion is as follows.
+\sum ^{\infty }_{n=0}\dfrac {f^{\left( n\right) }\left( a\right) }{n!}\left( x-a\right) ^{n}
+
+\begin{array}{l}
+=a^{-\frac{1}{2}}-\frac{1}{2}a^{-\frac{3}{2}}\left( x-a\right)\\
+=\frac{3}{2}a^{-\frac{1}{2}}-\frac{1}{2}a^{-\frac{3}{2}}x\\
+\end{array}
+
+Here, if a = 0.7 (presumably because the length of the gradient vectors ranges from 0.5 to 1.0), we obtain 1.79284291400159 - 0.85373472095314 * x.
+This is what the implementation looks like.
+float3 taylorInvSqrt(float3 r)
+{
+ return 1.79284291400159 - 0.85373472095314 * r;
+}
+
+In the sample project
+TheStudyOfProceduralNoise/Scenes/ShaderExampleList
+When you open the scene, you can see the implementation result of Simplex Noise . The implemented code is
+It is in.
+
++Figure 5.28: Simplex Noise (2D, 3D, 4D) results +
+Simplex Noise gives a slightly grainier result when compared to Perlin Noise .
+ +We have looked at the algorithms and implementations of typical procedural noise methods in detail, but you can see that there are differences in the characteristics of the noise patterns obtained and the calculation costs. When noise is used in a real-time application, when it becomes high resolution, the calculation is performed for each pixel, so this calculation load cannot be ignored, and what kind of calculation is performed. Should be kept in mind to some extent. Nowadays, many noise functions are built into the development environment from the beginning, but it is important to understand the noise algorithm in order to make full use of it. I couldn't explain its application here, but in graphics generation, the application of noise is extremely diverse and has a great effect. (The next chapter will show one example.) We hope this article provides a foothold for countless applications. Finally, I would like to pay tribute to the wisdom that our predecessors have accumulated and primarily to Ken Perlin's outstanding achievements.
+ +
|
![]() |
|
In this chapter, we will explain the GPU implementation of Curl Noise, which is a pseudo-fluid algorithm.
The sample in this chapter is "Curl Noise" from
https://github.com/IndieVisualLab/UnityGraphicsProgramming2
.
Curl Noise is a pseudo-fluid noise algorithm announced in 2007 by Professor Robert Bridson of the University of British Columbia, who is also known as a developer of fluid algorithms such as the FLIP method.
In the previous work "Unity Graphics Programming vol.1", I explained the fluid simulation using the Navier-Stokes equation, but Curl Noise is a pseudo but light load compared to those fluid simulations. It is possible to express fluid.
In particular, with the recent advances in display and projector technology, there is an increasing need for real-time rendering at high resolutions such as 4K and 8K, so low-load algorithms such as Curl Noise can express fluids in high resolution and low resolution. It is a useful option for expressing with machine specifications.
In fluid simulation, the first thing you need is a vector field called the "velocity field". First, let's imagine what a velocity field is like.
Below is an image of the velocity field in two dimensions. You can see that the vector is defined at each point on the plane.
++Figure 6.1: Two-dimensional velocity field observations +
+As shown in the above figure, the state in which each vector is individually defined in each differential interval on the plane is called a vector field, and the one in which each vector is a velocity is called a velocity field.
Even if these are three-dimensional, it is easy to understand if you can imagine that the vector is defined in each differential block in the cube.
Now let's see how Curl Noise derives this velocity field.
The interesting thing about Curl Noise is that it uses gradient noise such as Perlin Noise and Simplex Noise, which was explained in the previous chapter "Introduction to Procedural Noise", as a potential field, and derives the velocity field of the fluid from it.
In this chapter, we will use 3D Simplex Noise as a potential field.
Below, I would like to first unravel the algorithm from the Curl Noise formula.
+\overrightarrow{u} = \nabla \times \psi
+
+The above is the Curl Noise algorithm.
+The left side \overrightarrow{u} is the derived velocity vector, \nabla on the right side is the vector differential operator (read as "nabla"; it acts as a partial differential operator), and \psi is the potential field. (3D Simplex Noise in this chapter)
Curl Noise can be expressed as the cross product of the two terms on the right side.
+In other words, Curl Noise is the cross product of Simplex Noise and the partial differential operator for each vector element \left( \dfrac {\partial}{\partial x}, \dfrac {\partial}{\partial y}, \dfrac {\partial}{\partial z} \right) , and those who have studied vector analysis will recognize it as the form of rot A itself.
Now let's calculate the outer product of 3D Simplex Noise and partial derivative
\overrightarrow{u} = \left( \dfrac {\partial \psi _3} {\partial y} - \dfrac {\partial \psi _2} {\partial z}_, \dfrac {\partial \psi _1} {\partial z} - \dfrac {\partial \psi _3} {\partial x}_, \dfrac {\partial \psi _2} {\partial x} - \dfrac {\partial \psi _1} {\partial y} \right)
+
+In general, the outer product is characterized by the fact that the two vectors are oriented vertically to each other and their length is the same as the area of the surface stretched by both vectors, but rotA (rotation) in vector analysis. ) Is a simple way to grasp the image of the cross product operation from the above formula, saying, "Look up the vector of the potential field in each twisted partial differential element direction, and pull the terms together, so rotation occurs." It may be easier to grasp the image if you capture it in.
The implementation itself is very simple, looking up the vector from each point of \ psi above , that is, 3D SimplexNoise, while slightly shifting the lookup point in the direction of each element of partial differentiation, and performing the outer product operation like the above formula. Just do.
If you have read the fluid simulation chapter of the previous work "Unity Graphics Programming vol.1", you may be wondering what the law of conservation of mass is.
The law of conservation of mass is that at each point in the velocity field, the inflow and outflow are always balanced, the inflow is outflowed, the outflow is inflowed, and finally the divergence is zero (divergence free). It was a rule.
\nabla \cdot \overrightarrow{u} = 0
+
+This is also mentioned in the paper, but since the gradient noise itself changes gently in the first place (when imagining with a two-dimensional gradation, if the pixel on the left side is thin, the pixel on the right side is dark (As you can see), divergence-free was guaranteed at the time of the potential field. Considering the characteristics of Perlin noise, it is quite natural.
+ +Now, let's implement the CurlNoise function on the GPU with Compute shader or Shader based on the formula.
+#define EPSILON 1e-3
+
+float3 CurlNoise (float3 coord)
+{
+ float3 dx = float3(EPSILON, 0.0, 0.0);
+ float3 dy = float3 (0.0, EPSILON, 0.0);
+ float3 dz = float3(0.0, 0.0, EPSILON);
+
+ float3 dpdx0 = snoise(coord - dx);
+ float3 dpdx1 = snoise(coord + dx);
+ float3 dpdy0 = snoise(coord - dy);
+ float3 dpdy1 = snoise(coord + dy);
+ float3 dpdz0 = snoise(coord - dz);
+ float3 dpdz1 = snoise(coord + dz);
+
+ float x = dpdy1.z - dpdy0.z - dpdz1.y + dpdz0.y;
+ float y = dpdz1.x - dpdz0.x - dpdx1.z + dpdx0.z;
+ float z = dpdx1.y - dpdx0.y - dpdy1.x + dpdy0.x;
+
+ return float3(x, y, z) / EPSILON * 2.0;
+}
+
+As mentioned above, this algorithm can be reduced to a simple four arithmetic operation, so the implementation itself is very easy, and it can be implemented with just this number of lines.
Below is a sample of Curl Noise implemented in the compute shader this time. It is possible to advect particles of particles, add a rising vector to make it look like a flame, and bring out various expressions depending on the idea.
++Figure 6.2: +
+
++Figure 6.3: +
+
++Figure 6.4: +
+In this chapter, we explained the implementation of pseudo-fluid by Curl Noise.
Since it is possible to reproduce a 3D pseudo-fluid with a small load and implementation, it is an algorithm that works especially useful for real-time rendering at high resolution.
In summary, I would like to conclude this chapter with the utmost thanks to Professor Robert Bridson, who is still discovering various techniques, including the Curl Noise algorithm.
I think there were some points that could not be explained and some parts were difficult to understand, but I hope that readers will enjoy graphics programming as well.
|
![]() |
|
+In this chapter, we will introduce singular value decomposition and its applications, starting from the basics of linear algebra. Although many people learn linear algebra as high school or university students, I think there are many who do not know how it is actually used, so I wrote this chapter. In order to prioritize ease of understanding , explanations are given in two dimensions and within the range of real numbers. Therefore, there are some differences from the actual definitions of linear algebra, but we would appreciate it if you could read it with that in mind.
+ +Most readers may have heard the word matrix once (currently, they don't learn matrices in high school ...). Matrix is a number like this: Refers to those arranged vertically and horizontally.
+The horizontal direction is the row , the vertical direction is the column , the diagonal direction is the diagonal , and each number is called a matrix element .
+
++Figure 7.1: Matrix +
+By the way, the matrix is called Matrix in English.
+ +Let's take a quick look at the basics of matrix operations. If you are already familiar with it, you can skip this section.
+ +Similar to the four arithmetic operations of scalars, there are additions, subtractions, multiplications and divisions in matrices. For simplicity, quadratic square matrices * 1 \ mbox {\ boldmath $ A $} and \ mbox {\ boldmath $ B $} , The 2D vector \ mbox {\ boldmath $ c $} is defined below.
+[* 1] Square matrix \ cdots A matrix with the same number of rows and columns.
Matrix addition calculates the sum for each element, as in the formula (\ ref {plus}).
+ +Matrix subtraction calculates the difference element by element, as in the formula (\ ref {minus}).
+ +Matrix multiplication is a bit more complicated, like an expression (\ ref {times}).
+Please note that if you reverse the order of multiplication, the calculation result will also change.
+ +Matrix division uses a concept called the inverse matrix , which is a little different from division in scalars . First of all, scalars have the property that when multiplied by their own reciprocal, they always become 1.
+In other words, the act of division is equivalent to the operation of "multiplying the reciprocal".
+Replacing this with a matrix, we can say that what produces the identity matrix over the matrix is the matrix that represents division. In the matrix, the one corresponding to 1 in the scalar is called the identity matrix and is defined below. ..
+As with the scalar 1 , the value does not change when the identity matrix is applied to any matrix.
+With these in mind, let's consider matrix division. If the inverse matrix is \ mbox {\ boldmath $ M $} ^ {-1} , the definition of the inverse matrix is as follows.
+Derivation is omitted, but the elements of the inverse matrix of the matrix \ mbox {\ boldmath $ A $} are defined below.
+At this time, a_{00}a_{11} - a_{01}a_{10} is called the determinant (Determinant) and is expressed as det(\mbox{\boldmath $A$}).
+ +A matrix can transform the coordinates pointed to by a vector by multiplying it by a vector. As you know, in CG, it is mostly used as a coordinate transformation matrix (world, projection, view transformation matrix). The product of and the vector is defined below.
+ +From this section, I will explain the concept of the matrix in the range learned at the university. I think that there are many parts that seem a little difficult, but to understand Shape Matching, this concept is necessary, so do your best. However, since the story in this section is also absorbed inside the matrix operation library, there is no problem in implementing it even if you skip to section \ ref {shape matching}.
+ +The transposed matrix is the swapped rows and columns of the elements and is defined below.
+ +A matrix that satisfies \ mbox {\ boldmath $ A $} ^ {T} = \ mbox {\ boldmath $ A $} is called a symmetric matrix.
+ +Given the square matrix \ mbox {\ boldmath $ A $} ,
+A \lambda satisfying this is called an eigenvalue of \mbox{\boldmath $A$}, and \mbox{\boldmath $v$} is called an eigenvector.
+The calculation method of eigenvalues and eigenvectors is shown below. First, the formula (\ ref {eigen}) is transformed.
+Here, using the condition \ mbox {\ boldmath $ v $} \ neq 0 , the expression (\ ref {eigen2}) becomes:
+Expanding this expression yields a quadratic equation in \lambda, and solving it gives the eigenvalues \lambda. Furthermore, by substituting each computed \lambda into equation (\ref{eigen2}), you can calculate the corresponding eigenvector \mbox{\boldmath $v$}.
+Since the concept of eigenvalues and eigenvectors is difficult to understand from mathematical formulas alone, I think you should also read the Qiita article (described at the end of the chapter) where eigenvalues were visualized by @ kenmatsu4.
+The eigenvalues and eigenvectors of the square matrix \mbox{\boldmath $A$} can be used to represent the matrix \mbox{\boldmath $A$} in a different way. First, sort the eigenvalues \lambda by size and create a matrix \mbox{\boldmath $\Lambda$} with them as diagonal elements. Next, create a matrix \mbox{\boldmath $V$} in which the eigenvectors corresponding to each eigenvalue are arranged in order from the left. Then, equation (\ref{eigen}) can be rewritten using these matrices as follows.
+Furthermore, multiply this by \ mbox {\ boldmath $ V $} ^ {-1} from the right of both sides so that the matrix \ mbox {\ boldmath $ A $} remains on the left side .
+Will be.
+Decomposing a matrix into a form like an expression (\ ref {eigendecomp}) in this way is called eigenvalue decomposition of a matrix.
+A set of vectors that are perpendicular to each other, each of which is a unit vector, is called an orthonormal basis. Any vector can be represented using a set of orthonormal bases * 2. In the case of two dimensions, two vectors form an orthonormal basis. For example, the commonly used x-axis and y-axis, \mbox{\boldmath $x$} = (1, 0) and \mbox{\boldmath $y$} = (0, 1), form an orthonormal basis, so any vector can be represented by this \mbox{\boldmath $x$}, \mbox{\boldmath $y$}. Expressing \mbox{\boldmath $v$} = (4, 13) using the orthonormal basis \mbox{\boldmath $x$}, \mbox{\boldmath $y$} gives \mbox{\boldmath $v$} = 4\mbox{\boldmath $x$} + 13\mbox{\boldmath $y$}.
+[* 2] Formally, it is called a linear combination of vectors.
The Hermitian matrix is defined in the range of complex numbers, which is beyond the scope of this chapter, so I will briefly explain it in the range of real numbers. In the range of real numbers, the Hermitian of the matrix \ mbox {\ boldmath $ A $} The matrix \ mbox {\ boldmath $ A $} ^ {*} simply means that it is a symmetric matrix.
+Will be.
+When the square matrix \mbox{\boldmath $Q$} is decomposed into column vectors Q = (\mbox{\boldmath $q$}_1, \mbox{\boldmath $q$}_2, \cdots, \mbox{\boldmath $q$}_n), if the set of these vectors forms an orthonormal system, that is,
+When is true, \ mbox {\ boldmath $ Q $} is said to be an orthogonal matrix. Also, even if the orthogonal matrix is decomposed into row vectors, it has the characteristic of forming an orthonormal system.
+A matrix that satisfies this is called a unitary matrix. If all the elements of the unitary matrix \mbox{\boldmath $U$} are real (i.e., it is a real matrix), then \mbox{\boldmath $U$}^{*} = \mbox{\boldmath $U$}^{T}, so we can see that a real unitary matrix \mbox{\boldmath $U$} is an orthogonal matrix.
+ +Decomposing an arbitrary m \ times n matrix \ mbox {\ boldmath $ A $} into the following form is called singular value decomposition of the matrix.
+Note that \ mbox {\ boldmath $ U $} and \ mbox {\ boldmath $ V $} ^ {T} are orthogonal matrices of m \ times m , and \ mbox {\ boldmath $ \ Sigma $} are m \ times. It is a diagonal matrix of n (diagonal elements are non-negative and arranged in order of magnitude).
+The word "arbitrary" is important: the eigenvalue decomposition of a matrix is defined only for square matrices, but the singular value decomposition can also be performed on non-square matrices. In the CG world, the matrix to be handled is a square matrix in most cases, and the calculation method is not so different from the eigenvalue decomposition. Also, when \mbox{\boldmath $A$} is a symmetric matrix, the eigenvalues and singular values of \mbox{\boldmath $A$} match. In addition, the square roots of the nonzero (positive) eigenvalues of \mbox{\boldmath $A$}^{T}\mbox{\boldmath $A$} are the singular values of \mbox{\boldmath $A$}.
+To drop the eigenvalue decomposition into a program, it is helpful to transform equation (\ref{svd}). Multiplying the matrix \mbox{\boldmath $A$} from the left by its transpose \mbox{\boldmath $A$}^{T} gives:
+You will notice that the form is the same as the eigenvalue decomposition. In fact, it is known that the square of the singular value matrix becomes the eigenvalue matrix. Therefore, the calculation of the singular value decomposes the matrix into eigenvalues. This can be done by taking the square root of the eigenvalues. This leads to the incorporation of eigenvalue decomposition into the algorithm, but fortunately it is necessary to solve a quadratic equation to find the eigenvalues. Since the solution formula of the quadratic equation is simple, it is easy to drop it into the program * 3 .
+[* 3] Although there are solution formulas for cubic and quartic equations, they are generally calculated using Newton's method.
By the eigenvalue decomposition of \mbox{\boldmath $A$}^{T}\mbox{\boldmath $A$}, \mbox{\boldmath $\Sigma$} and \mbox{\boldmath $V$}^{T} have now been calculated, so the remaining \mbox{\boldmath $U$} can be calculated by transforming equation (\ref{svd}) as follows.
+Since V is an orthogonal matrix, the transpose and the inverse matrix match.
+This can be expressed programmatically as follows.
+Listing 7.1: Singular Value Decomposition Algorithm (Matrix2x2.cs)
+ 1: /// <summary>
+ 2: /// Singular value decomposition
+ 3: /// </summary>
+ 4: /// <param name="u">Returns rotation matrix u</param>
+ 5: /// <param name="s">Returns sigma matrix</param>
+ 6: /// <param name="v">Returns rotation matrix v(not transposed)</param>
+ 7: public void SVD(ref Matrix2x2 u, ref Matrix2x2 s, ref Matrix2x2 v)
+ 8: {
+ 9: // If it was a diagonal matrix, the singular value decomposition is simply given below.
+10: if (Mathf.Abs(this[1, 0] - this[0, 1]) < MATRIX_EPSILON
+11: && Mathf.Abs(this[1, 0]) < MATRIX_EPSILON)
+12: {
+13: u.SetValue(this[0, 0] < 0 ? -1 : 1, 0,
+14: 0, this[1, 1] < 0 ? -1 : 1);
+15: s.SetValue(Mathf.Abs(this[0, 0]), Mathf.Abs(this[1, 1]));
+16: v.LoadIdentity ();
+17: }
+18:
+19: // Calculate A ^ T * A if it is not a diagonal matrix.
+20: else
+21: {
+22: // 0 Column vector length (non-root)
+23: float i = this[0, 0] * this[0, 0] + this[1, 0] * this[1, 0];
+24: // Length of 1 column vector (non-root)
+25: float j = this[0, 1] * this[0, 1] + this[1, 1] * this[1, 1];
+26: // Inner product of column vectors
+27: float i_dot_j = this[0, 0] * this[0, 1]
+28: + this[1, 0] * this[1, 1];
+29:
+30: // If A ^ T * A is an orthogonal matrix
+31: if (Mathf.Abs(i_dot_j) < MATRIX_EPSILON)
+32: {
+33: // Calculation of diagonal elements of the singular value matrix
+34: float s1 = Mathf.Sqrt(i);
+35: float s2 = Mathf.Abs(i - j) <
+36: MATRIX_EPSILON ? s1 : Mathf.Sqrt(j);
+37:
+38: u.SetValue(this[0, 0] / s1, this[0, 1] / s2,
+39: this[1, 0] / s1, this[1, 1] / s2);
+40: s.SetValue(s1, s2);
+41: v.LoadIdentity ();
+42: }
+43: // If A ^ T * A is not an orthogonal matrix, solve the quadratic equation to find the eigenvalues.
+44: else
+45: {
+46: // Calculation of eigenvalues / eigenvectors
+47: float i_minus_j = i - j; // Difference in column vector length
+48: float i_plus_j = i + j; // sum of column vector lengths
+49:
+50: // Formula for solving quadratic equations
+51: float root = Mathf.Sqrt(i_minus_j * i_minus_j
+52: + 4 * i_dot_j * i_dot_j);
+53: float eig = (i_plus_j + root) * 0.5f;
+54: float s1 = Mathf.Sqrt(eig);
+55: float s2 = Mathf.Abs(root) <
+56: MATRIX_EPSILON ? s1 :
+57: Mathf.Sqrt((i_plus_j - root) / 2);
+58:
+59: s.SetValue(s1, s2);
+60:
+61: // Use the eigenvector of A ^ T * A as V.
+62: float v_s = eig - i;
+63: float len = Mathf.Sqrt(v_s * v_s + i_dot_j * i_dot_j);
+64: i_dot_j /= len;
+65: v_s /= len;
+66: v.SetValue(i_dot_j, -v_s, v_s, i_dot_j);
+67:
+68: // Since v and s have already been calculated, the rotation matrix u is calculated by Av / s.
+69: u.SetValue(
+70: (this[0, 0] * i_dot_j + this[0, 1] * v_s) / s1,
+71: (this[0, 1] * i_dot_j - this[0, 0] * v_s) / s2,
+72: (this[1, 0] * i_dot_j + this[1, 1] * v_s) / s1,
+73: (this[1, 1] * i_dot_j - this[1, 0] * v_s) / s2
+74: );
+75: }
+76: }
+77: }
+
+Singular value decomposition is active in a wide variety of fields, and seems to be used mainly in principal component analysis (PCA) in statistics. There are many cases where it is used in CG.
+ +And so on.
+This time, we will focus on Shape Matching and explain the basic idea.
+[*4] Meshless deformations based on shape matching, Matthias Muller et al., SIGGRAPH 2005
[*5] Reconstructing surfaces of particle-based fluids using anisotropic kernels, Jihun Yu et al., ACM Transaction on Graphics 2013
[*6] A material point method for snow simulation, Alexey Stomakhin et al., SIGGRAPH 2013
\label{shapematching}
+ +Shape Matching is a technique for aligning two different shapes within the range where there is as little error as possible. Currently, a method for simulating an elastic body using Shape Matching is being developed. ..
+This section describes the algorithm for aligning the unicorn object placement to the lion object placement , as shown in Figure 7.2 and Figure 7.3 .
+
++Figure 7.2: Two objects +
+
++Figure 7.3: Aligned results +
+First, define a set of the same number of points on each shape. (Lion's point set is P, Unicorn's point set is Q.)
+At this time, note that those with the same subscript are in the geometrically corresponding positions as shown in Fig. 7.4 .
+
++Figure 7.4: Correspondence of point sets +
+Next, calculate the centroid of each point set.
+Assuming that the center of gravity of the unicorn point set is at the same position as the center of gravity of the lion point set, the rotation matrix \ mbox {\ boldmath $ R $} is applied to the unicorn point set, and the vector \ mbox {\ boldmath $ t Since the result of the $} translation is equal to the center of gravity of the lion, the following equation can be derived.
+When this is transformed,
+And further deformed,
+Will be.
+Therefore, from this equation, if the rotation matrix \ mbox {\ boldmath $ R $} is obtained, the translation vector \ mbox {\ boldmath $ t $} is automatically obtained. Here, the original point Define a set of points from the position of, minus the center of gravity of each.
+This makes it possible to perform calculations with local coordinates with the center of gravity of each point set as the origin.
+Next, calculate the variance-covariance matrix \mbox{\boldmath $H$} of \mbox{\boldmath $p$}_{i}^{\prime}, \mbox{\boldmath $q$}_{i}^{\prime}.
+This variance-covariance matrix \ mbox {\ boldmath $ H $} stores information such as the variability of the two point sets. Here the product of the vectors \ mbox {\ boldmath $ q $} _ {i } ^ {\ prime} {\ mbox {\ boldmath $ p $} _ {i} ^ {\ prime}} ^ {T} is an operation called direct product (outer product), unlike the normal vector internal product operation. The direct product of the vectors produces a matrix. The direct product of the two-dimensional vectors is defined below.
+In addition, the covariance matrix \ mbox {\ boldmath $ H $} is singularly decomposed.
+In the result of the singular value decomposition, \mbox{\boldmath $\Sigma$} is a matrix representing expansion and contraction, so the desired rotation matrix \mbox{\boldmath $R$} is
+(The detailed derivation method is a little advanced, so I will omit it here.)
+Finally, the translation vector \ mbox {\ boldmath $ t $} can be calculated from the obtained rotation matrix and equation (\ ref {trans}) .
+ +In this implementation, the algorithm in the previous section is just dropped into the code, so detailed explanation is omitted. In addition, all the processing is completed in the Start function in ShapeMaching.cs.
+Listing 7.2: ShapeMatching (ShapeMaching.cs)
+ 1: // Set p, q
+ 2: p = new Vector2[n];
+ 3: q = new Vector2[n];
+ 4: centerP = Vector2.zero;
+ 5: centerQ = Vector2.zero;
+ 6:
+ 7: for(int i = 0; i < n; i++)
+ 8: {
+ 9: Vector2 pos = _destination.transform.GetChild(i).position;
+10: p[i] = pos;
+11: centerP += pos;
+12:
+13: pos = _target.transform.GetChild(i).position;
+14: q[i] = pos;
+15: centerQ += pos;
+16: }
+17: centerP /= n;
+18: centerQ /= n;
+19:
+20: // Calc, p, q!
+21: Matrix2x2 H = new Matrix2x2(0, 0, 0, 0);
+22: for (int i = 0; i < n; i++)
+23: {
+24: p[i] = p[i] - centerP;
+25: q[i] = q[i] - centerQ;
+26:
+27: H += Matrix2x2.OuterProduct(q[i], p[i]);
+28: }
+29:
+30: Matrix2x2 u = new Matrix2x2();
+31: Matrix2x2 s = new Matrix2x2();
+32: Matrix2x2 v = new Matrix2x2();
+33: H.SVD(ref u, ref s, ref v);
+34:
+35: R = v * u.Transpose();
+36: Debug.Log(Mathf.Rad2Deg * Mathf.Acos(R.m00));
+37: t = centerP - R * centerQ;
+
+I was able to safely align the shape of the unicorn with the shape of the lion.
+
++Figure 7.5: Before execution +
+
++Figure 7.6: After execution +
+In this section, we explained the implementation of the Shape Matching method using singular value decomposition. This time, it was implemented in 2D, but it can also be implemented in 3D with the same algorithm. I think that there were many, but I hope that you will take this opportunity to become interested in the application method of matrix arithmetic in the CG field and deepen your learning.
+ +
|
![]() |
|
This chapter focuses on the Space filling problem * 1 and explains Apollonian Gasket , which is one of the methods to solve it .
+Since this chapter focuses on the algorithm explanation of Apollonius Gasket, it deviates a little from the story of graphic programming.
+ +The space filling problem is the problem of finding a method to fill the inside of one closed plane as much as possible with a certain shape without overlapping. This problem is an area that has been studied for a long time, especially in the fields of geometry and combinatorial optimization. Since there are innumerable combinations of what kind of plane to fill with what kind of shape, various methods have been proposed for each combination.
+To give a few examples
+[* 1] Other names such as "tessellation", "packing problem", and "packing problem" are used.
[* 2] Rectangular packing \ cdots Fill the rectangular plane with a rectangle
[* 3] Polygon packing \ cdots Fill the rectangular plane with polygons
[* 4] Circle packing \ cdots Fill the inside of a circular plane with a circle
[* 5] Triangular packing \ cdots Fill the triangular plane with triangles
And so on, there are many other techniques. In this chapter, we will explain about Apollonian Gasket among the above.
+The Space filling problem is known to be NP-hard, and it is currently difficult to always fill the plane 100% with any of the above algorithms. The same is true for the Apollonian Gasket, which cannot completely fill the inside of a circle.
+The Apollonian Gasket is a type of fractal figure generated from three circles that touch each other. It is one of the earliest fractal figures, and it originated not as an algorithm proposed to solve the Space filling problem, but as a result of research in plane geometry that happened to provide one solution to the Space filling problem. The name comes from Apollonius of Perga, a Greek mathematician who lived in the era BC.
+First, assuming that the three circles that touch each other are C1, C2, and C3, respectively, Apollonius discovered that there are two non-intersecting circles C4 and C5 that touch all of C1, C2, and C3. These C4 and C5 are Apollonius circles for C1, C2 and C3 (details will be described later) .
+
++Figure 8.1: C1, C2, C3 and the two circles C4, C5 in contact with it +
+Now, if we consider C4 as opposed to C1, C2, we can get two new Apollonius circles for C1, C2, and C4. Of these two circles, one will be C3 and the other will be the new circle C6.
+Considering the Apollonius circles for all combinations, such as (C1, C2, C5), (C2, C3, C4), (C1, C3, C4), you can get at least one new circle for each. I can do it. By repeating this infinitely, a set of circles that touch each other is created. This set of circles is the Apollonian Gasket.
+
++Figure 8.2: Apollonian Gasket +
+https://upload.wikimedia.org/wikipedia/commons/e/e6/Apollonian_gasket.svg
+Circles of Apollonius
+It is the locus of the point P when the two fixed points A and B are taken and the point P is taken so that AP: BP = constant . However, apart from this, it refers to the solution to the Apollonius problem and is sometimes called the Apollonius circle, which has a stronger meaning in the Apollonius Gasket.
+Problem of Apollonius
+In Euclidean geometry, the problem is to draw a fourth circle tangent to the three given circles. It is said that there are at most eight solutions for this fourth circle; two of them are always circumscribed about the three circles, and two are always inscribed in the three circles.
+By the way, the three circles given as a condition do not have to touch each other, and the problem is to draw a fourth circle that touches the three circles.
+From here, I will explain the calculation method of Apollonian Gasket in order while looking at the actual program. A sample program is available on Github, so please download it from there if necessary.
+URL:https://github.com/IndieVisualLab/UnityGraphicsProgramming2
+ +In programming the Apollonian Gasket, this time we have prepared our own class to represent the circle and a structure to handle complex numbers.
++
Circle.cs
+using UnityEngine;
+
+public class Circle
+{
+ public float Curvature
+ {
+ get { return 1f / this.radius; }
+ }
+ public Complex Complex
+ {
+ get; private set;
+ }
+ public float Radius
+ {
+ get { return Mathf.Abs(this.radius); }
+ }
+ public Vector2 Position
+ {
+ get { return this.Complex.Vec2; }
+ }
+
+ private float radius = 0f;
+
+
+ public Circle(Complex complex, float radius)
+ {
+ this.radius = radius;
+ this.Complex = complex;
+ }
+
+ /// ...
+ /// Below, a function to check the relationship between circles is implemented.
+ /// Whether they are in contact, intersect, include, etc.
+ /// ...
+}
+
+Part of the implementation of a class that represents a circle.
+If you have basic programming knowledge, there should be nothing difficult. In addition, Complex is a complex number structure prepared by myself this time, and Curvature is the so-called curvature; both are values necessary for calculating the Apollonian Gasket.
+
Complex.cs
+using UnityEngine;
+using System;
+using System.Globalization;
+
+public struct Complex
+{
+ public static readonly Complex Zero = new Complex(0f, 0f);
+ public static readonly Complex One = new Complex(1f, 0f);
+ public static readonly Complex ImaginaryOne = new Complex(0f, 1f);
+
+ public float Real
+ {
+ get { return this.real; }
+ }
+ public float Imaginary
+ {
+ get { return this.imaginary; }
+ }
+ public float Magnitude
+ {
+ get { return Abs(this); }
+ }
+ public float SqrMagnitude
+ {
+ get { return SqrAbs(this); }
+ }
+ public float Phase
+ {
+ get { return Mathf.Atan2(this.imaginary, this.real); }
+ }
+ public Vector2 Vec2
+ {
+ get { return new Vector2(this.real, this.imaginary); }
+ }
+
+ [SerializeField]
+ private float real;
+ [SerializeField]
+ private float imaginary;
+
+
+ public Complex(Vector2 vec2) : this(vec2.x, vec2.y) { }
+
+ public Complex(Complex other) : this(other.real, other.imaginary) { }
+
+ public Complex(float real, float imaginary)
+ {
+ this.real = real;
+ this.imaginary = imaginary;
+ }
+
+ /// ...
+ /// Below, the function to calculate the complex number is implemented.
+ /// Four arithmetic operations, absolute value calculation, etc.
+ /// ...
+}
+
+A structure for handling complex numbers.
+C# has a Complex structure, but it has only been included since .NET 4.0. At the time of writing this chapter, Unity's .NET 4.6 support was in the Experimental stage, so I decided to prepare it myself.
As a prerequisite for calculating the Apollonian Gasket, there must be three circles tangent to each other. Therefore, in this program, three circles with randomly determined radii are generated, and the coordinates are calculated and arranged so that they touch each other.
+ApollonianGaskets.cs
+private void CreateFirstCircles(
+ out Circle c1, out Circle c2, out Circle c3)
+{
+ var r1 = Random.Range (
+ this.firstRadiusMin, this.firstRadiusMax
+ );
+ var r2 = Random.Range (
+ this.firstRadiusMin, this.firstRadiusMax
+ );
+ var r3 = Random.Range(
+ this.firstRadiusMin, this.firstRadiusMax
+ );
+
+ // Get random coordinates
+ var p1 = this.GetRandPosInCircle(
+ this.fieldRadiusMin,
+ this.fieldRadiusMax
+ );
+ c1 = new Circle(new Complex(p1), r1);
+
+ // Calculate the center coordinates of the tangent circle based on p1
+ var p2 = -p1.normalized * ((r1 - p1.magnitude) + r2);
+ c2 = new Circle(new Complex(p2), r2);
+
+ // Calculate the center coordinates of a circle tangent to two circles
+ var p3 = this.GetThirdVertex(p1, p2, r1 + r2, r2 + r3, r1 + r3);
+ c3 = new Circle(new Complex(p3), r3);
+}
+
+private Vector2 GetRandPosInCircle(float fieldMin, float fieldMax)
+{
+ // Get the right angle
+ var theta = Random.Range (0f, Mathf.PI * 2f);
+
+ // Calculate the appropriate distance
+ var radius = Mathf.Sqrt(
+ 2f * Random.Range(
+ 0.5f * fieldMin * fieldMin,
+ 0.5f * fieldMax * fieldMax
+ )
+ );
+
+ // Convert from polar coordinate system to Euclidean plane
+ return new Vector2(
+ radius * Mathf.Cos(theta),
+ radius * Mathf.Sin(theta)
+ );
+}
+
+private Vector2 GetThirdVertex(
+ Vector2 p1, Vector2 p2, float rab, float rbc, float rca)
+{
+ var p21 = p2 - p1;
+
+ // Calculate the angle by the cosine theorem
+ var theta = Mathf.Acos(
+ (rab * rab + rca * rca - rbc * rbc) / (2f * rca * rab)
+ );
+
+ // Calculate and add the angle of the starting point
+ // theta is just an angle in the triangle, not an angle in the plane
+ theta += Mathf.Atan2(p21.y, p21.x);
+
+ // Add the coordinates converted from the polar coordinate system to the Euclidean plane to the starting coordinates.
+ return p1 + new Vector2(
+ rca * Mathf.Cos(theta),
+ rca * Mathf.Sin(theta)
+ );
+}
+
+By calling the CreateFirstCircles function, the initial three circles are generated.
First, three radii r1, r2, r3 are decided randomly, then the GetRandPosInCircle function determines the center coordinates of a circle with radius r1 (hereinafter C1). This function returns random coordinates inside the annulus centered at the origin whose radius is greater than or equal to fieldMin and less than or equal to fieldMax.
++Figure 8.3: Area where random coordinates are generated +
+Next, calculate the center coordinates of the circle with radius r2 (hereinafter C2). First, calculate the distance from the origin to the center of C2 as (r1 - p1.magnitude) + r2. By multiplying this by the sign-inverted, normalized center coordinates of C1, the center coordinates of a circle with radius r2 adjacent to C1 can be obtained.
++Figure 8.4: Position represented by the p2 vector +
+Finally, the center coordinates of the circle with radius r3 (hereinafter C3) are calculated by the GetThirdVertex function, and this calculation uses the law of cosines. As most readers may have learned in high school, the law of cosines is a theorem that holds between the lengths of the sides of a triangle and the cosine of an interior angle. In △ABC, with a = BC, b = CA, c = AB, and α = ∠CAB,
a^2 = c^2 + b^2 - 2cbcosα ++
Is the law of cosines.
+
++Figure 8.5: Triangle ABC +
+You may be wondering why you need a triangle to think about the center of a circle, but in fact, the relationship between the three circles makes it possible to think of a very easy-to-use triangle. Considering a triangle whose apex is the center of C1, C2, and C3, since these three circles are in contact with each other, the length of each side of the triangle can be known from the radius of the circle.
+
++Figure 8.6: Triangle ABC and Circles C1, C2, C3 +
+When the law of cosines is transformed
+cosα = \frac{c^2 + b^2 - a^2}{2cb}
+
+Therefore, we can solve the cosine from the lengths of the three sides. Once the angle and distance between the two sides are found, the center coordinates of C3 can be found based on the center coordinates of C1. ..
+You have now generated the three tangent circles you need as an initial condition.
+ +Based on the three circles C1, C2, and C3 generated in the previous section, the circles tangent to them are calculated. Two parameters, radius and center coordinates, are required to create a new circle, so each is calculated.
+ +We first calculate from the radius, which can be calculated by Descartes's circle theorem . Descartes's circle theorem is that for four circles C1, C2, C3, C4 that touch each other, the curvature * 6 is k1, k2, k3, k4, respectively.
+(k_1 + k_2 + k_3 + k_4) ^ 2 = 2 ({k_1} ^ 2 + {k_2} ^ 2 + {k_3} ^ 2 + {k_4} ^ 2)
+
+Is true. This is a quadratic equation for the radii of four circles, but if you organize this equation
+k_4 = k_1 + k_2 + k_3 \ pm 2 \ sqrt {k_1k_2 + k_2k_3 + k_3k_1}
+
+[* 6] The reciprocal of the radius , defined by k = \ pm \ frac {1} {r}
If the three circles C1, C2, and C3 are known, the curvature of the fourth circle C4 can be obtained. Since the curvature is the reciprocal of the radius, the radius of the circle can be known by taking the reciprocal of the curvature.
+Here, two values for the curvature of C4 are obtained from the \pm sign: one solution is always positive, and the other is either positive or negative. When the curvature of C4 is positive, the circle is circumscribed about C1, C2, C3, and when it is negative, it is inscribed (i.e., it contains the three circles). In other words, there are two possible patterns for the fourth circle C4, and both may be valid.
+
++Figure 8.7: Positive and negative curvature +
+The following part is programming this.
+SoddyCircles.cs
+// Curvature calculation +var k1 = this.Circle1.Curvature; +var k2 = this.Circle2.Curvature; +var k3 = this.Circle3.Curvature; + +var plusK = k1 + k2 + k3 + 2f * Mathf.Sqrt (k1 * k2 + k2 * k3 + k3 * k1); +var minusK = k1 + k2 + k3 - 2f * Mathf.Sqrt (k1 * k2 + k2 * k3 + k3 * k1); ++
This Descartes circle theorem was later rediscovered by a chemist named Soddy, and the circles C1, C2, C3, and C4 are called Soddy circles.
+Soddy's Circles and Apollonius' Circles
+In the previous section, we talked about the Circles of Apollonius, and I think some of you may have wondered how these differ from Soddy's circles.
+An Apollonius circle is a general term for a circle that solves the problem of Apollonius. Soddy's circles is a term that refers to four circles that satisfy Descartes' circle theorem.
+In other words, since Soddy's circles are one of the solutions to the problem of Apollonius, they are also Apollonius circles.
+Next is the calculation of the center coordinates, which is calculated by the Cartesian complex number theorem , which has a shape similar to the Cartesian circle theorem . The Cartesian complex number theorem is that the center coordinates of the circles C1, C2, C3, C4 that touch each other on the complex plane are z1, z2, z3, z4, and the curvature is k1, k2, k3, k4.
+(k_1z_1 + k_2z_2 + k_3z_3 + k_4z_4)^2 = 2({k_1}^2{z_1}^2 + {k_2}^2{z_2}^2 + {k_3}^2{z_3}^2 + {k_4}^2{z_4}^2)
+
+Is true. To organize this formula for z4
+z_4 = \frac{z_1k_1 + z_2k_2 + z_3k_3 \pm 2\sqrt{k_1k_2z_1z_2 + k_2k_3z_2z_3 + k_3k_1z_3z_1}}{k_4}
+
+Since it can be transformed into, the center coordinates of the circle C4 can be obtained with this.
+Here, two curvatures were obtained when calculating the radius, and likewise two solutions are obtained from the \pm sign in the Descartes complex number theorem. However, unlike the curvature calculation, only one of the two is the correct Soddy circle, so you need to determine which one is correct.
+The following part is programming this.
+SoddyCircles.cs
+/// Calculation of center coordinates +var ck1 = Complex.Multiply(this.Circle1.Complex, k1); +var ck2 = Complex.Multiply(this.Circle2.Complex, k2); +var ck3 = Complex.Multiply(this.Circle3.Complex, k3); + +var plusZ = ck1 + ck2 + ck3 + + Complex.Multiply(Complex.Sqrt(ck1 * ck2 + ck2 * ck3 + ck3 * ck1), 2f); +var minusZ = ck1 + ck2 + ck3 + - Complex.Multiply(Complex.Sqrt(ck1 * ck2 + ck2 * ck3 + ck3 * ck1), 2f); + +var recPlusK = 1f / plusK; +var recMinusK = 1f / minusK; + +// Judgment of Sodi's circle +this.GetGasket( + new Circle(Complex.Divide(plusZ, plusK), recPlusK), + new Circle(Complex.Divide(minusZ, plusK), recPlusK), + out c4 +); + +this.GetGasket( + new Circle(Complex.Divide(plusZ, minusK), recMinusK), + new Circle(Complex.Divide(minusZ, minusK), recMinusK), + out c5 +); ++
SoddyCircles.cs
+/// Judgment of Sodi's circle +(c1.IsCircumscribed(c4, CalculationAccuracy) + || c1.IsInscribed(c4, CalculationAccuracy)) && +(c2.IsCircumscribed(c4, CalculationAccuracy) + || c2.IsInscribed(c4, CalculationAccuracy)) && +(c3.IsCircumscribed(c4, CalculationAccuracy) + || c3.IsInscribed(c4, CalculationAccuracy)) ++
Circle.cs
+public bool IsCircumscribed(Circle c, float accuracy)
+{
+ var d = (this.Position - c.Position).sqrMagnitude;
+ var abs = Mathf.Abs(d - Mathf.Pow(this.Radius + c.Radius, 2));
+
+ return abs <= accuracy * accuracy;
+}
+
+public bool IsInscribed(Circle c, float accuracy)
+{
+ var d = (this.Position - c.Position).sqrMagnitude;
+ var abs = Mathf.Abs(d - Mathf.Pow(this.Radius - c.Radius, 2));
+
+ return abs <= accuracy * accuracy;
+}
+
++
Now, based on the initial conditions C1, C2, C3, we have obtained two circles tangent to them (hereinafter C4, C5).
+ +At this point, you can easily calculate the Apollonian Gasket. Simply repeat the calculation performed in "8.4.3 Calculation of the circle tangent to C1, C2, C3" .
+In the previous section, we found the circles C4 and C5 that are tangent to C1, C2 and C3 found in "8.4.2 Calculation of the first three circles" . Next, find the circles that touch (C1, C2, C4) (C1, C2, C5) (C2, C3, C4) (C2, C3, C5) (C3, C1, C4) (C3, C1, C5). I will continue.
+Here, even if a circle is tangent to the three circles of its combination, it may actually overlap other circles. Therefore, after determining whether it is the correct Soddy circle, it is also necessary to confirm that it does not overlap any of the circles found so far.
+
++Figure 8.8: Of the circles C7 and C8 tangent to C1, C4 and C6, C8 overlaps C2 and is not included in the Apollonian Gasket. +
+Then you will get a new circle that touches each one. After that, in the same way, we will continue to find new tangent circles for all combinations of the original circle to find the tangent circle and the newly found tangent circle.
+Mathematically, the set of circles obtained by repeating this procedure infinitely is the Apollonian Gasket, but it is not possible to handle the program infinitely. Therefore, in this program, if the radius of the newly obtained tangent circle is less than a certain value, the condition that the processing is completed is given for the combination.
+The following part is programming this.
+ApollonianGaskets.cs
+private void Awake()
+{
+ // Generate the initial condition of three circles
+ Circle c1, c2, c3;
+ this.CreateFirstCircles(out c1, out c2, out c3);
+ this.circles.Add(c1);
+ this.circles.Add(c2);
+ this.circles.Add(c3);
+
+ this.soddys.Enqueue(new SoddyCircles(c1, c2, c3));
+
+ while(this.soddys.Count > 0)
+ {
+ // Calculate the Soddy circles
+ var soddy = this.soddys.Dequeue();
+
+ Circle c4, c5;
+ soddy.GetApollonianGaskets(out c4, out c5);
+
+ this.AddCircle(c4, soddy);
+ this.AddCircle(c5, soddy);
+ }
+}
+
+private void AddCircle(Circle c, SoddyCircles soddy)
+{
+ if(c == null || c.Radius <= MinimumRadius)
+ {
+ return;
+ }
+ // If the curvature is negative, add it unconditionally
+ // (a circle with negative curvature appears only once)
+ else if(c.Curvature < 0f)
+ {
+ this.circles.Add(c);
+ soddy.GetSoddyCircles(c).ForEach(s => this.soddys.Enqueue(s));
+
+ return;
+ }
+
+ // Check if it covers other circles
+ for(var i = 0; i < this.circles.Count; i++)
+ {
+ var o = this.circles[i];
+
+ if(o.Curvature < 0f)
+ {
+ continue;
+ }
+ else if(o.IsMatch(c, CalculationAccuracy) == true)
+ {
+ return;
+ }
+ }
+
+ this.circles.Add(c);
+ soddy.GetSoddyCircles(c).ForEach(s => this.soddys.Enqueue(s));
+}
+
+You have now successfully computed the Apollonian Gasket.
+
++Figure 8.9: Execution result on Unity +
+So far, we have walked through the steps required to calculate the Apollonian Gasket. As I explained at the beginning, the Apollonian Gasket has a stronger meaning as a fractal figure.
+However, if we remove the limitation of the plane this time and jump out into the world of space, it will become difficult to talk about it, and the meaning of filling (packing) from the fractal figure will become stronger. The proposition of sphere-packing space is a field that has been controversial for hundreds of years, including the existence of famous mathematical conjectures such as the Kepler conjecture.
+The Space filling problem is also useful in practical terms. It is applied in a wide range of fields such as optimization of VLSI layout design, optimization of cutting out parts such as cloth, and automation and optimization of UV development.
+This time, I chose the Apollonian Gasket, which is relatively easy to understand and interesting. If you are interested in packing itself, check out the algorithms introduced at the beginning.
+Fill the inside of the object with the object. I think it can be used as a new expression method in unexpected places.
+ +
|
![]() |
|
++Figure 9.1: Negative-Positive Inversion with ImageEffect +
+A simple explanation of how to implement ImageEffect, a technology that applies effects to the output video using a shader (GPU), in Unity. The technology is also known as PostEffect.
+ImageEffect is used for glow effects that express light, anti-aliasing that reduces jaggies, depth of field DOF, and much more. The simplest example would be a color change or modification that also deals with the sample presented here.
+This chapter is written on the assumption that you have some prerequisite knowledge about the basic knowledge and usage of Unlit shader and Surface shader, but since it is the shader with the simplest configuration, even if you do not have the prerequisite knowledge, I think you can read on and use it.
+The sample in this chapter is "Simple Image Effect" from
https://github.com/IndieVisualLab/UnityGraphicsProgramming2
.
The way ImageEffect achieves various effects is, in a nutshell, image processing, that is, by manipulating the screen pixel by pixel, various effects are achieved.
+Speaking of processing pixels by shaders, it is a fragment shader. In essence, implementing an ImageEffect is equivalent to implementing a fragment shader.
+
++Figure 9.2: ImageEffect implementation implements fragment shader +
+In Unity, the processing order of ImageEffect is roughly as follows.
+We have prepared the simplest sample scene. Open the sample "ImageEffectBase" scene to see it. The associated script and other resources have the same name.
+A similar sample has a resource with the same name as the ImageEffect scene, but be aware that it will be discussed later.
+When you open the sample, the image projected by the camera in the scene will be negatively and positively inverted by ImageEffect. This is equivalent to the shader for ImageEffect that Unity generates by default, but the actual source code is slightly different.
+Make sure the "ImageEffectBase" script is attached to the "Main Camera" in the sample scene. In addition, "ImageEffectBase" references a material with the same name, and that material has a shader with the same name.
+ +First of all, I will explain the process from calling the Shader of ImageEffect from the script.
+ +When you want to make changes to the video that Unity outputs, you almost always need to implement the OnRenderImage method. OnRenderImage is a method defined in Unity's standard workflow, like Start and Update.
+ImageEffectBase.cs
+[ExecuteInEditMode]
+[RequireComponent(typeof(Camera))]
+public class ImageEffectBase : MonoBehaviour
+{
+…
+protected virtual void OnRenderImage
+ (RenderTexture source, RenderTexture destination)
+{
+ Graphics.Blit(source, destination, this.material);
+}
+
+OnRenderImage is only called when the script is attached to a GameObject that has a Camera component. Therefore, the ImageEffect class defines [RequireComponent(typeof(Camera))].
The ExecuteInEditMode attribute is also defined so that the result of applying the ImageEffect is visible without running the scene. Disable the ImageEffect script when you want to switch between multiple ImageEffects or check the output with them disabled.
OnRenderImage is given an input in the first argument (source) and an output destination in the second (destination). Both are of type RenderTexture, but unless otherwise specified, source is given the drawing result of the camera and destination is given null.
+ImageEffect modifies the picture entered in source and writes it to destination, but when destination is null, the modified picture is output to the framebuffer, the area visible to the display.
+Also, when RenderTexture is set to the output destination of the Camera, the source is equivalent to that RenderTexture.
+The Graphics.Blit method draws the input RenderTexture to the output RenderTexture using the specified material and shader. The input and output here are the source and destination of OnRenderImage. The material is the one with the ImageEffect shader set on it.
As a general rule, the OnRenderImage method must always pass some image data to the destination argument. Therefore, in most cases Graphics.Blit is called within OnRenderImage.
+Graphics.Blit may also be used in more advanced ways, for example when creating a texture for use in another effect, or when duplicating a texture. Alternatively, you may use another method to pass the data to the destination, but I'll omit those advanced examples here for the sake of getting started.
The following items are a little different from the process of applying ImageEffect, so if you are reading for the first time, it is recommended that you skip to the shader description.
+ +I don't think it is necessary to implement or explain this item when explaining ImageEffect, but I decided to explain it so that it would not be an obstacle when reading materials with more practical implementations. Equivalent functionality is implemented in the ImageEffect documentation provided by Unity.
+ImageEffect is a process that is calculated for each pixel. Therefore, in an execution environment without an advanced GPU, ImageEffect may not be welcomed due to the large number of operations. Therefore, it is helpful to verify at the start whether ImageEffect is available in the execution environment and disable it if it is not available.
+ImageEffectBase.cs
+protected virtual void Start()
+{
+ if (!SystemInfo.supportsImageEffects
+ || !this.material
+ || !this.material.shader.isSupported)
+ {
+ base.enabled = false;
+ }
+}
+
+Verification can easily be achieved with SystemInfo.supportsImageEffects, which Unity provides.
This implementation will be useful in most cases, but you may need a different implementation, for example when using the fallback feature implemented on the shader side. Please refer to it to the last.
+The only thing to be aware of regarding this.material is when to validate the reference. The example validates in the Start method; if this were done in Awake or OnEnable instead, Unity may report this.material as null (and base.enabled = false would disable the script) even if a reference is actually assigned. Details are omitted, but this depends on the ExecuteInEditMode specification (it is hard to call it harmful).
Next, I will explain about the ImageEffect shader. The most basic sample presented here implements the effect of just flipping the output colors, similar to what Unity creates as standard.
+ImageEffectBase.shader
+Shader "ImageEffectBase"
+{
+ Properties
+ {
+ _MainTex("Texture", 2D) = "white" {}
+ }
+ SubShader
+ {
+ Cull Off ZWrite Off ZTest Always
+
+ Pass
+ {
+ CGPROGRAM
+
+ #include "UnityCG.cginc"
+ #pragma vertex vert_img
+ #pragma fragment frag
+
+ sampler2D _MainTex;
+
+ fixed4 frag(v2f_img input) : SV_Target
+ {
+ float4 color = tex2D(_MainTex, input.uv);
+ color.rgb = 1 - color.rgb;
+
+ return color;
+ }
+
+ ENDCG
+ }
+ }
+}
+
+As a rough process flow, the image drawn by the camera is input to _MainTex, and the fragment shader determines the final color to be displayed for each pixel.
Here, the texture information given to _MainTex is equal to the source of OnRenderImage, i.e. the source of Graphics.Blit.
Please note that the name _MainTex is reserved by Unity as the input for Graphics.Blit. If you change it to a different name, the source of Graphics.Blit is not passed to the shader correctly.
The ImageEffect that Unity generates by default is a bit long and complex (excerpt): ImageEffect is also a shader, so you get the final output through a standard rendering pipeline. Therefore, a vertex shader that does not seem to affect the effect that ImageEffect achieves must also be defined in the ImageEffect shader.
+NewImageEffectShader.shader
+SubShader
+{
+ Cull Off ZWrite Off ZTest Always
+
+ Pass
+ {
+ CGPROGRAM
+ #pragma vertex vert
+ #pragma fragment frag
+
+ #include "UnityCG.cginc"
+
+ struct appdata
+ {
+ float4 vertex : POSITION;
+ float2 uv : TEXCOORD0;
+ };
+
+ struct v2f
+ {
+ float2 uv : TEXCOORD0;
+ float4 vertex : SV_POSITION;
+ };
+
+ v2f vert (appdata v)
+ {
+ v2f o;
+ o.vertex = UnityObjectToClipPos(v.vertex);
+ o.uv = v.uv;
+ return o;
+ }
+
+ sampler2D _MainTex;
+
+ fixed4 frag (v2f i) : SV_Target
+ {
+ fixed4 col = tex2D(_MainTex, i.uv);
+ col.rgb = 1 - col.rgb;
+ return col;
+ }
+ ENDCG
+ }
+}
+
+The vertex shader in ImageEffect simply faces the camera and passes a rectangular mesh that fills the entire surface and its UV coordinates to the fragment shader. There are some benefits that can be achieved by modifying this vertex shader, but most ImageEffects do not.
+That's why Unity provides a standard vertex shader and structures that define its inputs. They are defined in "UnityCG.cginc". In this chapter's shader source code, instead of writing our own, we make use of the standard vert_img vertex shader and the appdata and v2f_img structures defined in UnityCG.cginc to simplify the entire source code.
At first glance, the default settings seem fine for culling and for writing to and testing against the Z-buffer. However, Unity recommends defining Cull Off ZWrite Off ZTest Always to prevent inadvertent writes to the Z-buffer.
Let's practice ImageEffect easily. The sample simply flips the full screen negatively and positively, but try applying negative and positive flipping "only to the diagonal half" of the entire image, as shown in the figure at the beginning of this chapter.
+input.uvIs given coordinates that indicate one pixel of the entire image, so take advantage of this. Each pixel in the entire image is represented by the x * y coordinates normalized by 0 to 1.
+Example code that works is included in the sample "Practice" folder and will be explained later, but if you are new to this, I recommend trying to implement it yourself first.
+ +It's very easy to change the color in the upper and lower halves. This is a good way to see the origin of the ImageEffect's coordinates. For example, the following two lines of code invert colors when the x and y coordinates are less than half, respectively.
+Practice/ImageEffectShader_01.shader
+color.rgb = input.uv.x < 0.5 ? 1 - color.rgb : color.rgb; +color.rgb = input.uv.y < 0.5 ? 1 - color.rgb : color.rgb; ++
Did you confirm from the color change that the origin of the coordinates given to ImageEffect is the lower left?
+ +I mentioned earlier that the top, bottom, left, and right halves are easy, but in reality, the diagonal halves are also easy. You can apply the effect (invert the color) diagonally in half with the following source code.
+Practice/ImageEffectShader_02.shader
+color.rgb = input.uv.y < input.uv.x ? 1 - color.rgb : color.rgb; ++
That was an introduction to useful functions and structures such as the vert_img vertex shader and the appdata structure defined in UnityCG.cginc; in addition to these, convenient values for implementing ImageEffects are also defined.
_ScreenParams is a float4 value: x and y are given the pixel width and height of the output image, and z and w are given 1 + 1/x and 1 + 1/y respectively.
For example, when rendering at 640x480, x = 640, y = 480, z = 1 + 1/640 and w = 1 + 1/480. In practice, you will rarely need to use z and w.
On the other hand, the x and y values are often used, for example, to calculate how large one pixel is on the image, or to calculate the aspect ratio. These are important for creating elaborate effects, and it is helpful that Unity provides them without any values having to be passed from a script. Keeping this in the back of your mind may help you read other shaders.
A similar predefined value is <sampler2D variable name>_TexelSize; here it is _MainTex_TexelSize.
Like _ScreenParams it is a float4, but different values are given to each element: x = 1/width, y = 1/height, z = width, w = height. Another feature is that the values differ for each corresponding sampler2D. Not only for _MainTex: if you define a corresponding ~_TexelSize variable, Unity will fill in its value.
Many ImageEffects use _ScreenParams, but I think _MainTex_TexelSize is easier to use.
For example, it is often the case in image processing that you want to refer to the color (value) of the next pixel, but you can refer to the value of the next pixel with the following code.
+Practice/ImageEffectShader_03.shader
+sampler2D _MainTex;
+float4 _MainTex_TexelSize;
+
+fixed4 frag(v2f_img input) : SV_Target
+{
+ float4 color = tex2D(_MainTex, input.uv);
+
+ color += tex2D(_MainTex, input.uv + float2(_MainTex_TexelSize.x, 0));
+ color += tex2D(_MainTex, input.uv - float2(_MainTex_TexelSize.x, 0));
+ color += tex2D(_MainTex, input.uv + float2(0, _MainTex_TexelSize.y));
+ color += tex2D(_MainTex, input.uv - float2(0, _MainTex_TexelSize.y));
+
+ color = color / 5;
+
+ return color;
+}
+
+This code references the four surrounding pixels and returns the average value. In image processing, it is literally called a smoothing filter. In addition, a higher quality noise reduction filter may be implemented by referring to the surrounding pixels in the same way, and it is also used in edge / contour detection filters, for example.
+ +
++Figure 9.3: Image of G-Buffer +
+When implementing a material (shader) to apply to a model, you will often refer to the model's depth and normal information. ImageEffect, which manipulates two-dimensional image information, does not seem to be able to acquire depth and normal information, but there is a method to acquire the depth and normal information of an object projected on a certain pixel on the image. I have.
+To explain the technical details it would be necessary to explain the rendering pipeline, which would take a while, so let me omit it. Briefly, the depth information and normal information corresponding to each pixel of the image to be drawn can be stored in buffers. Those buffers are called G-Buffers. Some G-Buffers store colors and depths. (By the way, the original paper shows that the "G" in G-Buffer stands for "geometry buffer".)
+When drawing an object, the depth and normal information is also written into these buffers, and it is referenced by the ImageEffect, which is executed at the end of drawing. This technique plays an important role in deferred rendering, but it can also be used with forward rendering.
+These discussions use a sample "ImageEffect" scene and a resource with the same name.
+ +A little setting is required to refer to the depth and normal information in ImageEffect. Since the basic functions are common, here we will set it in ImageEffect.cs, which inherits ImageEffectBase.cs.
+ImageEffect.cs
+public class ImageEffect : ImageEffectBase
+{
+ protected new Camera camera;
+ public DepthTextureMode depthTextureMode;
+
+ protected override void Start()
+ {
+ base.Start();
+
+ this.camera = base.GetComponent<Camera>();
+ this.camera.depthTextureMode = this.depthTextureMode;
+ }
+
+ protected virtual void OnValidate()
+ {
+ if (this.camera != null)
+ {
+ this.camera.depthTextureMode = this.depthTextureMode;
+ }
+ }
+}
+
+To get the depth and normal information, DepthTextureModeyou need to set the camera . This is a setting to control how information such as depth and normal is written. The initial value is None.
Unfortunately, it DepthTextureMode's a parameter that doesn't appear in the camera's Inspector, so you'll need to optionally get a camera reference from the script and set it.
OnValidate For those who haven't used the method very often, it is the method that is called when the parameter is updated on the Inspector.
With the code presented here, you can change the DepthTextureMode value from the Inspector. There are several possible values, but note that we use DepthNormals here.
If Depth is set, only the depth information is acquired. Note, however, that with Depth the procedure for obtaining the depth information from the shader is slightly different from DepthNormals. Also, by setting MotionVectors, you can obtain motion information corresponding to each pixel, which enables a lot of fun effects, but explaining all of that would take too long, so let me omit it here.
Here's how to get the depth and normal information from the shader once DepthTextureMode has been set on the camera:
_CameraDepthNormalsTexture is a sampler2D that is given the depth and normal information, just as _MainTex is given the image to draw. Therefore, you can use input.uv to get the depth and normal information for each pixel of the image to draw.
ImageEffect.shader
+sampler2D _MainTex;
+sampler2D _CameraDepthNormalsTexture;
+
+fixed4 frag(v2f_img input) : SV_Target
+{
+ float4 color = tex2D(_MainTex, input.uv);
+ float3 normal;
+ float depth;
+
+ DecodeDepthNormal
+ (tex2D(_CameraDepthNormalsTexture, input.uv), depth, normal);
+
+ depth = Linear01Depth(depth);
+ return fixed4(depth, depth, depth, 1);
+
+ return fixed4(normal.xyz, 1);
+}
+
+The value obtained from _CameraDepthNormalsTexture packs the depth and normal values together, so we need to decompose it into the individual values. The function for decomposing is provided by Unity: pass the DecodeDepthNormal function the value you want to decompose and variables to receive the results.
++Figure 9.4: Depth visualization with ImageEffect +
+I will explain the depth information first. Depth information is actually handled differently depending on the platform. Unity provides some mechanisms to absorb the differences, and when implementing an ImageEffect I think it is best to use the Linear01Depth function. Linear01Depth is a function that normalizes the obtained depth value to the range 0 to 1.
+In the sample, the acquired depth value is assigned to R, G, and B to visualize it. It is recommended to move the camera in the scene or change the Clipping Planes values from the Inspector to see how the output changes.
++Figure 9.5: ImageEffect Visualization of Normals +
+Visualization of normal information is not as complicated as depth information. The normal information is equivalent to that referenced by scripts and common shaders. X, YZ information indicating the direction of the surface projected on a pixel is given in a format normalized to 0 to 1.
+If you just want to check if the normals are obtained correctly, you can output the values of X, Y, Z as R, G, B as they are. In other words, the face facing to the right has a larger value of X = R and becomes more red, and the face facing upward has a value of Y = G and becomes greener.
+ +The main references in this chapter are: Both are official Unity.
+
|
![]() |
|
A programmer who creates installations, signage, the Web (front-end / back-end), smartphone apps, etc. I am interested in video expression and design tool development.
+ + +Former game developer, programmer making interactive art. I like the design and development of moderately complicated mechanisms and libraries. Night Type.
+ + +An interactive artist / engineer who works in an atmosphere. I like interactive content more than three meals. I like potatoes and don't eat radish sprouts. I often post Gene videos on Twitter. I do VJ once in a while.
+A person who makes interactive art in Unity. Freelance. We look forward to your work => hi@sugi.cc
+ + +Interaction engineer. In the field of video expression such as installation, signage, stage production, music video, concert video, VJ, etc., we are producing content that makes use of real-time and procedural characteristics. I have been active several times in a unit called Aqueduct with sugi-cho and mattatz.
+Former technical artist of a game development company. I like art, design and music, so I turned to interactive art. My hobbies are samplers, synths, musical instruments, records, and equipment. I started Twitter.
+ + +Former VFX production technical artist. Current interactive artist / engineer. I'm still a student.
+Interaction engineer. I am interested in visualization of simulations by CG, and I would like to make visualizations that shake people's emotions more, rather than visualizing them accurately. I like to make it, but I find it more fun to know more than that. My favorite school classroom is the drawing room or the library.
+ + +Continuing from the previous introduction to ComputeShader, this time it was more loose and fluffy than Graphics Programming :-) I hope you can reach out to those who haven't been able to keep up with the advanced content written by others.
+ + +After working in physics and the web, he is an interactive engineer. New art school 3rd term. Interested in stage production.
+ \ No newline at end of file diff --git a/html-translated/vol2/Contributors_files/cleardot.gif b/html-translated/vol2/Contributors_files/cleardot.gif new file mode 100644 index 0000000..1d11fa9 Binary files /dev/null and b/html-translated/vol2/Contributors_files/cleardot.gif differ diff --git a/html-translated/vol2/Contributors_files/element_main.js b/html-translated/vol2/Contributors_files/element_main.js new file mode 100644 index 0000000..4c5de3c --- /dev/null +++ b/html-translated/vol2/Contributors_files/element_main.js @@ -0,0 +1,486 @@ +(function(){/* + + Copyright The Closure Library Authors. + SPDX-License-Identifier: Apache-2.0 +*/ +var aa='" style="background-image:url(',ba="-disabled",ca="-document.getElementById('",da="/translate_a/t",ea="/translate_suggestion?client=",fa='
|
![]() |
|
This book is the second volume of the "Unity Graphics Programming" series, which explains the technology related to graphics programming by Unity. This series provides introductory content and applications for beginners, as well as tips for intermediate and above, on a variety of topics that the authors are interested in.
+The source code explained in each chapter is published in the github repository ( https://github.com/IndieVisualLab/UnityGraphicsProgramming2 ), so you can read this manual while executing it at hand.
+The difficulty level varies depending on the article, and depending on the amount of knowledge of the reader, some content may be unsatisfactory or too difficult. Depending on your knowledge, it's a good idea to read articles on the topic you are interested in. For those who usually do graphics programming at work, I hope it will lead to more effect drawers, and students are interested in visual coding, I have touched Processing and openFrameworks, but I still have 3DCG. For those who are feeling a high threshold, I would be happy if it would be an opportunity to introduce Unity and learn about the high expressiveness of 3DCG and the start of development.
+IndieVisualLab is a circle created by colleagues (& former colleagues) in the company. In-house, we use Unity to program the contents of exhibited works in the category generally called media art, and we are using Unity, which is a bit different from the game system. In this book, knowledge that is useful for using Unity in the exhibited works may be scattered.
+ +Some of the contents explained in this manual use Compute Shader, Geometry Shader, etc., and the execution environment in which DirectX 11 operates is recommended, but there are also chapters where the contents are completed by the program (C #) on the CPU side.
+I think that the behavior of the sample code released may not be correct due to the difference in environment, but please take measures such as reporting an issue to the github repository and replacing it as appropriate.
+ +If you have any impressions, concerns, or other requests regarding this book (such as wanting to read the explanation about 〇〇), please feel free to use the Web form ( https://docs.google.com/forms/d/e/1FAIpQLSdxeansJvQGTWfZTBN_2RTuCK_kRqhA6QHTZKVXHCijQnC8zw/ Please let us know via viewform ) or email (lab.indievisual@gmail.com).
+
++Figure 1: Web form QR code +
+
|
![]() |
|
Hello, I'm Sugino! This chapter displays thousands and tens of thousands of skinned animated objects.
+
++Figure 1.1: A flock of birds flapping their wings +
+In Unity, I think you'll be using the Animator and SkinnedMeshRenderer components to achieve character animation.
+For example, what if you want to represent a flock or crowd of birds? Would you like to use Animator and SkinnedMeshRenderer for thousands or tens of thousands of character objects? Generally, when displaying a large number of objects on the screen, GPU instancing is used to render a large number of objects at once. However, SkinnedMeshRenderer does not support instancing, which renders individual objects one by one, which is very heavy.
+As a solution to solve this, there is a method to save the animated vertex position information as a texture, but in this chapter we will explain how to actually do it, the way of thinking and application until implementation, and points to be noted. I will.
+Please feel free to ask questions on Twitter (to @sugi_cho) as some explanations may be omitted or some parts may be difficult to understand. If there is something wrong, I would appreciate it if you could point it out (._.)
+ +First of all, I would like to see how heavy the processing would be if a large number (5000 objects) of normally animated objects were placed. This time, we have prepared a simple animated horse 3D object with 1890 vertices.
+
++Figure 1.2: Horse model used +
+When I actually moved it, I can see that the FPS is 8.8, which is considerably heavier. Figure 1.3
+
++Figure 1.3: 5000 animated horses +
+Now, let's look at Unity's profiler to find out what is getting heavier in this process. Display Profiler (shortcut key: Ctr + 7) from the Window menu. You can get more detailed information by selecting GPU from the Add Profiler pull-down and viewing the GPU Usage profiler. Obtaining GPU Usage information itself is an overhead, so it is better not to display it when it is not needed, but this time GPU Usage will be important, so we will actively use it.
+
++図1.4: Profiler Window (GPU Usage) +
+Looking at the profiler, you can see that the GPU processing time is longer than the CPU processing time, and that the CPU is waiting for the GPU processing to complete (Figure 1.4). You can also see that PostLateUpdate.UpdateAllSkinnedMeshes occupies about 70% of the GPU processing. Also, since as many horse objects are rendered as are visible, it seems that the number of Camera.Render GPU rendering operations could be reduced by batching the objects or performing GPU instancing. Likewise on the CPU side, PostLateUpdate.UpdateAllSkinnedMeshes and Camera.Render account for most of the processing time in CPU Usage.
In this test scene, the Player Settings are set to use GPU Skinning. If you were skinning on the CPU instead of the GPU, the CPU processing rate would increase and the FPS would be lower than it is now. At the time of GPU skinning, the CPU side calculates the bone matrix, passes the matrix information to the GPU, and performs the skinning process on the GPU. At the time of CPU skinning, matrix calculation and skinning processing are performed on the CPU side, and the skinned vertex data is passed to the GPU side.
+In this way, in order to optimize processing, it is important to first determine where the processing bottleneck is.
+ +As a result of profiling, the mesh skinning process seems to be heavy. Now that I know that, I would like to consider a method of calculating in advance instead of performing the skinning process itself in real time.
+SkinnedMeshRendererSkinnedMeshRenderer.BakeMesh(Mesh)There is a function called as a method to acquire the vertex information after the skinning process of . It takes a snapshot of the skinned mesh and stores it in the specified mesh. It takes a little time to process, but it can be selected if it is used to store skinned vertex information in advance.
Listing 1.1: SkinnedMeshRenderer.BakeMesh () Example
+ 1: Animator animator;
+ 2: SkinnedMeshRenderer skinnedMesh;
+ 3: List<Mesh> meshList;
+ 4:
+ 5: void Start(){
+ 6: animator = GetComponent<Animator>();
+ 7: skinnedMesh = GetComponentInChildren<SkinnedMeshRenderer>();
+ 8: meshList = new List<Mesh>();
+ 9: animator.Play("Run");
+10: }
+11:
+12: void Update(){
+13: var mesh = new Mesh ();
+14: skinnedMesh.BakeMesh (mesh);
+15: // A snapshot of the skinned mesh is stored in mesh
+16: meshList.Add(mesh);
+17: }
+
+SkinnedMeshRendererNow, the mesh of the snapshot of each frame of the animation of the Animator's Run state will be stored in the meshList. Listing 1.1
If meshListyou use this saved file MeshFilter.sharedMeshand switch the mesh ( ) in the same way as switching pictures in a flip book, SkinnedMeshRendereryou can display the animation of the mesh without using, so the skinning process that was a bottleneck as a result of profiling It seems that you can omit.
However, if this implementation saves multiple Mesh data for each frame, mesh information (Mesh.indeces, Mesh.uv, etc.) that is not changed by animation will also be saved, resulting in a lot of waste. In the case of skinning animation, the only data to be updated is the vertex position information and normal information, so you only need to save and update these.
+ +One possible method is to have the vertex position and normal data for each frame in an array of Vector3, and update the mesh position and normal for each frame. Listing 1.2
+Listing 1.2: Update Mesh
+ 1: Mesh objMesh;
+ 2: List<Vector3>[] vertecesLists;
+ 3: List<Vector3>[] normalsLists;
+ 4: // Saved vertex information
+ 5: // For use with Mesh.SetVertices (List <Vector3>)
+ 6:
+ 7: void Start(){
+ 8: objMesh = GetComponent<MeshFilter>().mesh;
+ 9: objMesh.MarkDynamic();
+10: }
+11:
+12: void Update(){
+13: var frame = xx;
+14: // Calculate the frame at the current time
+15:
+16: objMesh.SetVertices (vertecesLists [frame]);
+17: objMesh.SetNormals(normalsLists[frame]);
+18: }
+
+However, this method puts a heavy CPU load on the mesh update itself for the purpose of displaying the thousands of animation objects that we are trying to solve.
+So, as the answer is written from the beginning of this chapter, we store the position information and normal information in the texture, and use VertexTextureFetch to update the vertex position and normal information of the mesh in the vertex shader. This eliminates the need to update the original mesh data itself, making it possible to realize vertex animation without the processing load of the CPU.
+ +Now, let's briefly explain how to save the position information of mesh vertices in a texture.
+Unity Meshobjects are classes that store data such as vertex positions, normals, and UV values of 3D models displayed in Unity. In the vertex position information ( Mesh.vertices), the position information for all the vertices of the mesh is Vector3saved as an array. Table 1.1
And a Unity Texture2D object is saved as an array of color information (Color) with one entry per pixel: texture width (texture.width) × height (texture.height). Table 1.2
Table 1.1: Location Information (Vector3)
+| x float x direction component |
|---|
| y float y direction component |
| z float z direction component |
Table 1.2: Color information (Color)
+| r float red component |
|---|
| g float green component |
| b float blue component |
| a float opacity component |
The positions of the vertices (Mesh.vertices, Table 1.1) have x, y, and z values, and the color information of Texture2D (Table 1.2) contains r, g, and b. By storing x, y, z into r, g, b respectively and saving the result as a TextureAsset in an EditorScript, the vertex position information is saved as a texture. This is a sample script that saves the positions and normals of mesh vertices as texture colors. Listing 1.3
+Listing 1.3: Saving vertex information to texture
+ 1: public void CreateTex(Mesh sourceMesh)
+ 2: {
+ 3: var vertCount = sourceMesh.vertexCount;
+ 4: var width = Mathf.FloorToInt(Mathf.Sqrt(vertCount));
+ 5: var height = Mathf.CeilToInt((float)vertCount / width);
+ 6: // Find the width and height where the number of vertices <width x height
+ 7:
+ 8: posTex = new Texture2D(width, height, TextureFormat.RGBAFloat, false);
+ 9: normTex = new Texture2D(width, height, TextureFormat.RGBAFloat, false);
+10: // Texture2D to store Color []
+11: // By specifying TextureFormat.RGBAFloat, you can have color information with each element Float value.
+12:
+13: var vertices = sourceMesh.vertices;
+14: var normals = sourceMesh.normals;
+15: var posColors = new Color[width * height];
+16: var normColors = new Color[width * height];
+17: // Color information array for the number of vertices
+18:
+19: for (var i = 0; i < vertCount; i++)
+20: {
+21: posColors[i] = new Color(
+22: vertices[i].x,
+23: vertices[i].y,
+24: vertices[i].z
+25: );
+26: normColors[i] = new Color(
+27: normals[i].x,
+28: normals[i].y,
+29: normals[i].z
+30: );
+31: }
+32: // At each vertex, Color.rgb = Vector3.xyz,
+33: // Generate a color array (Color []) such that position → color, normal → color.
+34:
+35: posTex.SetPixels(posColors);
+36: normTex.SetPixels(normColors);
+37: posTex.Apply();
+38: normTex.Apply();
+39: // Set the color array to the texture and apply
+40: }
+
+Now we have been able to write the Mesh's vertex positions into the position texture and its normal information into the normal texture.
++Figure 1.5: Write Mesh vertex positions and normals to Texture +
+Actually, since there is no index data for making polygons, it is not possible to reproduce the shape of the mesh with only the position texture and normal texture, but it is possible to write the mesh information to the texture. It's done. Figure 1.5
+The official Unity manual reads as if Texture2D.SetPixels(Color[]) only works with the ColorFormats RGBA32, ARGB32, RGB24, and Alpha8. However, that restriction only applies to fixed-point (fixed-precision) texture formats; apparently it also works with floating-point formats such as RGBAHalf and RGBAFloat, and even if you assign a negative value or a value of 1 or more to each element of the color, the value seems to be held without being clamped. Substituting a Color into a fixed-precision texture limits the RGB values to between 0 and 1 and the precision to 1/256.
In the method of burning the vertex information of this animation into a texture, the animation is sampled at regular intervals, the vertex information of the mesh of each frame is arranged, and a series of animation information is burned into one texture. A total of two textures, a position information texture and a normal information texture, are generated.
+ +This time, AnimationClip.SampleAnimation(gameObject, time);we will use the function to sample the animation . For the specified GameObject, set it to the state of the specified time of AnimationClip. So Animation, Animatorit supports both legacy and legacy . (Rather, it's a way to play an animation without using Animation or Animator components.)
Now, I will explain the actual implementation of specifying a frame from AnimationClip and acquiring the vertex position.
+ +This program consists of the following three elements.
+With AnimationClipTextureBaker, get AnimationClip from Animation or Animator, and create ComputeBuffer of mesh vertex data while sampling AnimationClip to each frame. And it is ComputeShader that converts ComputeBuffer of vertex animation information created from AnimationClip and Mesh data into position information texture and normal information texture with MeshInfoTextureGen.compute.
+TextureAnimPlayer.shader is a Shader for animating the mesh with the created location and normal textures.
+
++Figure 1.6: AnimationClipTextureBaker Inspector +
+AnimationClipTextureBakerInspector. Sets to ComputeShaderplay the animated texture for generating the animated texture Shader. Then, set what you want AnimationClipto texture to Clips. Figure 1.6
++Figure 1.7: Texture writing can be done from the context menu in the Inspector +
+ContextMenuAttributeAllows you to call methods in your script from the context menu in Unity's Inspector. It is convenient because it can be executed without creating an editor extension. In this case, Bakeyou can call the script from "bake texture" in the context menu . Figure 1.6
Now let's look at the actual code.
+Listing 1.4: AnimationClipTextureBaker.cs
+ 1: using System.Collections.Generic;
+ 2: using System.Linq;
+ 3: using UnityEngine;
+ 4:
+ 5: #if UNITY_EDITOR
+ 6: using UnityEditor;
+ 7: using System.IO;
+ 8: #endif
+ 9:
+10: public class AnimationClipTextureBaker : MonoBehaviour
+11: {
+12:
+13: public ComputeShader infoTexGen;
+14: public Shader playShader;
+15: public AnimationClip[] clips;
+16:
+17: // Vertex information is a structure of position and normal
+18: public struct VertInfo
+19: {
+20: public Vector3 position;
+21: public Vector3 normal;
+22: }
+23:
+24: // Reset () is called when scripting a GameObject in the editor
+25: private void Reset()
+26: {
+27: var animation = GetComponent <Animation> ();
+28: var animator = GetComponent <Animator> ();
+29:
+30: if (animation != null)
+31: {
+32: clips = new AnimationClip[animation.GetClipCount()];
+33: var i = 0;
+34: foreach (AnimationState state in animation)
+35: clips[i++] = state.clip;
+36: }
+37: else if (animator != null)
+38: clips = animator.runtimeAnimatorController.animationClips;
+39: // Automatically set AnimationClip if there is an Animation or Animator component
+40: }
+41:
+42: [ContextMenu("bake texture")]
+43: void Bake()
+44: {
+45: var skin = GetComponentInChildren <SkinnedMeshRenderer> ();
+46: var vCount = skin.sharedMesh.vertexCount;
+47: var texWidth = Mathf.NextPowerOfTwo(vCount);
+48: var mesh = new Mesh ();
+49:
+50: foreach (var clip in clips)
+51: {
+52: var frames = Mathf.NextPowerOfTwo((int)(clip.length / 0.05f));
+53: var dt = clip.length / frames;
+54: var infoList = new List<VertInfo>();
+55:
+56: var pRt = new RenderTexture(texWidth, frames,
+57: 0, RenderTextureFormat.ARGBHalf);
+58: pRt.name = string.Format("{0}.{1}.posTex", name, clip.name);
+59: var nRt = new RenderTexture(texWidth, frames,
+60: 0, RenderTextureFormat.ARGBHalf);
+61: nRt.name = string.Format("{0}.{1}.normTex", name, clip.name);
+62: foreach (var rt in new[] { pRt, nRt })
+63: {
+64: rt.enableRandomWrite = true;
+65: rt.Create();
+66: RenderTexture.active = rt;
+67: GL.Clear(true, true, Color.clear);
+68: }
+69: // Texture initialization
+70:
+71: for (var i = 0; i < frames; i++)
+72: {
+73: clip.SampleAnimation(gameObject, dt * i);
+74: // Sampling GameObject at the specified time of AnimationClip
+75: skin.BakeMesh(mesh);
+76: // Call BakeMesh () to get the mesh data in the skinned state
+77:
+78: infoList.AddRange(Enumerable.Range(0, vCount)
+79: .Select(idx => new VertInfo()
+80: {
+81: position = mesh.vertices[idx],
+82: normal = mesh.normals[idx]
+83: })
+84: );
+85: // Store the animation frame in the list first
+86: }
+87: var buffer = new ComputeBuffer(
+88: infoList.Count,
+89: System.Runtime.InteropServices.Marshal.SizeOf(
+90: typeof (VertInfo)
+91: )
+92: );
+93: buffer.SetData(infoList.ToArray());
+94: // Set vertex information in ComputeBuffer
+95:
+96: var kernel = infoTexGen.FindKernel("CSMain");
+97: uint x, y, z;
+98: infoTexGen.GetKernelThreadGroupSizes(
+99: kernel,
+100: out x,
+101: out y,
+102: out z
+103: );
+104:
+105: infoTexGen.SetInt("VertCount", vCount);
+106: infoTexGen.SetBuffer(kernel, "Info", buffer);
+107: infoTexGen.SetTexture(kernel, "OutPosition", pRt);
+108: infoTexGen.SetTexture(kernel, "OutNormal", nRt);
+109: infoTexGen.Dispatch(
+110: kernel,
+111: vCount / (int)x + 1,
+112: frames / (int)y + 1,
+113: 1
+114: );
+115: // Set up Compute Shader and generate textures
+116:
+117: buffer.Release();
+118:
+119: // Editor script to save the generated texture
+120: #if UNITY_EDITOR
+121: var folderName = "BakedAnimationTex";
+122: var folderPath = Path.Combine("Assets", folderName);
+123: if (!AssetDatabase.IsValidFolder(folderPath))
+124: AssetDatabase.CreateFolder("Assets", folderName);
+125:
+126: var subFolder = name;
+127: var subFolderPath = Path.Combine(folderPath, subFolder);
+128: if (!AssetDatabase.IsValidFolder(subFolderPath))
+129: AssetDatabase.CreateFolder(folderPath, subFolder);
+130:
+131: var posTex = RenderTextureToTexture2D.Convert(pRt);
+132: var normTex = RenderTextureToTexture2D.Convert(nRt);
+133: Graphics.CopyTexture(pRt, posTex);
+134: Graphics.CopyTexture(nRt, normTex);
+135:
+136: var mat = new Material (playShader);
+137: mat.SetTexture("_MainTex", skin.sharedMaterial.mainTexture);
+138: mat.SetTexture("_PosTex", posTex);
+139: mat.SetTexture("_NmlTex", normTex);
+140: mat.SetFloat("_Length", clip.length);
+141: if (clip.wrapMode == WrapMode.Loop)
+142: {
+143: mat.SetFloat("_Loop", 1f);
+144: mat.EnableKeyword("ANIM_LOOP");
+145: }
+146:
+147: var go = new GameObject(name + "." + clip.name);
+148: go.AddComponent<MeshRenderer>().sharedMaterial = mat;
+149: go.AddComponent<MeshFilter>().sharedMesh = skin.sharedMesh;
+150: // Set the generated texture as a material, set the mesh and make a Prefab
+151:
+152: AssetDatabase.CreateAsset(posTex,
+153: Path.Combine(subFolderPath, pRt.name + ".asset"));
+154: AssetDatabase.CreateAsset(normTex,
+155: Path.Combine(subFolderPath, nRt.name + ".asset"));
+156: AssetDatabase.CreateAsset(mat,
+157: Path.Combine(subFolderPath,
+158: string.Format("{0}.{1}.animTex.asset", name, clip.name)));
+159: PrefabUtility.CreatePrefab(
+160: Path.Combine(folderPath, go.name + ".prefab")
+161: .Replace("\\", "/"), go);
+162: AssetDatabase.SaveAssets();
+163: AssetDatabase.Refresh();
+164: #endif
+165: }
+166: }
+167: }
+
+If RenderTextureyou generate it once , process it on the GPU, copy it to, Graphics.CopyTexture(rt,tex2d);and Texture2Dsave it as a Unity Asset with an editor script, it will be an asset that can be used without recalculation from now on, so I think it is a versatile technique. Listing 1.4 (lines 119,120)
In this implementation, it is implemented by writing to the texture ComputeShader. When doing a lot of processing, using the GPU is faster, so it is a useful technique, so please try to master it. As for the processing content, the position buffer and normal buffer of the vertex animation generated by the script are simply placed in each pixel as they are. Listing 1.5
Listing 1.5: MeshInfoTextureGen.compute
+ 1: #pragma kernel CSMain
+ 2:
+ 3: struct MeshInfo{
+ 4: float3 position;
+ 5: float3 normal;
+ 6: };
+ 7:
+ 8: RWTexture2D<float4> OutPosition;
+ 9: RWTexture2D<float4> OutNormal;
+10: StructuredBuffer<MeshInfo> Info;
+11: int VertCount;
+12:
+13: [numthreads(8,8,1)]
+14: void CSMain (uint3 id : SV_DispatchThreadID)
+15: {
+16: int index = id.y * VertCount + id.x;
+17: MeshInfo info = Info[index];
+18:
+19: OutPosition[id.xy] = float4(info.position, 1.0);
+20: OutNormal[id.xy] = float4(info.normal, 1.0);
+21: // Arrange the vertex information so that the x-axis of the texture is the vertex ID and the y-axis direction is time.
+22: }
+
+Here is the texture generated from the script. Figure 1.8
+
++Figure 1.8: Generated texture +
+This texture stores the vertices of the mesh in each sampled frame, one column in the x-axis direction. And the uv.ytexture is designed so that the y-axis direction is time and you can specify the animation time by changing when sampling the texture.
What I would like you to pay attention to is the point that Texture.FilterMode = Bilinear. When sampling the texture, each pixel is interpolated with adjacent pixels, so when the Shader plays the animation texture at a time halfway between one frame sampled during texture generation and the next frame, the per-frame positions and normals are automatically interpolated, resulting in smooth playback of the animation.
And in this case, the Run animation is a loop animation WrapMode = Repeat. This interpolates the last and first pixels of the animation texture, resulting in a smooth looped animation. Of course, if you WrapMode = Clampwant to generate a texture from a non-looping animation, you need to set it to.
Next is the Shader for playing the generated animation texture. Listing 1.6
+Listing 1.6: TextureAnimPlayer.shader
+ 1: Shader "Unlit/TextureAnimPlayer"
+ 2: {
+ 3: Properties
+ 4: {
+ 5: _MainTex ("Texture", 2D) = "white" {}
+ 6: _PosTex("position texture", 2D) = "black"{}
+ 7: _NmlTex("normal texture", 2D) = "white"{}
+ 8: _DT ("delta time", float) = 0
+ 9:
+10: _Length ("animation length", Float) = 1
+11: [Toggle(ANIM_LOOP)] _Loop("loop", Float) = 0
+12: }
+13: SubShader
+14: {
+15: Tags { "RenderType"="Opaque" }
+16: LOD 100 Cull Off
+17:
+18: Pass
+19: {
+20: CGPROGRAM
+21: #pragma vertex vert
+22: #pragma fragment frag
+23: #pragma multi_compile ___ ANIM_LOOP
+24: // It is convenient to make a multi-compile for the loop
+25:
+26: #include "UnityCG.cginc"
+27:
+28: #define ts _PosTex_TexelSize
+29:
+30: struct appdata
+31: {
+32: float2 uv : TEXCOORD0;
+33: };
+34:
+35: struct v2f
+36: {
+37: float2 uv : TEXCOORD0;
+38: float3 normal : TEXCOORD1;
+39: float4 vertex : SV_POSITION;
+40: };
+41:
+42: sampler2D _MainTex, _PosTex, _NmlTex;
+43: float4 _PosTex_TexelSize;
+44: float _Length, _DT;
+45:
+46: v2f vert (appdata v, uint vid : SV_VertexID)
+47: // You can get the vertex ID with the semantic of SV_VertexID
+48: {
+49: float t = (_Time.y - _DT) / _Length;
+50: #if ANIM_LOOP
+51: t = fmod(t, 1.0);
+52: #else
+53: t = saturate(t);
+54: #endif
+55:
+56: float x = (vid + 0.5) * ts.x;
+57: float y = t;
+58: // uv.x is specified based on the vertex ID
+59: // Set the time (t) to sample the animation in uv.y
+60:
+61: float4 pos = tex2Dlod(
+62: _PosTex,
+63: float4 (x, y, 0, 0)
+64: );
+65: float3 normal = tex2Dlod(
+66: _NmlTex,
+67: float4 (x, y, 0, 0)
+68: );
+69: // Sampling location and normal information from textures
+70:
+71: v2f o;
+72: o.vertex = UnityObjectToClipPos(pos);
+73: o.normal = UnityObjectToWorldNormal (normal);
+74: o.uv = v.uv;
+75: return o;
+76: }
+77:
+78: half4 frag (v2f i) : SV_Target
+79: {
+80: half diff = dot(
+81: i.normal,
+82: float3(0, 1, 0)
+83: ) * 0.5 + 0.5;
+84: half4 col = tex2D(_MainTex, i.uv);
+85: return diff * col;
+86: }
+87: ENDCG
+88: }
+89: }
+90: }
+
+Shaders that play animated textures use a technique called VertexTextureFetch (VTF). Simply put, the texture is sampled in the vertex shader and used to calculate the position of the vertices and each value. This method is often used for displacement mapping, etc.
+I'm using the vertex ID to sample the texture, which SV_VertexIDcan be obtained semantically. Since the vertex information obtains both the position information and the normal information from the texture, the part where there is only uv in the app data is also noteworthy. ( appdataTo POSITION,NORMALnot be a particularly error to define a semantic)
As for the UV when sampling the texture: uv.y is the normalized time of the animation (the value where the start of the animation is 0 and the end is 1.0). uv.x is based on the vertex index (vid): uv.x = (vid + 0.5) * _TexelSize.x. What is this 0.5? When sampling a texture with Bilinear filtering, the position (n + 0.5) / textureSize lets you fetch the uninterpolated value of pixel n, so by adding 0.5 to the vertex ID we obtain the positions and normals without interpolation between the pixels of neighboring vertices in the mesh.
Listing 1.7: {TextureName} _TexelSize Float4 properties with texture size information (from Unity official manual)
+x contains 1.0 / width +y contains 1.0 / height +z contains width +w contains height ++
++Figure 1.9: 5000 horses animated by texture +
+SkinnedMeshRendererThe Rendereranimation is being played without using and with animation textures. FPS is greatly improved from 8 to 56.4 compared to when using skinning animation. Figure 1.9
* The GPU of the PC currently being written is GeForce MX150, which is the weakest of the NVIDIA Pascal GPUs. The rendering resolution was a bit smaller because we captured the profiler and the game window at the same time, but that shouldn't be that much as most of the processing load was mesh skinning. .. !!
+Also, I would like you to pay attention to the fact that other optimization processing such as instancing support is not performed. SkinnedMeshRendererSince we no longer use, it is now possible to draw with GPU instancing. It means that it is possible to pursue further performance by supporting Shader's instancing.
Although not explained here, the bird on the cover uses texture-animated Graphics.DrawMeshInstancedIndirect()birds to draw about 4000 birds at once. For Shader instancing support and other applications, check out my GitHub and other articles.
There are some restrictions on the technique using this texture. The memory for holding the texture is consumed depending on the number of vertices of the mesh and the length of the animation. You need to write a Shader to blend the animation. The state machine of AnimatorController cannot be used. Etc.
+The biggest limitation is the maximum size of textures that can be used for each hardware. It can be 4K, 8K, 16K. In other words, in this method, the vertices of each frame of the mesh are arranged in a horizontal row, so the number of vertices of the mesh is limited by the texture size.
+However, when you output a large number of objects, you should not output those with such a large number of vertices, so it is a good idea to accept the limit on the number of vertices as it is and make sure that the number of vertices of the mesh does not exceed the maximum size of the texture. maybe. If you want to use baking animation textures beyond this limit on the number of vertices, you can consider using multiple textures.
+Alternatively, you can pre-calculate the matrix for each bone in the skeleton instead of the vertices of the mesh and save it in a texture or buffer. Since the skinning process itself is performed by Vertex Shader at the time of execution, the skinning process that was performed during normal mesh skinning PostLateUpdate.UpdateAllSkinnedMeshesis performed Camera.Rendercollectively at the time of rendering, so the processing load is considerably lightened. Please, try it.
Since AnimatorController and Unity's state machine cannot be used, it is difficult to control the animation, so it is better to apply it to some deception such as mobs that repeat loop animation and swarms of flying birds and butterflies instead of the main character. I think it's good.
+ +
|
![]() |
|
In this chapter, we will explain the GPU implementation method of Gravitational N-Body Simulation, which is a method of simulating the movement of celestial bodies existing in outer space.
++Figure 2.1: Result +
+The corresponding sample program is
https://github.com/IndieVisualLab/UnityGraphicsProgramming3
"Assets / NBodySimulation".
Simulations that calculate the interaction of N physical objects are collectively called N-Body simulations. There are many types of problems using N-Body simulation, and in particular, the problem of dealing with a system in which celestial bodies scattered in outer space attract each other by gravity to form a unit is called a gravity multisystem problem. I will. The algorithm explained in this chapter corresponds to this, and it means to solve the equation of motion of the gravitational multisystem using N-Body simulation.
+In addition to the gravity multisystem problem, N-Body simulation is also available.
+It is applied in a wide range of fields, from small to magnificent.
+ +Let's see what kind of mathematical formulas we will solve immediately.
+The gravitational multisystem problem can be simulated by calculating the universal gravitational equation, which is a familiar equation for those who are taking high school physics, for all celestial bodies in space. However, in high school physics, I think that it was learned with such a description because it deals only with objects that are in a straight line.
+Where f is the universal gravitational force, G is the gravitational constant, M and m are the masses of the two objects, and r is the distance between the objects. Of course, this can only determine the magnitude of the force (scalar amount) between two objects .
+In this implementation, it is necessary to consider the movement in the 3D space inside Unity, so a vector quantity indicating the direction is required. Therefore, in order to find the vector of the force generated between two celestial bodies ( i, j ), the equation of universal gravitational force is described as follows.
+Here, \ mbox {\ boldmath $ f $} _ {ij} is the vector of the force that the object i receives from the object j, m_i and m_j are the masses of the two objects, and r_ {ij} is the object i from the object j. Direction vector to. On the left side of the right side, the magnitude of the force is calculated in the same way as the equation of universal gravitational force that first appeared, and it is vectorized by multiplying the unit vector in the direction of receiving the force on the right side of the right side.
+missing image: takao / vec+
+Figure: Meaning of formula +
+Furthermore, if the magnitude of the force that one object ( i ) receives from all the surrounding objects, not between two objects, is \ mbox {\ boldmath $ F $} _ {i} , it can be calculated as follows. ..
+As shown in the formula, the force received from the surrounding celestial bodies can be calculated by taking the sum of all universal gravitational forces.
+Also, to simplify the simulation, the equation can be rewritten using the Softening factor \ varepsilon as follows.
+This makes it possible to ignore collisions even if the celestial bodies come to the same position (even if they calculate themselves, the result in Sigma will be 0 ).
+Next, using the second law of motion m \ mbox {\ boldmath $ a $} = \ mbox {\ boldmath $ f $}, we will convert the force vector into the acceleration vector. First, we transform the second law of motion into the following equation.
+Next, by substituting the above transformation equation into the equation of motion of the gravitational multisystem and rewriting it, the acceleration received by the celestial body can be calculated.
+The simulation is now ready. Now that we were able to express the gravitational multisystem problem with mathematical formulas, how can we incorporate these mathematical formulas into our program? I would like to explain it firmly in the next section.
+ +The above equation (\ ref {eq: newton}) is classified as a differential equation among the equations . Because, in the physical world, the relationship between position, velocity, and acceleration is as shown in the following image, and since acceleration is the second derivative of the position function, it is called a differential equation as it is.
++Figure 2.2: Relationship between position, velocity, and acceleration +
+There are various methods for solving differential equations with a computer, but the most common one is the algorithm called the difference method . Let's start with a review of differentiation.
+ +First, let's review the definition of mathematical differentiation. The derivative of the function f (t) is defined by the following equation.
+Since it is difficult to understand what is shown by the formula alone, it becomes as follows when replaced with a graph.
++Figure 2.3: Forward difference +
+You already know that the differential value of the function is the slope of the graph at t_n . After all, this graph shows the state that \ Delta t is made infinitely small to calculate the slope, and you can see that it represents the formula (\ ref {eq: delta}) itself. think.
+ +You cannot handle "infinity" as a numerical value on a computer. Therefore, we will approximate it with a finite \ Delta t that is as small as possible . With that in mind, if we rewrite the definition of differentiation to difference, we get the following equation.
+It's a shape that has taken the limit as it is. I think it's okay to recognize that "an infinitesimal \ Delta t cannot be represented on a computer, so stop at a certain size \ Delta t to approximate it."
+Here, the physical representation of Figure 2.2 above is as follows.
++Figure 2.4: Relationship between position, velocity, and acceleration (mathematical formula) +
+That is, the formula (\ ref {eq: diffuse}) is compared to Figure 2.4 as follows:
+Furthermore, when the formulas (\ ref {eq: x}) and (\ ref {eq: v}) are combined, the result is as follows.
+This formula means that " the position coordinates after \ Delta t seconds from the current time t can be calculated if the acceleration and velocity of the current time are known." This is the basic idea when simulating with the finite difference method. In addition, the expression of such a differential equation using the difference method is called a difference equation (recurrence formula) .
+When actually performing real-time simulation by the difference method, it is common to set the minute time \ Delta t (time step) to the drawing time of one frame (1/60 second at 60 fps).
+ +Now, let's get into the implementation. The corresponding scene will be "SimpleNBodySimulation.unity".
+ +First of all, we define the data structure of celestial particles. Looking at the equation (\ ref {eq: delta}), we can see that the physical quantities that one celestial body should have are "position, velocity, mass". Therefore, it seems good to define the following structure.
+Body.cs
+public struct Body
+{
+ public Vector3 position;
+ public Vector3 velocity;
+ public float mass;
+}
+
+Next, set the number of particles you want to generate from the inspector, and secure a buffer for that number. Separate buffers for reading and writing to prevent data race conditions.
+In addition, the number of bytes per structure can be obtained with the "Marshal.SizeOf (Type t)" function in the "System.Runtime.InteropServices" namespace.
+SimpleNBodySimulation.cs
+void InitBuffer()
+{
+ // Create buffer (for Read / Write) → Conflict prevention
+ bodyBuffers = new ComputeBuffer[2];
+
+ // Each element creates a buffer of Body structure for the number of particles
+ bodyBuffers[READ] = new ComputeBuffer(numBodies,
+ Marshal.SizeOf(typeof(Body)));
+
+ bodyBuffers[WRITE] = new ComputeBuffer(numBodies,
+ Marshal.SizeOf(typeof(Body)));
+}
+
+Then place the particles in space. First, create an array for the particles and give each element an initial value of the physical quantity. In the sample, the inside of the sphere was randomly sampled as the initial position, the velocity was 0, and the mass was randomly given.
+Finally, set the buffer for the created array and you are ready * 1 .
+[* 1] * For look adjustment, we have prepared a variable that can scale the position coordinates, but you do not have to worry because it has already been adjusted.
SimpleNBodySimulation.cs
+void DistributeBodies()
+{
+ Random.InitState(seed);
+
+ // For look adjustment
+ float scale = positionScale
+ * Mathf.Max(1, numBodies / DEFAULT_PARTICLE_NUM);
+
+ // Prepare an array to set in the buffer
+ Body[] bodies = new Body[numBodies];
+
+ int i = 0;
+ while (i < numBodies)
+ {
+ // Sampling inside the sphere
+ Vector3 pos = Random.insideUnitSphere;
+
+ // set in an array
+ bodies[i].position = pos * scale;
+ bodies[i].velocity = Vector3.zero;
+ bodies[i].mass = Random.Range(0.1f, 1.0f);
+
+
+ i++;
+ }
+
+ // Set the array in the buffer
+ bodyBuffers[READ].SetData(bodies);
+ bodyBuffers[WRITE].SetData(bodies);
+
+}
+
+Finally, we will actually move the simulation. The following code is the part that is executed every frame.
+First, set the values in the constant buffer of the ComputeShader. For the Δt of the difference equation, use "Time.deltaTime" provided by Unity. In addition, due to the GPU implementation, the number of threads and the number of thread blocks are also transferred.
+After the calculation is completed, the calculation result of the simulation is stored in the buffer for writing, so the buffer is replaced in the last line so that it can be used as the buffer for reading in the next frame.
+SimpleNBodySimulation.cs
+void Update()
+{
+ // Transfer constants to compute shader
+ // Δt
+ NBodyCS.SetFloat("_DeltaTime", Time.deltaTime);
+ // Speed attenuation rate
+ NBodyCS.SetFloat("_Damping", damping);
+ // Softening factor
+ NBodyCS.SetFloat("_SofteningSquared", softeningSquared);
+ // Number of particles
+ NBodyCS.SetInt("_NumBodies", numBodies);
+
+ // Number of threads per block
+ NBodyCS.SetVector("_ThreadDim",
+ new Vector4(SIMULATION_BLOCK_SIZE, 1, 1, 0));
+
+ // Number of blocks
+ NBodyCS.SetVector("_GroupDim",
+ new Vector4(Mathf.CeilToInt(numBodies / SIMULATION_BLOCK_SIZE), 1, 1, 0));
+
+ // Transfer the buffer address
+ NBodyCS.SetBuffer(0, "_BodiesBufferRead", bodyBuffers[READ]);
+ NBodyCS.SetBuffer(0, "_BodiesBufferWrite", bodyBuffers[WRITE]);
+
+ // Compute shader execution
+ NBodyCS.Dispatch(0,
+ Mathf.CeilToInt(numBodies / SIMULATION_BLOCK_SIZE), 1, 1);
+
+
+ // Swap Read / Write (conflict prevention)
+ Swap(bodyBuffers);
+}
+
+After the simulation calculation, issue an instance drawing instruction to the material that renders the particles. When rendering a particle, it is necessary to give the position coordinates of the particle to the shader, so transfer the calculated buffer to the shader for rendering.
+ParticleRenderer.cs
+void OnRenderObject ()
+{
+ particleRenderMat.SetPass (0);
+ particleRenderMat.SetBuffer("_Particles", bodyBuffers[READ]);
+
+ Graphics.DrawProcedural(MeshTopology.Points, numBodies);
+}
+
+In N-Body simulation, it is necessary to calculate the interaction with all particles, so if it is calculated simply, the execution time will be O (n ^ 2) and performance cannot be achieved. Therefore, I will utilize the usage of Shared Memory described in Chapter 3 of UnityGraphicsProgramming Vol1.
+ +Data in the same block is stored in shared memory, which speeds up I / O. The following is a conceptual diagram that resembles a thread block as a tile.
++Figure 2.5: Tile concept +
+Here, the row is the global thread (DispatchThreadID) that is running, and the column is the particle that is being exhausted in the thread. It's like the columns you're running are shifting one by one to the right over time.
+Also, the total number of tiles executed at the same time is (number of particles / number of threads in the group). In the sample, the number of threads in the block is 256 (SIMULATION_BLOCK_SIZE), so it is recognized that the tile content is actually 256x256 instead of 5x5.
+All rows are running in parallel, but because they share data within the tile, they wait until all the columns running in the tile reach Sync (do not go to the right of the Sync layer). After reaching the Sync layer, the data of the next tile is reloaded into the shared memory.
+ +Describe the constant buffer for receiving the input from the CPU in the Compute Shader.
+Also, prepare a buffer for storing particle data. This time, the Body structure is summarized in "Body.cginc". It is convenient to put together cginc for things that are likely to be reused later.
+Finally, make a declaration to use shared memory.
+SimpleNBodySimulation.compute
+#include "Body.cginc"
+
+// constant
+cbuffer cb {
+ float _SofteningSquared, _DeltaTime, _Damping;
+ uint _NumBodies;
+ float4 _GroupDim, _ThreadDim;
+};
+
+
+// Particle buffer
+StructuredBuffer<Body> _BodiesBufferRead;
+RWStructuredBuffer<Body> _BodiesBufferWrite;
+
+// Shared memory (shared within the block)
+groupshared Body sharedBody[SIMULATION_BLOCK_SIZE];
+
+Next, implement the tile.
+SimpleNBodySimulation.compute
+float3 computeBodyForce(Body body, uint3 groupID, uint3 threadID)
+{
+
+ uint start = 0; // start
+ uint finish = _NumBodies;
+
+ float3 acc = (float3)0;
+ int currentTile = 0;
+
+ // Execute for the number of tiles (number of blocks)
+ for (uint i = start; i < finish; i += SIMULATION_BLOCK_SIZE)
+ {
+ // Store in shared memory
+ // sharedBody [thread ID in block]
+ // = _BodiesBufferRead [tile ID * total number of threads in block + thread ID]
+ sharedBody[threadID.x]
+ = _BodiesBufferRead[wrap(groupID.x + currentTile, _GroupDim.x)
+ * SIMULATION_BLOCK_SIZE + threadID.x];
+
+ // Group sync
+ GroupMemoryBarrierWithGroupSync();
+
+ // Calculate the effect of gravity from the surroundings
+ acc = gravitation(body, acc, threadID);
+
+ GroupMemoryBarrierWithGroupSync();
+
+ currentTile ++; // Go to the next tile
+ }
+
+ return acc;
+
+}
+
+Put the image of the for loop in the code in the next image.
++Figure 2.6: Tile ID for loop +
+Since we controlled the movement of the tile in the previous loop, we will implement the for loop in the tile this time .
+SimpleNBodySimulation.compute
+float3 gravitation(Body body, float3 accel, uint3 threadID)
+{
+
+ // 100% survey
+ // Execute for the number of threads in the block
+ for (uint i = 0; i < SIMULATION_BLOCK_SIZE;)
+ {
+ accel = bodyBodyInteraction(accel, sharedBody[i], body);
+ i++;
+ }
+
+ return accel;
+}
+
+This completes the 100% survey in the tile. Also, at the timing after the return of this function, it waits until all the threads in the tile complete the process.
+Then implement the gravitational interaction equation given earlier as follows:
+SimpleNBodySimulation.compute
+// Calculation in Sigma
+float3 bodyBodyInteraction(float3 acc, Body b_i, Body b_j)
+{
+ float3 r = b_i.position - b_j.position;
+
+ // distSqr = dot(r_ij, r_ij) + EPS^2
+ float distSqr = r.x * r.x + r.y * r.y + r.z * r.z;
+ distSqr += _SofteningSquared;
+
+ // invDistCube = 1/distSqr^(3/2)
+ float distSixth = distSqr * distSqr * distSqr;
+ float invDistCube = 1.0f / sqrt(distSixth);
+
+ // s = m_j * invDistCube
+ float s = b_j.mass * invDistCube;
+
+ // a_i = a_i + s * r_ij
+ acc += r * s;
+
+ return acc;
+}
+
+This completes the program for calculating the total acceleration.
+ +Next, the coordinates and velocity of the particles in the next frame are calculated using the acceleration calculated so far.
+SimpleNBodySimulation.compute
+[numthreads(SIMULATION_BLOCK_SIZE,1,1)]
+void CSMain (
+ uint3 groupID: SV_GroupID, // Group ID
+ uint3 threadID: SV_GroupThreadID, // Thread ID in the group
+ uint3 DTid: SV_DispatchThreadID // Global Thread ID
+) {
+
+ // Current global thread index
+ uint index = DTid.x;
+
+ // Read particles from buffer
+ Body body = _BodiesBufferRead[index];
+
+ float3 force = computeBodyForce(body, groupID, threadID);
+
+ body.velocity += force * _DeltaTime;
+ body.velocity *= _Damping;
+
+ // Difference
+ body.position += body.velocity * _DeltaTime;
+
+ _BodiesBufferWrite[index] = body;
+
+}
+
+The position coordinates of the celestial body have been updated. This completes the simulation of the movement of the celestial body!
+ +In this section, I would like to supplement the drawing method of GPU particles, which was insufficiently explained in the previous article * 2 .
+[* 2] The same particle rendering is performed in "Unity Graphics Programming Vol.1-Chapter 5 Fluid Simulation by SPH Method".
A billboard is a simple planar object that always faces the camera. It is no exaggeration to say that most particle systems in the world are implemented by billboards. In order to implement the billboard simply, we need to make good use of the view transformation matrix.
+The view transformation matrix contains numerical information that returns the camera position and rotation to the origin. In other words, by multiplying all the objects in space by the view transformation matrix, it is transformed into a coordinate system with the camera as the origin.
+Therefore, for a billboard that has the characteristic of facing the direction of the camera, it seems that the inverse matrix of the view transformation matrix containing only the rotation information should be multiplied as the model transformation matrix. (Although it will be described later, it is difficult to understand, so the illustration is shown in Figure 2.8 .)
+ +First of all, create a Quad mesh to draw the particles. This can be easily achieved by extending one vertex to a Quad parallel to the xy plane with a geometry shader * 3 .
+[* 3] For a detailed explanation of geometry shaders, see UnityGraphicsProgramming Vol.1 "Growing grass with geometry shaders".
++Figure 2.7: Quad extension with Geometry Shader +
+If you give this quad an inverse matrix * 4 that cancels the translation component of the view transformation matrix as a model transformation matrix, you can create a quad that faces the camera on the spot.
+[* 4] Since it is known that the inverse matrix of the view transformation matrix can be simply transposed, it is implemented here by transposing.
I don't know what you're talking about, so here's an explanatory diagram.
++Figure 2.8: How the billboard works +
+Furthermore, by applying a view and projection matrix to the billboard facing the camera, it can be converted to the coordinates on the screen. The shader that implements these is shown below.
+ParticleRenderer.shader
+[maxvertexcount(4)]
+void geom(point v2g input[1], inout TriangleStream<g2f> outStream) {
+ g2f o;
+
+ float4 pos = input[0].pos;
+
+ float4x4 billboardMatrix = UNITY_MATRIX_V;
+
+ // Take out only the rotating component
+ billboardMatrix._m03 = billboardMatrix._m13 =
+ billboardMatrix._m23 = billboardMatrix._m33 = 0;
+
+ for (int x = 0; x < 2; x++) {
+ for (int y = 0; y < 2; y++) {
+ float2 uv = float2(x, y);
+ o.uv = uv;
+
+ o.pos = pos
+ + mul(transpose(billboardMatrix), float4((uv * 2 - float2(1, 1))
+ * _Scale, 0, 1));
+
+ o.pos = mul(UNITY_MATRIX_VP, o.pos);
+
+ o.id = input[0].id;
+
+ outStream.Append(o);
+ }
+ }
+
+ outStream.RestartStrip();
+}
+
+
+Let's see the result of the above simulation.
+
++Figure 2.9: Simulation results (feeling of korejanai ...) +
+Looking at the movement, all the particles are gathered in the center, which is not visually interesting. Therefore, we will add some ideas in the next section.
+ +The corresponding scene is "NBodySimulation.unity". Even if it is a device, there is only one line to change, and the tile calculation part will be cut off in the middle as follows.
+NBodySimulation.compute
+float3 computeBodyForce(Body body, uint3 groupID, uint3 threadID)
+{
+ ・・・
+
+ uint finish = _NumBodies / div; // Cut in the middle
+
+ ・・・
+
+}
+
+As a result, the calculation of the interaction is discarded in the middle without being exhausted, but as a result, it is not affected by all the particles, so some particle lumps are generated and aggregated into one point. Will never happen.
+Then, the interaction between the parts of multiple masses produces a more dynamic movement as shown in Figure 2.1 at the beginning .
+ +In this chapter, we explained the GPU implementation method of Gravitational N-Body Simulation. From small atoms to the large universe, the potential of N-Body simulation is infinite. Why don't you try to create your own universe on Unity? I hope you can help me even a little!
+ +
|
![]() |
|
This chapter introduces Screen Space Fluid Rendering by Deferred Shading as one of the particle rendering methods .
+ +Traditionally, the Marching Cubes method is used to render fluid-like continuums, but it is relatively computationally intensive and not suitable for detailed drawing in real-time applications. Therefore, a method called Screen Space Fluid Rendering was devised as a method for drawing particle-based fluids at high speed .
+
++Figure 3.1: Schematic diagram of Screen Space Fluid Rendering +
+This creates a surface from the depth of the surface of the particles in the screen space visible to the camera, as shown in Figure 3.1.
+A technique called Deferred Rendering is used to generate this surface geometry .
+ +2-dimensional screen space (screen space) in the shading (shadow calculation) is a technology to perform. For the sake of distinction, the traditional type of technique is called Forward Rendering .
+Figure 3.2 outlines the traditional Forward Rendering and Deferred Rendering rendering pipelines.
+
++Figure 3.2: Comparison of Forward Rendering and Deferred Rendering pipelines +
+In the case of Forward Rendering , lighting and shading processing are performed in the first pass of the shader, but in Deferred Rendering , 2D image information such as normal , position , depth , diffuse color required for shading is generated, and G-Buffer Store in a buffer called. In the second pass, that information is used to perform lighting and shading to obtain the final rendering result. This delays the actual rendering to the second pass (and beyond) , hence the name "Deferred" Rendering .
+The advantage of Deferred Rendering is
+The downside is
+There are some restrictions such as trade-offs, so it is necessary to consider them before making a decision.
+ +Deferred Rendering has the following usage conditions, and this sample program may not work depending on the environment. ..
+Also, Deferred Rendering is not supported when using Orthographic projection , and Forward Rendering is used when the camera's projection mode is set to Orthographic .
+ +Information about (2D texture) in screen space, such as normals , positions , and diffuse colors used for shading and lining calculations, is called G-Buffer . In the G-Buffer path of Unity's rendering pipeline , each object is rendered once and rendered into a G-Buffer texture , generating the following information by default:
+Form 3.1:
+| render target | format | data type |
|---|---|---|
| RT0 | ARGB32 | Diffuse color (RGB), Occulusion (A) |
| RT1 | ARGB32 | Specular color (RGB), Roughness (A) |
| RT2 | ARGB2101010 | World space normal (RGB) |
| RT3 | ARGB2101010 | Emission + (Ambient + Reflections + Lightmaps) |
| Z-buffer | Depth + Stencil |
These G-Buffer textures are set as global properties and can be retrieved within the shader.
+Table 3.2:
+| shader property name | data type |
|---|---|
| _CameraGBufferTexture0 | Diffuse color (RGB), occulusion (A) |
| _CameraGBufferTexture1 | Specular color (RGB) |
| _CameraGBufferTexture2 | World space normal (RGB) |
| _CameraGBufferTexture3 | Emission + (Ambient + Reflections + Lightmaps) |
| _CameraDepthTexture | Depth + Stencil |
If you open Assets / ScreenSpaceFluidRendering / Scenes / ShowGBufferTest in the sample code, you can see how this G-Buffer is acquired and displayed on the screen.
+
++Figure 3.3: G-Buffer generated by default +
+The sample program introduced in this chapter uses Unity's API called CommandBuffer .
+The drawing process itself is not performed in the method written in the script executed by the CPU . Instead, it is added to the GPU 's understandable list of rendering commands , called the graphics command buffer, and the generated command buffer is read directly by the GPU and executed to actually draw the object.
+Rendering commands provided by Unity are , for example, methods such as Graphics.DrawMesh () and Graphics.DrawProcedural () .
+Of Unity of API CommandBuffer By using, Unity of the rendering pipeline to a specific point in the command buffer (a list of the rendering commands) by inserting a, Unity of the rendering pipeline can be extended to.
+You can see some sample projects using CommandBuffer here.
+https://docs.unity3d.com/ja/current/Manual/GraphicsCommandBuffers.html
+ +In the following, we will briefly explain the 3DCG graphics pipeline and coordinate system to understand the contents of the calculations performed on the screen space.
+ +When considering a three-dimensional position vector (x, y, z), it is sometimes treated as a four-dimensional one such as (x, y, z, w), which is called Homogeneous Coordinates. .. By thinking in four dimensions in this way, you can effectively multiply 4x4 Matrix. The calculation of the coordinate transformation is basically done by multiplying the 4x4 Matrix, so the position vector is expressed in 4 dimensions like this.
+The conversion between homogeneous coordinates and non-homogeneous coordinates is done in this way. (x / w, y / w, z / w, 1) = (x, y, z, w)
+ +The coordinate system around which the object itself is central.
+ +World Space is a coordinate system that shows how multiple objects are spatially related in a scene, centered on the scene. The World Space is transformed from the Object Space by a Modeling Transform that moves, rotates, and scales the object .
+ +Eye Space is a coordinate system centered on the drawing camera and with its viewpoint as the origin. Orientation on the position and the camera of the camera, was to define the information, such as a camera focus direction of the orientation of the View Matrix According to the View Transform by making a World Space will be converted from.
+ +Clip Space , the above View Matrix other parameters of the camera defined by and defines a field of view (FOV) · aspect ratio · near clip · _far clip Projection Matrix and View Space coordinate system obtained by converting multiplying the is. This transformation is called the Projection Transform , which clips the space drawn by the camera.
+ +By dividing each element of the coordinate value obtained in Clip Space by w, all position coordinates are normalized into the range -1 <= x <= 1, -1 <= y <= 1, 0 <= z <= 1. The coordinate system obtained by this is called Normalized Device Coordinates (NDC) . This transformation is called Perspective Divide, and objects in the foreground are drawn larger and those in the back are drawn smaller.
+ +A coordinate system in which the normalized values obtained by Normalized Device Coordinates are converted to match the screen resolution. In the case of Direct3D, the origin is the upper left.
+Deferred Rendering calculates based on the image in this screen space, but if necessary, it calculates and uses the information of any coordinate system by multiplying it by the inverse matrix of each transformation, so this rendering pipeline is used. It's important to understand.
+Figure 3.3 illustrates the relationship between the 3DCG graphics pipeline, the coordinate system, and coordinate transformation.
+
++Figure 3.4: Coordinate system, flow of coordinate transformation +
+Of the sample code
+Assets/ScreenSpaceFluidRendering/Scenes/ScreenSpaceFluidRendering
+Please open the scene.
+ +The general algorithm for Screen Space Fluid Rendering is as follows.
+* In this sample code, even the creation of surface geometry is performed. We do not perform transparent expressions.
+ +Table 3.3:
+| Script name | function |
|---|---|
| ScreenSpaceFluidRenderer.cs | Main script |
| RenderParticleDepth.shader | Find the depth of the particle's screen space |
| BilateralFilterBlur.shader | Blur effect that attenuates with depth |
| CalcNormal.shader | Find the normal from the depth information of the screen space |
| RenderGBuffer.shader | Write depth, normals, color information, etc. to G-Buffer |
ScreenSpaceFluidRendering.cs of OnWillRenderObject Within the function, CommandBuffer to create a, at any point of the camera of rendering path CommandBuffer make the process of registering.
+Below is an excerpt of the code
+ScreenSpaceFluidRendering.cs
+// Called when the attached mesh renderer is in the camera
+void OnWillRenderObject ()
+{
+ // If it is not active, release it and do nothing after that
+ var act = gameObject.activeInHierarchy && enabled;
+ if (!act)
+ {
+ CleanUp();
+ return;
+ }
+ // If there is no camera currently rendering, do nothing after that
+ var cam = Camera.current;
+ if (!cam)
+ {
+ return;
+ }
+
+ // For the camera currently rendering
+ // If CommandBuffer is not attached
+ if (!_cameras.ContainsKey(cam))
+ {
+ // Create Command Buffer information
+ var buf = new CmdBufferInfo ();
+ buf.pass = CameraEvent.BeforeGBuffer;
+ buf.buffer = new CommandBuffer();
+ buf.name = "Screen Space Fluid Renderer";
+ // In the path on the camera's rendering pipeline before the G-Buffer was generated,
+ // Add the created CommandBuffer
+ cam.AddCommandBuffer(buf.pass, buf.buffer);
+
+ // Add a camera to the list that manages cameras with CommandBuffer added
+ _cameras.Add(cam, buf);
+ }
+
+The Camera.AddCommandBuffer (CameraEvent evt, Rendering.CommandBuffer buffer) method adds a command buffer to the camera that runs at any path . Here, CameraEvent.BeforeGBuffer in immediately before the G-Buffer is generated and specifies the location of where any command buffer by inserting the, you can generate a calculated geometry on the screen space. The added command buffer is deleted by using the RemoveCommandBuffer method when the application is executed or the object is disabled . The process of deleting the command buffer from the camera is implemented in the Cleanup function.
+Then, CommandBuffer to the rendering command will continue to register a. At that time, at the beginning of frame update, delete all buffer commands by CommandBuffer.Clear method.
+ +Generates a point sprite based on the data of the vertices of a given particle and calculates the depth texture in that screen space .
+The code is excerpted below.
+ScreenSpaceFluidRendering.cs
+// --------------------------------------------------------------------
+// 1. Draw particles as point sprites to get depth and color data
+// --------------------------------------------------------------------
+// Get the shader property ID of the depth buffer
+int depthBufferId = Shader.PropertyToID("_DepthBuffer");
+// Get a temporary RenderTexture
+buf.GetTemporaryRT (depthBufferId, -1, -1, 24,
+ FilterMode.Point, RenderTextureFormat.RFloat);
+
+// Specify color buffer and depth buffer as render targets
+buf.SetRenderTarget
+(
+ new RenderTargetIdentifier(depthBufferId), // depth
+ new RenderTargetIdentifier (depthBufferId) // For depth writing
+);
+// Clear color buffer and depth buffer
+buf.ClearRenderTarget(true, true, Color.clear);
+
+// Set the particle size
+_renderParticleDepthMaterial.SetFloat ("_ParticleSize", _particleSize);
+// Set particle data (ComputeBuffer)
+_renderParticleDepthMaterial.SetBuffer("_ParticleDataBuffer",
+_particleControllerScript.GetParticleDataBuffer());
+
+// Draw particles as point sprites to get a depth image
+buf.DrawProcedural
+(
+ Matrix4x4.identity,
+ _renderParticleDepthMaterial,
+ 0,
+ MeshTopology.Points,
+ _particleControllerScript.GetMaxParticleNum()
+);
+
+
+RenderParticleDepth.shader
+// --------------------------------------------------------------------
+// Vertex Shader
+// --------------------------------------------------------------------
+v2g vert(uint id : SV_VertexID)
+{
+ v2g o = (v2g) 0;
+ FluidParticle fp = _ParticleDataBuffer[id];
+ o.position = float4(fp.position, 1.0);
+ return o;
+}
+
+// --------------------------------------------------------------------
+// Geometry Shader
+// --------------------------------------------------------------------
+// Position of each vertex of the point sprite
+static const float3 g_positions[4] =
+{
+ float3(-1, 1, 0),
+ float3( 1, 1, 0),
+ float3(-1,-1, 0),
+ float3( 1,-1, 0),
+};
+// UV coordinates of each vertex
+static const float2 g_texcoords[4] =
+{
+ float2(0, 1),
+ float2(1, 1),
+ float2(0, 0),
+ float2(1, 0),
+};
+
+[maxvertexcount(4)]
+void geom(point v2g In[1], inout TriangleStream<g2f> SpriteStream)
+{
+ g2f o = (g2f) 0;
+ // Position of the center vertex of the point sprite
+ float3 vertpos = In[0].position.xyz;
+ // 4 point sprites
+ [unroll]
+ for (int i = 0; i < 4; i++)
+ {
+ // Find and substitute the position of the point sprite in the clip coordinate system
+ float3 pos = g_positions[i] * _ParticleSize;
+ pos = mul(unity_CameraToWorld, pos) + vertpos;
+ o.position = UnityObjectToClipPos(float4(pos, 1.0));
+ // Substitute the UV coordinates of the point sprite vertices
+ o.uv = g_texcoords[i];
+ // Find and substitute the position of the point sprite in the viewpoint coordinate system
+ o.vpos = UnityObjectToViewPos(float4(pos, 1.0)).xyz * float3(1, 1, 1);
+ // Substitute the size of the point sprite
+ o.size = _ParticleSize;
+
+ SpriteStream.Append(o);
+ }
+ SpriteStream.RestartStrip();
+}
+
+// --------------------------------------------------------------------
+// Fragment Shader
+// --------------------------------------------------------------------
+struct fragmentOut
+{
+ float depthBuffer : SV_Target0;
+ float depthStencil : SV_Depth;
+};
+
+fragmentOut frag(g2f i)
+{
+ // Calculate normal
+ float3 N = (float3) 0;
+ N.xy = i.uv.xy * 2.0 - 1.0;
+ float radius_sq = dot(N.xy, N.xy);
+ if (radius_sq > 1.0) discard;
+ N.z = sqrt(1.0 - radius_sq);
+
+ // Pixel position in clip space
+ float4 pixelPos = float4(i.vpos.xyz + N * i.size, 1.0);
+ float4 clipSpacePos = mul(UNITY_MATRIX_P, pixelPos);
+ // depth
+ float depth = clipSpacePos.z / clipSpacePos.w; // normalization
+
+ fragmentOut o = (fragmentOut) 0;
+ o.depthBuffer = depth;
+ o.depthStencil = depth;
+
+ return o;
+}
+
+
+The C # script first generates a temporary RenderTexture for calculations in screen space . In the command buffer, use the CommandBuffer.GetTemporaryRT method to create temporary RenderTexture data and use it. In the first argument of the GetTemporaryRT method, pass the unique ID of the shader property of the buffer you want to create . In shader unique ID and is, in order to access the properties of the shader that is generated each time the game scene of Unity is executed int type unique ID in, Shader.PropertyToID the method property name can be generated by passing the I can. (Since this unique ID is different in game scenes where the execution timing is different, its value cannot be retained or shared with other applications over the network.)
+The second and third arguments of the GetTemporaryRT method specify the resolution. If -1 is specified, the resolution (Camera pixel width, height) of the camera currently rendering in the game scene will be passed.
+The fourth argument specifies the number of bits in the depth buffer. In _DepthBuffer , we also want to write the depth + stencil value, so specify a value of 0 or more.
+The resulting RenderTexture is, CommandBuffer.SetRenderTarget in the method, the render target specified, ClearRenderTarget in the method, leave the clear. If this is not done, it will be overwritten every frame and will not be drawn properly.
+The CommandBuffer.DrawProcedural method draws the particle data and calculates the color and depth textures in screen space . The figure below shows this calculation.
+
++Figure 3.5: Depth image calculation +
+The Vertex shader and Geometry shader generate point sprites that billboard in the viewpoint direction from the given particle data . The Fragment shader calculates the hemispherical normal from the UV coordinates of the point sprite and uses this to get a depth image in screen space .
+
++Figure 3.6: Depth image +
+By applying a blur effect to smooth the obtained depth image , it is possible to draw the image as if it is connected by blurring the boundary with adjacent particles. Here, a filter is used so that the offset amount of the blur effect is attenuated according to the depth.
+
++Figure 3.7: Blurred depth image +
+Calculate the normal from the blurred depth image . The normal is calculated by performing partial differentiation in the X and Y directions .
+
++Figure 3.8: Normal calculation +
+Below is an excerpt of the code
+CalcNormal.shader
+// --------------------------------------------------------------------
+// Fragment Shader
+// --------------------------------------------------------------------
+// Find the position in the viewpoint coordinate system from the UV of the screen
+float3 uvToEye(float2 uv, float z)
+{
+ float2 xyPos = uv * 2.0 - 1.0;
+ // Position in the clip coordinate system
+ float4 clipPos = float4(xyPos.xy, z, 1.0);
+ // Position in the viewpoint coordinate system
+ float4 viewPos = mul(unity_CameraInvProjection, clipPos);
+ // normalization
+ viewPos.xyz = viewPos.xyz / viewPos.w;
+
+ return viewPos.xyz;
+}
+
+// Get the depth value from the depth buffer
+float sampleDepth(float2 uv)
+{
+#if UNITY_REVERSED_Z
+ return 1.0 - tex2D(_DepthBuffer, uv).r;
+#else
+ return tex2D(_DepthBuffer, uv).r;
+#endif
+}
+
+// Get the position in the viewpoint coordinate system
+float3 getEyePos(float2 uv)
+{
+ return uvToEye(uv, sampleDepth(uv));
+}
+
+float4 frag(v2f_img i) : SV_Target
+{
+ // Convert screen coordinates to texture UV coordinates
+ float2 uv = i.uv.xy;
+ // get depth
+ float depth = tex2D(_DepthBuffer, uv);
+
+ // Discard pixels if depth is not written
+#if UNITY_REVERSED_Z
+ if (Linear01Depth(depth) > 1.0 - 1e-3)
+ discard;
+#else
+ if (Linear01Depth(depth) < 1e-3)
+ discard;
+#endif
+ // Store texel size
+ float2 ts = _DepthBuffer_TexelSize.xy;
+
+ // Find the position of the viewpoint coordinate system (as seen from the camera) from the uv coordinates of the screen
+ float3 posEye = getEyePos(uv);
+
+ // Partial differential with respect to x
+ float3 ddx = getEyePos(uv + float2(ts.x, 0.0)) - posEye;
+ float3 ddx2 = posEye - getEyePos(uv - float2(ts.x, 0.0));
+ ddx = abs(ddx.z) > abs(ddx2.z) ? ddx2 : ddx;
+
+ // Partial differential with respect to y
+ float3 ddy = getEyePos(uv + float2(0.0, ts.y)) - posEye;
+ float3 ddy2 = posEye - getEyePos(uv - float2(0.0, ts.y));
+ ddy = abs(ddy.z) > abs(ddy2.z) ? ddy2 : ddy;
+
+ // Find the normal orthogonal to the vector found above from the cross product
+ float3 N = normalize(cross(ddx, ddy));
+
+ // Change the normal in relation to the camera position
+ float4x4 vm = _ViewMatrix;
+ N = normalize(mul(vm, float4(N, 0.0)));
+
+ // Convert (-1.0 to 1.0) to (0.0 to 1.0)
+ float4 col = float4(N * 0.5 + 0.5, 1.0);
+
+ return col;
+}
+
+
+
++Figure 3.9: Normal image +
+The depth image and normal image obtained by the calculation so far are written to G-Buffer . By writing just before the rendering pass where the G-Buffer is generated, the geometry based on the calculation result is generated, and shading and lighting are applied.
+Excerpt from the code
+ScreenSpaceFluidRendering.cs
+// --------------------------------------------------------------------
+// 4. Write the calculation result to G-Buffer and draw the particles
+// --------------------------------------------------------------------
+buf.SetGlobalTexture ("_NormalBuffer", normalBufferId); // Set the normal buffer
+buf.SetGlobalTexture ("_DepthBuffer", depthBufferId); // Set the depth buffer
+
+// set properties
+_renderGBufferMaterial.SetColor("_Diffuse", _diffuse );
+_renderGBufferMaterial.SetColor("_Specular",
+ new Vector4(_specular.r, _specular.g, _specular.b, 1.0f - _roughness));
+_renderGBufferMaterial.SetColor("_Emission", _emission);
+
+// Set G-Buffer to render target
+buf.SetRenderTarget
+(
+ new RenderTargetIdentifier[4]
+ {
+ BuiltinRenderTextureType.GBuffer0, // Diffuse
+ BuiltinRenderTextureType.GBuffer1, // Specular + Roughness
+ BuiltinRenderTextureType.GBuffer2, // World Normal
+ BuiltinRenderTextureType.GBuffer3 // Emission
+ },
+ BuiltinRenderTextureType.CameraTarget // Depth
+);
+// Write to G-Buffer
+buf.DrawMesh(quad, Matrix4x4.identity, _renderGBufferMaterial);
+
+
+RenderGBuffer.shader
+// GBuffer structure
+struct gbufferOut
+{
+ half4 diffuse: SV_Target0; // Diffuse reflection
+ half4 specular: SV_Target1; // specular reflection
+ half4 normal: SV_Target2; // normal
+ half4 emission: SV_Target3; // emission light
+ float depth: SV_Depth; // depth
+};
+
+sampler2D _DepthBuffer; // depth
+sampler2D _NormalBuffer;// normal
+
+fixed4 _Diffuse; // Diffuse color
+fixed4 _Specular; // Color of specular light
+float4 _Emission; // Color of synchrotron radiation
+
+void frag(v2f i, out gbufferOut o)
+{
+ float2 uv = i.screenPos.xy * 0.5 + 0.5;
+
+ float d = tex2D(_DepthBuffer, uv).r;
+ float3 n = tex2D(_NormalBuffer, uv).xyz;
+
+#if UNITY_REVERSED_Z
+ if (Linear01Depth(d) > 1.0 - 1e-3)
+ discard;
+#else
+ if (Linear01Depth(d) < 1e-3)
+ discard;
+#endif
+
+ o.diffuse = _Diffuse;
+ o.specular = _Specular;
+ o.normal = float4(n.xyz , 1.0);
+
+ o.emission = _Emission;
+#ifndef UNITY_HDR_ON
+ o.emission = exp2(-o.emission);
+#endif
+
+ o.depth = d;
+}
+
+In the SetRenderTarget method, specify the G-Buffers to be written as the render targets. For the first argument — the target color buffers — specify a RenderTargetIdentifier array containing the BuiltinRenderTextureType enumeration values GBuffer0, GBuffer1, GBuffer2, and GBuffer3; for the second argument — the target depth buffer — specify CameraTarget. This sets the default set of G-Buffers and depth information as the render target.
+In order to perform calculations on the screen space with a shader that specifies multiple render targets in the command buffer , this is achieved here by using the DrawMesh method to draw a rectangular mesh that covers the screen.
+ +Don't forget to release the temporary RenderTexture generated by the GetTemporaryRT method with the ReleaseTemporaryRT method. If this is not done, memory will be allocated every frame and memory overflow will occur.
+ +
++Figure 3.10: Rendering result +
+This chapter focused on manipulating geometry with Deferred Shading . This sample does not implement translucent-object elements such as light absorption according to thickness, background transmission considering internal refraction, or caustics; if you want to express the result as a liquid, you should implement these elements as well. To make use of Deferred Rendering , it is necessary to understand calculations in 3DCG rendering — coordinate systems, coordinate transformations, shading, lighting, and so on — that you do not usually need to be aware of when using Unity. As far as I have observed, there are not many sample codes or references for learning Deferred Rendering in Unity, and I do not yet fully understand it myself, but I feel it is a technology that can expand the range of CG expression. We hope that this chapter will help those who want to create CG expressions that cannot be realized with conventional Forward Rendering .
+ +http://developer.download.nvidia.com/presentations/2010/gdc/Direct3D_Effects.pdf
+
|
![]() |
|
Based on the "Cell Division and Growth Algorithm 1" * 3 introduced in a tutorial for iGeo * 2 — a library for procedural modeling in architecture built on Processing * 1 — we develop a program that uses the GPU to express cell division and growth.
+The sample in this chapter is "Cellular Growth" from
https://github.com/IndieVisualLab/UnityGraphicsProgramming3
.
In this chapter, through the cell division and growth program on the GPU
+I will explain about.
+[*1] https://processing.org/
[*2] http://igeo.jp
[*3] http://igeo.jp/tutorial/55.html
++Figure 4.1: CellularGrowthSphere.scene +
+First, I will introduce a simple implementation of only Particles, and then explain how to introduce Edge and express the network structure that grows and becomes complicated.
+ +In the simulation program, we prepare two structures, Particle and Edge, to imitate the behavior of the cell.
+One particle represents one cell and behaves as follows.
+
++Figure 4.2: Cell behavior +
+Edge expresses how cells stick to each other. By connecting the divided particles with Edge and attracting them like a spring, the particles are attached to each other to express the network structure of cells.
+
++Figure 4.3: Edge sticks connected particles together +
+In this section, we will explain by gradually implementing the necessary functions.
+ +First, we will explain the behavior and implementation of particles through the sample CellularGrowthParticleOnly.cs that implements only the behavior of particles.
+The structure of Particles is defined as follows.
+Particle.cs
+[StructLayout(LayoutKind.Sequential)]
+public struct Particle_t {
+ public Vector2 position; // position
+ public Vector2 velocity; // velocity
+ float radius; // size
+ float threshold; // maximum size
+ int links; // Number of connected Edges (used in the scene below)
+ uint alive; // activation flag
+}
+
+In this project, Particles are increased or decreased at any time, so the object pool is managed by Append / ConsumeStructuredBuffer so that the number of objects can be controlled on the GPU.
+ +Append / ConsumeStructuredBuffer * 4 * 5 is a container for performing LIFO (Last In First Out) on the GPU made available from Direct3D11. AppendStructuredBuffer is responsible for adding data, and ConsumeStructuredBuffer is responsible for retrieving data.
+By using this container, you can dynamically control the number on the GPU and express the increase or decrease of objects.
+[*4] https://docs.microsoft.com/ja-jp/windows/desktop/direct3dhlsl/sm5-object-appendstructuredbuffer
[*5] https://docs.microsoft.com/ja-jp/windows/desktop/direct3dhlsl/sm5-object-consumestructuredbuffer
First, initialize the particle buffer and the object pool buffer.
+CellularGrowthParticleOnly.cs
+protected void Start () {
+ // Particle initialization
+ particleBuffer = new PingPongBuffer(count, typeof(Particle_t));
+
+ // Initialize the object pool
+ poolBuffer = new ComputeBuffer(
+ count,
+ Marshal.SizeOf(typeof(int)),
+ ComputeBufferType.Append
+ );
+ poolBuffer.SetCounterValue(0);
+ countBuffer = new ComputeBuffer(
+ 4,
+ Marshal.SizeOf(typeof(int)),
+ ComputeBufferType.IndirectArguments
+ );
+ countBuffer.SetData(countArgs);
+
+ // Object pool that manages divisible objects
+ dividablePoolBuffer = new ComputeBuffer(
+ count,
+ Marshal.SizeOf(typeof(int)),
+ ComputeBufferType.Append
+ );
+ dividablePoolBuffer.SetCounterValue (0);
+
+ // Particle and object pool initialization Kernel execution (see below)
+ InitParticlesKernel();
+
+ ...
+}
+
+The PingPongBuffer class used as particleBuffer prepares two buffers, one for reading and the other for writing, and it is used in the scene of calculating the interaction of Particles described later.
+poolBuffer and dividablePoolBuffer are Append / ConsumeStructuredBuffer, and ComputeBufferType.Append is specified in the argument ComputeBufferType at the time of initialization. Append / ConsumeStructuredBuffer can handle variable length data, but as you can see from the initialization code, the upper limit of the number of data must be set when creating the buffer.
+The poolBuffer created as an int type Append / ConsumeStructuredBuffer is
+It functions as an object pool according to the flow. In other words, the int buffer of poolBuffer always points to the index of inactive Particles, and can be made to function as an object pool by fetching it as needed. ( Fig. 4.4 )
+
++Figure 4.4: The array on the left represents particleBuffer and the right represents poolBuffer.In the initial state, all particles in particleBuffer are inactive, but when particles appear, the index of the inactive Particle is taken out from poolBuffer and the corresponding index is displayed. Activate the particles in the area +
+countBuffer is an int type buffer and is used to manage the number of object pools.
+The InitParticlesKernel called at the end of Start runs the GPU kernel that initializes the Particles and object pool.
+CellularGrowthParticleOnly.cs
+protected void InitParticlesKernel()
+{
+ var kernel = compute.FindKernel("InitParticles");
+ compute.SetBuffer(kernel, "_Particles", particleBuffer.Read);
+
+ // Specify the object pool as AppendStructuredBuffer
+ compute.SetBuffer(kernel, "_ParticlePoolAppend", poolBuffer);
+
+ Dispatch1D(kernel, count);
+}
+
+The following is the kernel to be initialized.
+CellularGrowth.compute
+THREAD
+void InitParticles(uint3 id : SV_DispatchThreadID)
+{
+ uint idx = id.x;
+
+ uint count, strides;
+ _Particles.GetDimensions(count, strides);
+ if (idx >= count)
+ return;
+
+ // Particle initialization
+ Particle p = create();
+ p.alive = false; // Inactivate all Particles
+ _Particles[idx] = p;
+
+ // Add particle index to object pool
+ _ParticlePoolAppend.Append(idx);
+}
+
+By executing the above kernel, all the particles in the particleBuffer will be initialized and inactive, and the poolBuffer will store the indexes of all the particles in the inactive state.
+ +Now that we have initialized the particles, let's make them appear. In CellularGrowthParticleOnly.cs, particles are generated at the position where the mouse is clicked.
+CellularGrowthParticleOnly.cs
+protected void Update() {
+ ...
+ if(Input.GetMouseButton(0))
+ {
+ EmitParticlesKernel(GetMousePoint());
+ }
+ ...
+}
+
+When the mouse is clicked, it runs the EmitParticlesKernel to spawn particles.
+CellularGrowthParticleOnly.cs
+protected void EmitParticlesKernel(Vector2 point, int emitCount = 32)
+{
+ // Compare the number of object pools with emitCount,
+ // Prevent _ParticlePoolConsume.Consume () from running when the object pool is empty
+ emitCount = Mathf.Max(
+ 0,
+ Mathf.Min (emitCount, CopyPoolSize (poolBuffer))
+ );
+ if (emitCount <= 0) return;
+
+ var kernel = compute.FindKernel("EmitParticles");
+ compute.SetBuffer(kernel, "_Particles", particleBuffer.Read);
+
+ // Specify the object pool as ConsumeStructuredBuffer
+ compute.SetBuffer(kernel, "_ParticlePoolConsume", poolBuffer);
+
+ compute.SetVector("_Point", point);
+ compute.SetInt("_EmitCount", emitCount);
+
+ Dispatch1D(kernel, emitCount);
+}
+
+As you can see from the fact that the poolBuffer specified in the _ParticlePoolAppend parameter in InitParticlesKernel is specified in the _ParticlePoolConsume parameter in EmitParticlesKernel, the same buffer is specified in Append / ConsumeStructuredBuffer.
+Depending on the purpose of processing on the GPU, just changing the setting of whether to add a buffer (AppendStructuredBuffer) or to retrieve (ConsumeStructuredBuffer), the same buffer is sent to the GPU side from the CPU side. Become.
+At the beginning of EmitParticlesKernel, we compare emitCount with the size of the object pool obtained by CopyPoolSize. This prevents index retrieval when the object pool is empty: attempting to retrieve an index from an empty pool (running _ParticlePoolConsume.Consume inside the GPU kernel) results in unexpected behavior.
+CellularGrowth.compute
+THREAD
+void EmitParticles(uint3 id : SV_DispatchThreadID)
+{
+ // Avoid adding more Particles than _EmitCount
+ if (id.x >= (uint) _EmitCount)
+ return;
+
+ // Extract the index of the inactive Particle from the object pool
+ uint idx = _ParticlePoolConsume.Consume();
+
+ Particle c = create();
+
+ // Place the Particle at a position slightly offset from the mouse position
+ float2 offset = random_point_on_circle(id.xx + float2(0, _Time));
+ c.position = _Point.xy + offset;
+ c.radius = nrand(id.xx + float2(_Time, 0));
+
+ // Set the activated Particle to the inactive index location
+ _Particles[idx] = c;
+}
+
+In EmitParticles, the index of an inactive particle is taken out from the object pool, and the activated particle is set at the position of the corresponding index in the particleBuffer.
+By the above kernel processing, particles can be spawned while considering the number of object pools.
+ +Now that we have managed the appearance of particles, it's time to program the behavior of particles.
+The cells of the simulator developed in this chapter behave as follows, as shown in Figure 4.2 .
+Growth and Repulsion are executed every frame in Update.
+CellularGrowthParticleOnly.cs
+protected void Update() {
+ ...
+ UpdateParticlesKernel();
+ ...
+}
+...
+protected void UpdateParticlesKernel()
+{
+ var kernel = compute.FindKernel("UpdateParticles");
+
+ // Set the read buffer
+ compute.SetBuffer(kernel, "_ParticlesRead", particleBuffer.Read);
+
+ // Set a buffer for writing
+ compute.SetBuffer(kernel, "_Particles", particleBuffer.Write);
+
+ compute.SetFloat ("_Drag", drag); // Speed attenuation
+ compute.SetFloat ("_Limit", limit); // Speed limit
+ compute.SetFloat ("_Repulsion", repulsion); // Coefficient over repulsive distance
+ compute.SetFloat("_Grow", grow); // growth rate
+
+ Dispatch1D(kernel, count);
+
+ // Swap read and write buffers (Ping Pong)
+ particleBuffer.Swap();
+}
+
+The reason for setting the read and write buffers and swapping the buffers after processing will be described later.
+Below is the Update Particles kernel.
+CelluarGrowth.compute
+THREAD
+void UpdateParticles(uint3 id : SV_DispatchThreadID)
+{
+ uint idx = id.x;
+
+ uint count, strides;
+ _ParticlesRead.GetDimensions(count, strides);
+ if (idx >= count)
+ return;
+
+ Particle p = _ParticlesRead[idx];
+
+ // Process only activated Particles
+ if (p.alive)
+ {
+ // Grow: Particle growth
+ p.radius = min(p.threshold, p.radius + _DT * _Grow);
+
+ // Repulsion: Collisions between Particles
+ for (uint i = 0; i < count; i++)
+ {
+ Particle other = _ParticlesRead[i];
+ if(i == idx || !other.alive) continue;
+
+ // Calculate the distance between particles
+ float2 dir = p.position - other.position;
+ float l = length(dir);
+
+ // The distance between the particles is greater than the sum of their radii * _Repulsion
+ // If they are close, they are in conflict
+ float r = (p.radius + other.radius) * _Repulsion;
+ if (l < r)
+ {
+ p.velocity += normalize(dir) * (r - l);
+ }
+ }
+
+ float2 vel = p.velocity * _DT;
+ float vl = length(vel);
+ // check if velocity length over than zero to avoid NaN position
+ if (vl > 0)
+ {
+ p.position += normalize(vel) * min(vl, _Limit);
+
+ // Attenuate velocity according to _Drag parameter
+ p.velocity =
+ normalize(p.velocity) *
+ min (
+ length(p.velocity) * _Drag,
+ _Limit
+ );
+ }
+ else
+ {
+ p.velocity = float2(0, 0);
+ }
+ }
+
+ _Particles[idx] = p;
+}
+
+The UpdateParticles kernel uses a read buffer (_ParticlesRead) and a write buffer (_Particles) to calculate collisions between particles.
+If the same buffer were used for both reading and writing here, GPU parallel processing could cause one thread to read particle information that has already been updated by another thread when calculating particle positions, producing inconsistent results (a data race).
+If no thread refers to information updated by another thread, it is not necessary to prepare separate buffers for reading and writing; but when threads do refer to data updated by other threads, as in the UpdateParticles kernel, separate read and write buffers are needed, and they are swapped after each update. (This is called a Ping Pong buffer because the buffers alternate after each pass.)
+ +Particle splitting is performed by coroutines at regular intervals.
+Particle splitting process
+It is performed in the following flow.
+CellularGrowthParticleOnly.cs
+protected void Start() {
+ ...
+ StartCoroutine(IDivider());
+}
+
+...
+
+protected IEnumerator IDivider()
+{
+ yield return 0;
+ while(true)
+ {
+ yield return new WaitForSeconds(divideInterval);
+ Divide();
+ }
+}
+
+protected void Divide() {
+ GetDividableParticlesKernel();
+ DivideParticlesKernel(maxDivideCount);
+}
+
+...
+
+// Store divisible particle candidates in dividablePoolBuffer
+protected void GetDividableParticlesKernel()
+{
+ // Reset dividablePoolBuffer
+ dividablePoolBuffer.SetCounterValue (0);
+
+ var kernel = compute.FindKernel("GetDividableParticles");
+ compute.SetBuffer(kernel, "_Particles", particleBuffer.Read);
+ compute.SetBuffer(kernel, "_DividablePoolAppend", dividablePoolBuffer);
+
+ Dispatch1D(kernel, count);
+}
+
+protected void DivideParticlesKernel(int maxDivideCount = 16)
+{
+ // With the number you want to split (maxDivideCount)
+ // Compare the number of particles that can be split (the size of the dividable PoolBuffer)
+ maxDivideCount = Mathf.Min (
+ CopyPoolSize (dividablePoolBuffer),
+ maxDivideCount
+ );
+
+ // With the number you want to split (maxDivideCount)
+ // Compare the number of particles remaining in the object pool (poolBuffer size)
+ maxDivideCount = Mathf.Min (CopyPoolSize (poolBuffer), maxDivideCount);
+
+ if (maxDivideCount <= 0) return;
+
+ var kernel = compute.FindKernel("DivideParticles");
+ compute.SetBuffer(kernel, "_Particles", particleBuffer.Read);
+ compute.SetBuffer(kernel, "_ParticlePoolConsume", poolBuffer);
+ compute.SetBuffer(kernel, "_DividablePoolConsume", dividablePoolBuffer);
+ compute.SetInt("_DivideCount", maxDivideCount);
+
+ Dispatch1D(kernel, count);
+}
+
+The GetDividableParticles kernel adds divisible particles (active particles) to the dividablePoolBuffer, and uses that buffer to determine the number of times to execute the DivideParticles kernel that actually performs the split processing.
+How to find the number of splits is as shown at the beginning of the DivideParticlesKernel function.
+Compare with. Comparing these numbers prevents the split process from running beyond the limit of the number of splits that can be split.
+The following is the contents of the kernel.
+CellularGrowth.compute
+// Function that determines the candidate particles that can be split
+// You can adjust the split pattern by changing the conditions here
+bool dividable_particle(Particle p, uint idx)
+{
+ // Split according to growth rate
+ float rate = (p.radius / p.threshold);
+ return rate >= 0.95;
+
+ // Randomly split
+ // return nrand(float2(idx, _Time)) < 0.1;
+}
+
+// Function that splits particles
+uint divide_particle(uint idx, float2 offset)
+{
+ Particle parent = _Particles[idx];
+ Particle child = create();
+
+ // Set the size in half
+ float rh = parent.radius * 0.5;
+ rh = max(rh, 0.1);
+ parent.radius = child.radius = rh;
+
+ // Shift the position of parent and child
+ float2 center = parent.position;
+ parent.position = center - offset;
+ child.position = center + offset;
+
+ // Randomly set the maximum size of the child
+ float x = nrand(float2(_Time, idx));
+ child.threshold = rh * lerp(1.25, 2.0, x);
+
+ // Get the child index from the object pool and set the child particle in the buffer
+ uint cidx = _ParticlePoolConsume.Consume();
+ _Particles[cidx] = child;
+
+ // Update parent particle
+ _Particles[idx] = parent;
+
+ return cidx;
+}
+
+uint divide_particle(uint idx)
+{
+ Particle parent = _Particles[idx];
+
+ // Randomly shift the position
+ float2 offset =
+ random_point_on_circle(float2(idx, _Time)) *
+ parent.radius * 0.25;
+
+ return divide_particle(idx, offset);
+}
+
+...
+
+THREAD
+void GetDividableParticles(uint3 id : SV_DispatchThreadID)
+{
+ uint idx = id.x;
+ uint count, strides;
+ _Particles.GetDimensions(count, strides);
+ if (idx >= count)
+ return;
+
+ Particle p = _Particles[idx];
+ if (p.alive && dividable_particle(p, idx))
+ {
+ _DividablePoolAppend.Append(idx);
+ }
+}
+
+THREAD
+void DivideParticles(uint3 id : SV_DispatchThreadID)
+{
+ if (id.x >= _DivideCount)
+ return;
+
+ uint idx = _DividablePoolConsume.Consume();
+ divide_particle(idx);
+}
+
+The results of cell division achieved by these processes are as follows.
+
++Figure 4.5: CellularGrowthParticleOnly.scene +
+In order to realize how cells stick to each other, we will introduce Edge that connects particles and express cells in a network structure.
+From here, we will proceed through the implementation of CellularGrowth.cs.
+Edges are added when the particles split, connecting the split particles together.
+The structure of Edge is defined as follows.
+Edge.cs
+[StructLayout(LayoutKind.Sequential)]
+public struct Edge_t
+{
+ public int a, b; // Index of two Particles connected by Edge
+ public Vector2 force; // The force to attach two Particles together
+ uint alive; // activation flag
+}
+
+Edge also increases or decreases like Particle, so manage it with Append / ConsumeStructuredBuffer.
+ +The network structure is divided according to the following flow.
+Although it is the Particle that actually splits, the term "splittable Edge" is used here because, in the split patterns introduced later, it is convenient to process the Edges connected to the splitting Particle; for this reason, the network structure is split in Edge units.
+The above-mentioned flow of division allows one particle to repeat division and generate a large network structure.
+Edge splitting is performed by coroutines at regular intervals, similar to CellularGrowthParticleOnly.cs in the previous section.
+CellularGrowth.cs
+protected IEnumerator IDivider()
+{
+ yield return 0;
+ while(true)
+ {
+ yield return new WaitForSeconds(divideInterval);
+ Divide();
+ }
+}
+
+protected void Divide()
+{
+ // 1. Get divisible Edge candidates and store them in the divideablePoolBuffer
+ GetDividableEdgesKernel();
+
+ int dividableEdgesCount = CopyPoolSize (dividablePoolBuffer);
+ if(dividableEdgesCount == 0)
+ {
+ // 2. If the splittable Edge is empty,
+ // Split a Particle with 0 connected Edges (Particle with 0 links) and split it.
+ // Connect two Particles with Edge
+ DivideUnconnectedParticles();
+ } else
+ {
+ // 3. If there is a splittable Edge, take the Edge from the divideablePoolBuffer and split it.
+ // Execute Edge split according to split pattern (described later)
+ switch(pattern)
+ {
+ case DividePattern.Closed:
+ // Patterns that generate closed network structures
+ DivideEdgesClosedKernel(
+ dividableEdgesCount,
+ maxDivideCount
+ );
+ break;
+ case DividePattern.Branch:
+ // Branching pattern
+ DivideEdgesBranchKernel(
+ dividableEdgesCount,
+ maxDivideCount
+ );
+ break;
+ }
+ }
+}
+
+...
+
+protected void GetDividableEdgesKernel()
+{
+ // Reset the buffer that stores the splittable Edge
+ dividablePoolBuffer.SetCounterValue (0);
+
+ var kernel = compute.FindKernel("GetDividableEdges");
+ compute.SetBuffer(
+ kernel, "_Particles",
+ particlePool.ObjectPingPong.Read
+ );
+ compute.SetBuffer(kernel, "_Edges", edgePool.ObjectBuffer);
+ compute.SetBuffer(kernel, "_DividablePoolAppend", dividablePoolBuffer);
+
+ // Maximum number of particle connections
+ compute.SetInt("_MaxLink", maxLink);
+
+ Dispatch1D(kernel, count);
+}
+
+...
+
+protected void DivideUnconnectedParticles()
+{
+ var kernel = compute.FindKernel("DivideUnconnectedParticles");
+ compute.SetBuffer(
+ kernel, "_Particles",
+ particlePool.ObjectPingPong.Read
+ );
+ compute.SetBuffer(
+ kernel, "_ParticlePoolConsume",
+ particlePool.PoolBuffer
+ );
+ compute.SetBuffer(kernel, "_Edges", edgePool.ObjectBuffer);
+ compute.SetBuffer(kernel, "_EdgePoolConsume", edgePool.PoolBuffer);
+
+ Dispatch1D(kernel, count);
+}
+
+The kernels (GetDividableEdges) for getting divisible edges are:
+CellularGrowth.compute
+// Determine if it can be split
+bool dividable_edge(Edge e, uint idx)
+{
+ Particle pa = _Particles[e.a];
+ Particle pb = _Particles[e.b];
+
+ // The number of particle connections does not exceed the maximum number of connections (_MaxLink)
+ // Allow splitting if the splitting conditions defined in dividable_particle are met
+ return
+ !(pa.links >= _MaxLink && pb.links >= _MaxLink) &&
+ (dividable_particle(pa, e.a) && dividable_particle(pb, e.b));
+}
+
+...
+
+// Get a splittable Edge
+THREAD
+void GetDividableEdges(uint3 id : SV_DispatchThreadID)
+{
+ uint idx = id.x;
+ uint count, strides;
+ _Edges.GetDimensions(count, strides);
+ if (idx >= count)
+ return;
+
+ Edge e = _Edges[idx];
+ if (e.alive && dividable_edge(e, idx))
+ {
+ _DividablePoolAppend.Append(idx);
+ }
+}
+
+If there is no splittable Edge, run the following kernel (DivideUnconnectedParticles), which splits Particles that have no connected Edges.
+CellularGrowth.compute
+// A function that creates an Edge that connects Particles with index a and Particles b
+void connect(int a, int b)
+{
+ // Fetch the inactive Edge index from the Edge object pool
+ uint eidx = _EdgePoolConsume.Consume();
+
+ // Using Atomic operation (described later)
+ // Increment the number of connections for each particle
+ InterlockedAdd(_Particles[a].links, 1);
+ InterlockedAdd(_Particles[b].links, 1);
+
+ Edge e;
+ e.a = a;
+ e.b = b;
+ e.force = float2(0, 0);
+ e.alive = true;
+ _Edges[eidx] = e;
+}
+
+...
+
+// Split a Particle that does not have a connected Edge
+THREAD
+void DivideUnconnectedParticles(uint3 id : SV_DispatchThreadID)
+{
+ uint count, stride;
+ _Particles.GetDimensions(count, stride);
+ if (id.x >= count)
+ return;
+
+ uint idx = id.x;
+ Particle parent = _Particles[idx];
+ if (!parent.alive || parent.links > 0)
+ return;
+
+ // Generate a split child Particle from a parent Particle
+ uint cidx = divide_particle(idx);
+
+ // Connect parent and child particles with Edge
+ connect(idx, cidx);
+}
+
+The connect function, which creates an Edge that connects split particles, uses a technique called Atomic operation to increment the number of particle connections.
+When a thread performs a series of read-modify-write operations on data in global or shared memory, you may want to prevent the value from being changed mid-sequence by writes from other threads to the same memory area. (Otherwise a data race occurs — a problem peculiar to parallel processing in which the result changes depending on the order in which threads access memory.)
+Atomic arithmetic guarantees this, preventing interference from other threads during resource arithmetic operations (four arithmetic operations and comparisons), and safely realizing sequential processing on the GPU.
+In HLSL, the functions * 6 that perform these operations have a prefix called Interlocked, and the examples in this chapter use InterlockedAdd.
+The InterlockedAdd function is the process of adding the integer specified in the second argument to the resource specified in the first argument, and increments the number of connections by adding 1 to _Particles [index] .links.
+This allows you to manage the number of connections consistently between threads, and you can increase or decrease the number of connections consistently.
+[*6] https://docs.microsoft.com/ja-jp/windows/desktop/direct3d11/direct3d-11-advanced-stages-cs-atomic-functions
If there is a splittable Edge, remove the Edge from the dividablePoolBuffer and split it. As you can see from the enum parameter called DividePattern, various patterns can be applied to the division.
+Here, we introduce a split pattern (DividePattern.Closed) that creates a closed network structure.
+ +The pattern that creates a closed network structure splits as shown in the figure below.
+
++Figure 4.6: Pattern that creates a closed network structure (DividePattern.Closed) +
+CellularGrowth.cs
+protected void DivideEdgesClosedKernel(
+ int dividableEdgesCount,
+ int maxDivideCount = 16
+)
+{
+ // Pattern that splits into a closed network structure
+ var kernel = compute.FindKernel("DivideEdgesClosed");
+ DivideEdgesKernel(kernel, dividableEdgesCount, maxDivideCount);
+}
+
+// Common processing in split patterns
+protected void DivideEdgesKernel(
+ int kernel,
+ int dividableEdgesCount,
+ int maxDivideCount
+)
+{
+ // Prevent Consume from being called when the object pool is empty
+ // Compare maxDivideCount with the size of each object pool
+ maxDivideCount = Mathf.Min(dividableEdgesCount, maxDivideCount);
+ maxDivideCount = Mathf.Min(particlePool.CopyPoolSize(), maxDivideCount);
+ maxDivideCount = Mathf.Min(edgePool.CopyPoolSize(), maxDivideCount);
+ if (maxDivideCount <= 0) return;
+
+ compute.SetBuffer(
+ kernel, "_Particles",
+ particlePool.ObjectPingPong.Read
+ );
+ compute.SetBuffer(
+ kernel, "_ParticlePoolConsume",
+ particlePool.PoolBuffer
+ );
+ compute.SetBuffer(kernel, "_Edges", edgePool.ObjectBuffer);
+ compute.SetBuffer(kernel, "_EdgePoolConsume", edgePool.PoolBuffer);
+
+ compute.SetBuffer(kernel, "_DividablePoolConsume", dividablePoolBuffer);
+ compute.SetInt("_DivideCount", maxDivideCount);
+
+ Dispatch1D(kernel, maxDivideCount);
+}
+
+The function divide_edge_closed used in the GPU kernel (DivideEdgesClosed) that generates a closed network structure changes the processing according to the number of Edges that the Particle has.
+If the number of connections of one of the particles is 1, connect them with Edge so as to draw a triangle with 3 particles added to the split particles. ( Fig. 4.7 )
+
++Figure 4.7: Two particles and split particles form a closed network in a triangular shape. +
+In other cases, the Edge is connected so that the split particle is inserted between the two existing particles, and the Edge that was connected to the split source particle is converted to maintain a closed network. I will. ( Fig. 4.8 )
+
++Figure 4.8: Insert a split particle between two existing particles and adjust the Edge connectivity to maintain a closed network. +
+By repeating this division process, a closed network structure grows.
+CellularGrowth.compute
+// A function that performs a split into a closed network structure
+void divide_edge_closed(uint idx)
+{
+ Edge e = _Edges[idx];
+
+ Particle pa = _Particles[e.a];
+ Particle pb = _Particles[e.b];
+
+ if ((pa.links == 1) || (pb.links == 1))
+ {
+ // Divide into a triangle with 3 particles and connect them with Edge
+ uint cidx = divide_particle(e.a);
+ connect(e.a, cidx);
+ connect(cidx, e.b);
+ }
+ else
+ {
+ // Generate a Particle between two Particles and
+ // Connect Edges so that they are connected
+ float2 dir = pb.position - pa.position;
+ float2 offset = normalize(dir) * pa.radius * 0.25;
+ uint cidx = divide_particle(e.a, offset);
+
+ // Connect the parent particle and the split child particle
+ connect(e.a, cidx);
+
+ // Edge that connected the original two Particles,
+ // Convert to Edge connecting split child Particles
+ InterlockedAdd(_Particles[e.a].links, -1);
+ InterlockedAdd(_Particles[cidx].links, 1);
+ e.a = cidx;
+ }
+
+ _Edges[idx] = e;
+}
+
+...
+
+// Pattern that splits into a closed network structure
+THREAD
+void DivideEdgesClosed(uint3 id : SV_DispatchThreadID)
+{
+ if (id.x >= _DivideCount)
+ return;
+
+ // Get the index of the splittable Edge
+ uint idx = _DividablePoolConsume.Consume();
+ divide_edge_closed(idx);
+}
+
+Many naturally occurring cells have the property of sticking to other cells. To mimic these properties, Edge pulls two connected particles together like a spring.
+This spring-like attraction between connected particles is implemented as follows.
+CellularGrowth.cs
+protected void Update() {
+ ...
+ UpdateEdgesKernel();
+ SpringEdgesKernel ();
+ ...
+}
+
+...
+
+protected void UpdateEdgesKernel()
+{
+ // Calculate the force that the spring attracts for each Edge
+ var kernel = compute.FindKernel("UpdateEdges");
+ compute.SetBuffer(
+ kernel, "_Particles",
+ particlePool.ObjectPingPong.Read
+ );
+ compute.SetBuffer(kernel, "_Edges", edgePool.ObjectBuffer);
+ compute.SetFloat("_Spring", spring);
+
+ Dispatch1D(kernel, count);
+}
+
+protected void SpringEdgesKernel()
+{
+ // Apply the spring force of Edge for each particle
+ var kernel = compute.FindKernel("SpringEdges");
+ compute.SetBuffer(
+ kernel, "_Particles",
+ particlePool.ObjectPingPong.Read
+ );
+ compute.SetBuffer(kernel, "_Edges", edgePool.ObjectBuffer);
+
+ Dispatch1D(kernel, count);
+}
+
+The following is the contents of the kernel.
+CellularGrowth.compute
+THREAD
+void UpdateEdges(uint3 id : SV_DispatchThreadID)
+{
+ uint idx = id.x;
+ uint count, strides;
+ _Edges.GetDimensions(count, strides);
+ if (idx >= count)
+ return;
+
+ Edge e = _Edges[idx];
+
+ // Initialize the attractive force
+ e.force = float2(0, 0);
+
+ if (!e.alive)
+ {
+ _Edges[idx] = e;
+ return;
+ }
+
+ Particle pa = _Particles[e.a];
+ Particle pb = _Particles[e.b];
+ if (!pa.alive || !pb.alive)
+ {
+ _Edges[idx] = e;
+ return;
+ }
+
+ // Measure the distance between the two Particles,
+ // Apply force to attract if you are too far away or too close
+ float2 dir = pa.position - pb.position;
+ float r = pa.radius + pb.radius;
+ float len = length(dir);
+ if (abs(len - r) > 0)
+ {
+ // Apply force to the proper distance (sum of radii of each other)
+ float l = ((len - r) / r);
+ float2 f = normalize(dir) * l * _Spring;
+ e.force = f;
+ }
+
+ _Edges[idx] = e;
+}
+
+THREAD
+void SpringEdges(uint3 id : SV_DispatchThreadID)
+{
+ uint idx = id.x;
+ uint count, strides;
+ _Particles.GetDimensions(count, strides);
+ if (idx >= count)
+ return;
+
+ Particle p = _Particles[idx];
+ if (!p.alive || p.links <= 0)
+ return;
+
+ // The more connections you have, the weaker your attraction
+ float dif = 1.0 / p.links;
+
+ int iidx = (int)idx;
+
+ _Edges.GetDimensions(count, strides);
+
+ // Find the Particles that are connected to you from all Edges
+ for (uint i = 0; i < count; i++)
+ {
+ Edge e = _Edges[i];
+ if (!e.alive)
+ continue;
+
+ // Apply force when you find a connected Edge
+ if (e.a == iidx)
+ {
+ p.velocity -= e.force * dif;
+ }
+ else if (e.b == iidx)
+ {
+ p.velocity += e.force * dif;
+ }
+ }
+
+ _Particles[idx] = p;
+}
+
+By the above processing, it is possible to express how the cells composed of the network grow.
+ +Various division patterns can be designed by adjusting the judgment of the edge to be divided (dividable_edge function) and the division logic.
+In the sample project CellularGrowth.cs, the split pattern can be switched by the enum parameter.
+ +In the branching pattern, the division is performed as shown in Figure 4.9 below .
+Split child Particles connect only to the parent Particle. A branched network grows just by repeating this.
+
++Figure 4.9: Branching split pattern +
+CellularGrowth.cs
+protected void DivideEdgesBranchKernel(
+ int dividableEdgesCount,
+ int maxDivideCount = 16
+)
+{
+ // Execute a branching split pattern
+ var kernel = compute.FindKernel("DivideEdgesBranch");
+ DivideEdgesKernel(kernel, dividableEdgesCount, maxDivideCount);
+}
+
+CellularGrowth.compute
+// Function that performs branching
+void divide_edge_branch(uint idx)
+{
+ Edge e = _Edges[idx];
+ Particle pa = _Particles[e.a];
+ Particle pb = _Particles[e.b];
+
+ // Get the Particle index with the smaller number of connections
+ uint i = lerp(e.b, e.a, step(pa.links, pb.links));
+
+ uint cidx = divide_particle(i);
+ connect(i, cidx);
+}
+
+...
+
+// Branching split pattern
+THREAD
+void DivideEdgesBranch(uint3 id : SV_DispatchThreadID)
+{
+ if (id.x >= _DivideCount)
+ return;
+
+ // Get the index of the splittable Edge
+ uint idx = _DividablePoolConsume.Consume();
+ divide_edge_branch(idx);
+}
+
+In a branching pattern, the logic that determines which edges are split has a significant visual impact. You can control the degree of branching by changing the value of the maximum number of connections (_MaxLink) of the Particles referenced in the dividable_edge function.
+
++Figure 4.10: Pattern with _MaxLink set to 2 (DividePattern.Branch) +
+
++Figure 4.11: Pattern with _MaxLink set to 3 (Divide Pattern.Branch) +
+
++Figure 4.12: A pattern in which _MaxLink was set to 3 to grow to some extent and then set to 2 to continue growing (Divide Pattern.Branch). +
+In this chapter, we introduced a program that simulates cell division and growth on the GPU.
+Other attempts to generate CG with cells as a motif include the Morphogenetic Creations project by Andy Lomas *7 and, for academic purposes, the Computational Biology project by J.A. Kaandorp *8; the latter in particular performs more realistic simulations grounded in biology.
+Also, Max Cooper's music video * 10 by Maxime Causeret * 9 is an example of a wonderful video work using organic motifs such as cells. (Houdini is used for the simulation part in this video work)
+This time, it was limited to those that split and grow in two dimensions, but as shown in the original iGeo tutorial * 12 , this program can also be extended in three dimensions.
+In the extension to three dimensions, it is also possible to realize a mesh that grows organically and squirms, by using a cell network in which cells connect in groups of three (forming triangles) and grow and spread. Samples of 3D extensions are available at https://github.com/mattatz/CellularGrowth, so if you are interested, please refer to them.
+[*7] http://www.andylomas.com/
[*8] https://staff.fnwi.uva.nl/j.a.kaandorp/research.html
[*9] http://teresuac.fr/
[*10] https://vimeo.com/196269431
[*11] https://www.sidefx.com/
[*12] http://igeo.jp/tutorial/56.html
|
![]() |
|
In nature, there are various patterns such as horizontal stripes of tropical fish and wrinkles like a maze of coral. The genius mathematician Alan Turing expressed the occurrence of these patterns that exist in nature with mathematical formulas. The pattern generated by the mathematical formulas he derived is called the "Turing pattern". This equation is commonly referred to as the reaction-diffusion equation. Based on this reaction-diffusion equation, we will develop a program to create a picture like a pattern of a living thing using Compute Shader on Unity. At first, we will create a program that operates on a two-dimensional plane, but at the end, we will also introduce a program that operates on a three-dimensional space. For details on ComputeShader, refer to "Chapter 2 Introduction to ComputeShader" in UnityGraphicsProgramming vol.1.
+The sample in this chapter is "Reaction Diffusion" from
https://github.com/IndieVisualLab/UnityGraphicsProgramming3
.
As the name suggests, reaction-diffusion is a mathematical model of how the concentrations of one or more substances distributed in space change under the influence of two processes: local chemical reactions, in which the substances transform into each other, and diffusion, which causes them to spread out through space. This time, we will use the "Gray-Scott model" as the reaction-diffusion equation. The Gray-Scott model was published in a treatise by P. Gray and S.K. Scott in 1983. Roughly speaking, when two virtual substances, U and V, fill a grid, they react with each other, increase or decrease, and diffuse, and as the concentrations in the space change over time, various patterns appear.
+Figure 5.1 is a schematic diagram of the “Reaction” of the Gray-Scott model.
+
++Figure 5.1: Schematic diagram of the "Reaction" of the Gray-Scott model +
+Also, as shown in Figure 5.2 , U and V spread to the adjacent grid at different speeds.
+
++Figure 5.2: Schematic diagram of "Diffusion" in the Gray-Scott model +
+This difference in diffusion rate creates a difference in U and V concentrations, creating a pattern. The reaction and diffusion of these U and V are expressed by the following equations.
+\frac{\partial u}{\partial t} = D_u \Delta u - uv^2 + f(1-u)
+
+\frac{\partial v}{\partial t} = D_v \Delta v + uv^2 - (f+k)v
+
+In this formula, U is represented by u and V by v. Each equation is roughly divided into three parts.
The first part, D_u \Delta u and D_v \Delta v, is the diffusion term; the constants D_u and D_v are the diffusion rates of u and v. The \Delta u and \Delta v factors are the Laplacian, which represents diffusion in the direction that evens out the concentration difference between a cell and its surroundings.
The second part, uv^2, is the reaction term: one U reacts with two Vs, so U decreases and V increases.
The third part, +f(1-u), is the inflow term and represents the amount of U replenished (Feed) as U decreases: the closer u is to 0, the more is replenished, and the closer it is to 1, the less. -(f+k)v is the outflow term, which removes the increased V at a constant rate (Kill).
To summarize more simply: one U reacts with two Vs, so U decreases and V increases. At this rate U would keep decreasing and V would keep increasing, so U is replenished by +f(1-u) and V is forcibly reduced by -(f+k)v. Then U and V are diffused into their surroundings by D_u \Delta u and D_v \Delta v.
+ +Now that I have somehow understood the atmosphere of the equation, I will move on to the explanation of the implementation in Unity. The sample scene whose operation can be confirmed is ReactionDiffusion2D_1 .
+ +Suppose you have U and V density values in a two-dimensional planar grid. This time, we will use Compute Shader to process in parallel, so we will manage the grid with Compute Buffer. First, define the structure in one grid.
+ReactionDiffusion2D.cs
+public struct RDData
+{
+ public float u; // U concentration
+ public float v; // V concentration
+}
+
+ReactionDiffusion2D.cs
+/// <summary>
+/// Initialization
+/// </summary>
+void Initialize()
+{
+ ...
+
+ int wh = texWidth * texHeight; // Buffer size
+ buffers = new ComputeBuffer [2]; // Array initialization of ComputeBuffer for double buffering
+
+ for (int i = 0; i < buffers.Length; i++)
+ {
+ // Grid initialization
+ buffers[i] = new ComputeBuffer(wh, Marshal.SizeOf(typeof(RDData)));
+ }
+
+ // Grid array for reset
+ bufData = new RDData[wh];
+ bufData2 = new RDData[wh];
+
+ // Buffer initialization
+ ResetBuffer();
+
+ // Initialize the Seed addition buffer
+ inputData = new Vector2[inputMax];
+ inputIndex = 0;
+ inputBuffer = new ComputeBuffer(
+ inputMax, Marshal.SizeOf(typeof(Vector2))
+ );
+}
+
+The update uses two ComputeBuffers: one for reading and one for writing. Because the Compute Shader is processed in parallel across many threads, a computation that references neighboring cells, as here, would — with a single buffer — read values from cells that other threads have already updated, so the result would depend on thread ordering. To prevent this, the buffer is split into a read buffer and a write buffer.
+ +ReactionDiffusion2D.cs
+// Update process
+void UpdateBuffer()
+{
+ cs.SetInt("_TexWidth", texWidth);
+ cs.SetInt("_TexHeight", texHeight);
+ cs.SetFloat("_DU", du);
+ cs.SetFloat("_DV", dv);
+
+ cs.SetFloat("_Feed", feed);
+ cs.SetFloat("_K", kill);
+
+ cs.SetBuffer(kernelUpdate, "_BufferRead", buffers[0]);
+ cs.SetBuffer(kernelUpdate, "_BufferWrite", buffers[1]);
+ cs.Dispatch(kernelUpdate,
+ Mathf.CeilToInt((float)texWidth / THREAD_NUM_X),
+ Mathf.CeilToInt((float)texHeight / THREAD_NUM_X),
+ 1);
+
+ SwapBuffer();
+}
+
+In the source on the C # side, the parameters that were also in the above equation are passed to the Compute Shader for update processing. Next, the update process in Compute Shader is explained.
+ReactionDiffusion2D.compute
+// Update process
+[numthreads(THREAD_NUM_X, THREAD_NUM_X, 1)]
+void Update(uint3 id : SV_DispatchThreadID)
+{
+
+ int idx = GetIndex(id.x, id.y);
+ float u = _BufferRead[idx].u;
+ float v = _BufferRead[idx].v;
+ float uvv = u * v * v;
+ float f, k;
+
+ f = _Feed;
+ k = _K;
+
+ _BufferWrite[idx].u = saturate(
+ u + (_DU * LaplaceU(id.x, id.y) - uvv + f * (1.0 - u))
+ );
+ _BufferWrite[idx].v = saturate(
+ v + (_DV * LaplaceV(id.x, id.y) + uvv - (k + f) * v)
+ );
+}
+
+The calculation is exactly the same as the above equation. GetIndex () is a function for associating 2D grid coordinates with 1D ComputeBuffer index.
+ReactionDiffusion2D.compute
+// Buffer index calculation
+int GetIndex(int x, int y) {
+ x = (x < 0) ? x + _TexWidth : x;
+ x = (x >= _TexWidth) ? x - _TexWidth : x;
+
+ y = (y < 0) ? y + _TexHeight : y;
+ y = (y >= _TexHeight) ? y - _TexHeight : y;
+
+ return y * _TexWidth + x;
+}
+
+_BufferRead contains the calculation result one frame before. Extract u and v from there. LaplaceU and LaplaceV are Laplacian functions that collect the U and V concentrations of 8 squares around your grid. This will average the surrounding grid and density. The diagonal grid is adjusted to have a low degree of influence.
+ReactionDiffusion2D.compute
+// Laplacian function of U
+float LaplaceU(int x, int y) {
+ float sumU = 0;
+
+ for (int i = 0; i < 9; i++) {
+ int2 pos = laplaceIndex[i];
+ int idx = GetIndex(x + pos.x, y + pos.y);
+ sumU += _BufferRead[idx].u * laplacePower[i];
+ }
+
+ return sumU;
+}
+
+// Laplacian function of V
+float LaplaceV(int x, int y) {
+ float sumV = 0;
+
+ for (int i = 0; i < 9; i++) {
+ int2 pos = laplaceIndex[i];
+ int idx = GetIndex(x + pos.x, y + pos.y);
+ sumV += _BufferRead[idx].v * laplacePower[i];
+ }
+
+ return sumV;
+}
+
+After calculating u and v, write to _BufferWrite. saturate is insurance for clipping between 0 and 1.
+ +By pressing the A key and C key, the function to intentionally add the density difference between U and V to the grid is provided. Press the A key to place SeedNum points (Seeds) at random positions. Press the C key to place one point in the center.
+ReactionDiffusion2D.cs
+/// <summary>
+/// Add Seed
+/// </summary>
+/// <param name="x"></param>
+/// <param name="y"></param>
+void AddSeed(int x, int y)
+{
+ if (inputIndex < inputMax)
+ {
+ inputData[inputIndex].x = x;
+ inputData[inputIndex].y = y;
+ inputIndex++;
+ }
+}
+
+The inputData array stores the coordinates of the points on the grid.
+ReactionDiffusion2D.cs
+/// <summary>
+/// Pass the Seed array to the Compute Shader
+/// </summary>
+void AddSeedBuffer()
+{
+ if (inputIndex > 0)
+ {
+ inputBuffer.SetData(inputData);
+ cs.SetInt("_InputNum", inputIndex);
+ cs.SetInt("_TexWidth", texWidth);
+ cs.SetInt("_TexHeight", texHeight);
+ cs.SetInt("_SeedSize", seedSize);
+ cs.SetBuffer(kernelAddSeed, "_InputBufferRead", inputBuffer);
+ cs.SetBuffer(kernelAddSeed, "_BufferWrite", buffers[0]); // 0 because this runs before the update
+ cs.Dispatch(kernelAddSeed,
+ Mathf.CeilToInt((float)inputIndex / (float)THREAD_NUM_X),
+ 1,
+ 1);
+ inputIndex = 0;
+ }
+}
+
+The inputData array of coordinates entered above is set into inputBuffer and passed to the ComputeShader.
+ReactionDiffusion2D.compute
+// Add seed
+[numthreads(THREAD_NUM_X, 1, 1)]
+void AddSeed(uint id : SV_DispatchThreadID)
+{
+ if (_InputNum <= id) return;
+
+ int w = _SeedSize;
+ int h = _SeedSize;
+ float radius = _SeedSize * 0.5;
+
+ int centerX = _InputBufferRead[id].x;
+ int centerY = _InputBufferRead[id].y;
+ int startX = _InputBufferRead[id].x - w / 2;
+ int startY = _InputBufferRead[id].y - h / 2;
+ for (int x = 0; x < w; x++)
+ {
+ for (int y = 0; y < h; y++)
+ {
+ float dis = distance(
+ float2(centerX, centerY),
+ float2(startX + x, startY + y)
+ );
+ if (dis <= radius) {
+ _BufferWrite[GetIndex((centerX + x), (centerY + y))].v = 1;
+ }
+ }
+ }
+}
+
+The value of v is set to 1 so that it becomes a circle around the coordinates of the inputBuffer passed from C #.
+ +Since the updated grid is just an array, write it to RenderTexture for visualization and make it an image. Write the density difference between u and v in RenderTexture.
First, create a Render Texture. Since the only information to be written to one pixel is the density difference, set RenderTextureFormat to RFloat. RenderTextureFormat.RFloat is a RenderTexture format that can write information for one float per pixel.
ReactionDiffusion2D.cs
+/// <summary>
+/// Create Render Texture
+/// </summary>
+/// <param name="width"></param>
+/// <param name="height"></param>
+/// <returns></returns>
+RenderTexture CreateRenderTexture(int width, int height)
+{
+ RenderTexture tex = new RenderTexture(width, height, 0,
+ RenderTextureFormat.RFloat,
+ RenderTextureReadWrite.Linear);
+ tex.enableRandomWrite = true;
+ tex.filterMode = FilterMode.Bilinear;
+ tex.wrapMode = TextureWrapMode.Repeat;
+ tex.Create();
+
+ return tex;
+}
+
+Next, it is the process on the C # side that passes RenderTexture to ComputeShader and writes it.
+ReactionDiffusion2D.cs
+/// <summary>
+/// Write the result of Reaction Diffusion to the texture
+/// </summary>
+void DrawTexture()
+{
+ cs.SetInt("_TexWidth", texWidth);
+ cs.SetInt("_TexHeight", texHeight);
+ cs.SetBuffer(kernelDraw, "_BufferRead", buffers[0]);
+ cs.SetTexture(kernelDraw, "_HeightMap", resultTexture);
+ cs.Dispatch(kernelDraw,
+ Mathf.CeilToInt((float)texWidth / THREAD_NUM_X),
+ Mathf.CeilToInt((float)texHeight / THREAD_NUM_X),
+ 1);
+}
+
+This is the processing on the Compute Shader side, the density difference between u and v is obtained from the buffer of the grid and written to the texture.
+ReactionDiffusion2D.compute
+// Value calculation for texture writing
+float GetValue(int x, int y) {
+ int idx = GetIndex(x, y);
+ float u = _BufferRead[idx].u;
+ float v = _BufferRead[idx].v;
+ return 1 - clamp(u - v, 0, 1);
+}
+
+...
+
+// Draw on texture
+[numthreads(THREAD_NUM_X, THREAD_NUM_X, 1)]
+void Draw(uint3 id : SV_DispatchThreadID)
+{
+ float c = GetValue(id.x, id.y);
+
+ // height map
+ _HeightMap[id.xy] = c;
+
+}
+
+The normal Unlit Shader is modified and the two colors are interpolated based on the brightness of the texture created in the previous section.
+ReactionDiffusion2D.cs
+/// <summary>
+/// Material update
+/// </summary>
+void UpdateMaterial()
+{
+ material.SetTexture("_MainTex", resultTexture);
+
+ material.SetColor("_Color0", bottomColor);
+ material.SetColor("_Color1", topColor);
+}
+
+ReactionDiffusion2D.shader
+fixed4 frag (v2f i) : SV_Target
+{
+ // sample the texture
+ fixed4 col = lerp(_Color0, _Color1, tex2D(_MainTex, i.uv).r);
+ return col;
+}
+
+When executed, a creature-like pattern should spread on the screen.
+
++Figure 5.3: Simulation +
+Just by changing the Feed and Kill parameters a little, various patterns emerge. Here are some parameter combinations.
+Feed:0.037 / Kill:0.06
+
++Figure 5.4: Coral-like pattern +
+Feed:0.03 / Kill:0.062
+
++Figure 5.5: Crushed pattern +
+Feed:0.0263 / Kill:0.06
+
++Figure 5.6: Crushing seems to repeat disappearance and division +
+Feed:0.077 / Kill:0.0615
+
++Figure 5.7: A pattern that stretches straight and avoids collision +
+Feed:0.039 / Kill:0.058
+
++Figure 5.8: Plump hole pattern +
+Feed:0.026 / Kill:0.051
+
++Figure 5.9: It seems that it is always undulating and unstable +
+Feed:0.014 / Kill:0.0477
+
++Figure 5.10: Pattern that continues to spread like ripples +
+Here, I will introduce a sample that expresses the beautiful texture unique to Unity using Surface Shader. The sample scene whose operation can be confirmed is ReactionDiffusion2D_2 .
+ +The process of ReactionDiffusion itself is the same as the normal version, but when creating the texture for drawing, a normal map is also created to give a three-dimensional effect. Also, the resulting texture was RenderTextureFormat.RFloat, but since the normal map stores the normal vector in the XYZ directions, it is created with RenderTextureFormat.ARGBFloat.
+ReactionDiffusion2DForStandard.cs
+void Initialize()
+{
+ ...
+ heightMapTexture = CreateRenderTexture(texWidth, texHeight,
+ RenderTextureFormat.RFloat); // Create texture for height map
+ normalMapTexture = CreateRenderTexture(texWidth, texHeight,
+ RenderTextureFormat.ARGBFloat); // Create texture for normal map
+ ...
+}
+
+/// <summary>
+/// Create RenderTexture
+/// </summary>
+/// <param name="width"></param>
+/// <param name="height"></param>
+/// <param name="texFormat"></param>
+/// <returns></returns>
+RenderTexture CreateRenderTexture(
+ int width,
+ int height,
+ RenderTextureFormat texFormat)
+{
+ RenderTexture tex = new RenderTexture(width, height, 0,
+ texFormat, RenderTextureReadWrite.Linear);
+ tex.enableRandomWrite = true;
+ tex.filterMode = FilterMode.Bilinear;
+ tex.wrapMode = TextureWrapMode.Repeat;
+ tex.Create();
+
+ return tex;
+}
+
+...
+
+void DrawTexture()
+{
+ ...
+ cs.SetTexture(kernelDraw, "_HeightMap", heightMapTexture);
+ cs.SetTexture(kernelDraw, "_NormalMap", normalMapTexture); // Set the texture for the normal map
+ cs.Dispatch(kernelDraw,
+ Mathf.CeilToInt((float)texWidth / THREAD_NUM_X),
+ Mathf.CeilToInt((float)texHeight / THREAD_NUM_X),
+ 1);
+}
+
+In ComputeShader, the slope is calculated from the density difference with the surrounding grid and written to the texture for the normal map.
+ReactionDiffusion2DStandard.compute
+float3 GetNormal(int x, int y) {
+ float3 normal = float3(0, 0, 0);
+ float c = GetValue(x, y);
+ normal.x = ((GetValue(x - 1, y) - c) - (GetValue(x + 1, y) - c));
+ normal.y = ((GetValue(x, y - 1) - c) - (GetValue(x, y + 1) - c));
+ normal.z = 1;
+ normal = normalize(normal) * 0.5 + 0.5;
+ return normal;
+}
+
+...
+
+// Draw on texture
+[numthreads(THREAD_NUM_X, THREAD_NUM_X, 1)]
+void Draw(uint3 id : SV_DispatchThreadID)
+{
+ float c = GetValue(id.x, id.y);
+
+ // height map
+ _HeightMap[id.xy] = c;
+
+ // normal map
+ _NormalMap[id.xy] = float4(GetNormal(id.x, id.y), 1);
+}
+
+Pass the two created textures to the Surface Shader and draw the pattern. Surface Shader is a shader wrapped for easy use of Unity's physics-based rendering, just assign the necessary data to the SurfaceOutputStandard structure in the surf function and output it, and it will automatically light it. ..
+Definition of SurfaceOutputStandard structure
+struct SurfaceOutputStandard
+{
+ fixed3 Albedo; // Base (diffuse or specular) color
+ fixed3 Normal; // normal
+ half3 Emission; // Emission color
+ half Metallic; // 0 = non-metal, 1 = metal
+ half Smoothness; // 0 = coarse, 1 = smooth
+ half Occlusion; // Occlusion (default 1)
+ fixed Alpha; // Transparency alpha
+};
+
+ReactionDiffusion2DStandard.shader
+void surf(Input IN, inout SurfaceOutputStandard o) {
+
+ float2 uv = IN.uv_MainTex;
+
+ // Get concentration
+ half v0 = tex2D(_MainTex, uv).x;
+
+ // Normal acquisition
+ float3 norm = UnpackNormal(tex2D(_NormalTex, uv));
+
+ // Get the value of the boundary between A and B
+ half p = smoothstep(_Threshold, _Threshold + _Fading, v0);
+
+ o.Albedo = lerp(_Color0.rgb, _Color1.rgb, p); // Base color
+ o.Alpha = lerp(_Color0.a, _Color1.a, p); // Alpha value
+ o.Smoothness = lerp(_Smoothness0, _Smoothness1, p); // Smoothness
+ o.Metallic = lerp(_Metallic0, _Metallic1, p); // Metallic
+ o.Normal = normalize(float3(norm.x, norm.y, 1 - _NormalStrength)); // Normal
+
+ o.Emission = lerp(_Emit0 * _EmitInt0, _Emit1 * _EmitInt1, p).rgb; // Emission
+}
+
+Use Unity's built-in function UnpackNormal to get the normals from the normal map. In addition, the various color and material properties of SurfaceOutputStandard are set based on the density-difference ratio.
+When you run it, you should see something like the following.
+
++Figure 5.11: Surface Shader version +
+The normal map creates a three-dimensional effect. In addition, although it is not known in monochrome, the gloss of the RGB 3-color point light in the scene is also expressed.
+ +Let's extend Reaction Diffusion, which used to be a simulation on a two-dimensional plane, to three dimensions. The basic flow is the same as for 2D, but since the dimension is increased by 1, the method of creating RenderTexture and ComputeBuffer and the method of Laplace operation are slightly different. The sample scene whose operation can be confirmed is ReactionDiffusion3D .
+ +Some initialization processing is added to change the Render Texture to which the density difference is written from 2D to 3D.
+ReactionDiffusion3D.cs
+RenderTexture CreateTexture(int width, int height, int depth)
+{
+ RenderTexture tex = new RenderTexture(width, height, 0,
+ RenderTextureFormat.RFloat, RenderTextureReadWrite.Linear);
+ tex.volumeDepth = depth;
+ tex.enableRandomWrite = true;
+ tex.dimension = UnityEngine.Rendering.TextureDimension.Tex3D;
+ tex.filterMode = FilterMode.Bilinear;
+ tex.wrapMode = TextureWrapMode.Repeat;
+ tex.Create();
+
+ return tex;
+}
+
+First, put the depth in the Z direction in tex.volumeDepth. Then I put UnityEngine.Rendering.TextureDimension.Tex3D in tex.dimension. This is a setting to specify that RenderTexture is a 3D volume texture. The Render Texture is now a 3D volume texture. Similarly, the Compute Buffer that stores the Reaction Diffusion simulation results is also made three-dimensional. This simply secures the size of width x height x depth.
+ReactionDiffusion3D.cs
+void Initialize()
+{
+ ...
+ int whd = texWidth * texHeight * texDepth;
+ buffers = new ComputeBuffer[2];
+ ...
+ for (int i = 0; i < buffers.Length; i++)
+ {
+ buffers[i] = new ComputeBuffer(whd, Marshal.SizeOf(typeof(RDData)));
+ }
+ ...
+}
+
+Next are the changes on the Compute Shader side. First, since the RenderTexture that the result is written to is now three-dimensional, the texture definition on the ComputeShader side is changed from RWTexture2D<float> to RWTexture3D<float>.
+ReactionDiffusion3D.compute
+RWTexture3D<float> _HeightMap; // Height map
Next, the Laplacian function is extended to three dimensions. It now refers to a total of 27 cells in a 3x3x3 neighborhood. Incidentally, the influence weights in laplacePower are values that were tuned by trial and error.
+ReactionDiffusion3D.compute
+// Surrounding index calculation table referenced by the Laplacian function
+static const int3 laplaceIndex[27] = {
+ int3(-1, -1, -1), int3(0, -1, -1), int3(1, -1, -1),
+ int3(-1, 0, -1), int3(0, 0, -1), int3(1, 0, -1),
+ int3(-1, 1, -1), int3(0, 1, -1), int3(1, 1, -1),
+
+ int3(-1, -1, 0), int3(0, -1, 0), int3(1, -1, 0),
+ int3(-1, 0, 0), int3(0, 0, 0), int3(1, 0, 0),
+ int3(-1, 1, 0), int3(0, 1, 0), int3(1, 1, 0),
+
+ int3(-1, -1, 1), int3(0, -1, 1), int3(1, -1, 1),
+ int3(-1, 0, 1), int3(0, 0, 1), int3(1, 0, 1),
+ int3(-1, 1, 1), int3(0, 1, 1), int3(1, 1, 1),
+};
+
+// Impact of the grid around the Laplacian
+static const float laplacePower[27] = {
+ 0.02, 0.02, 0.02,
+ 0.02, 0.1, 0.02,
+ 0.02, 0.02, 0.02,
+
+ 0.02, 0.1, 0.02,
+ 0.1, -1.0, 0.1,
+ 0.02, 0.1, 0.02,
+
+ 0.02, 0.02, 0.02,
+ 0.02, 0.1, 0.02,
+ 0.02, 0.02, 0.02
+};
+
+// Buffer index calculation
+int GetIndex(int x, int y, int z) {
+ x = (x < 0) ? x + _TexWidth : x;
+ x = (x >= _TexWidth) ? x - _TexWidth : x;
+
+ y = (y < 0) ? y + _TexHeight : y;
+ y = (y >= _TexHeight) ? y - _TexHeight : y;
+
+ z = (z < 0) ? z + _TexDepth : z;
+ z = (z >= _TexDepth) ? z - _TexDepth : z;
+
+ return z * _TexWidth * _TexHeight + y * _TexWidth + x;
+}
+
+// Laplacian function of U
+float LaplaceU(int x, int y, int z) {
+ float sumU = 0;
+
+ for (int i = 0; i < 27; i++) {
+ int3 pos = laplaceIndex[i];
+
+ int idx = GetIndex(x + pos.x, y + pos.y, z + pos.z);
+ sumU += _BufferRead[idx].u * laplacePower[i];
+ }
+ return sumU;
+}
+
+// Laplacian function of V
+float LaplaceV(int x, int y, int z) {
+ float sumV = 0;
+
+ for (int i = 0; i < 27; i++) {
+ int3 pos = laplaceIndex[i];
+ int idx = GetIndex(x + pos.x, y + pos.y, z + pos.z);
+ sumV += _BufferRead[idx].v * laplacePower[i];
+ }
+ return sumV;
+}
+
+Since the Render Texture of the simulation result is a 3D volume texture, even if you paste the texture on the Unlit Shader or Surface Shader as before, it will not be displayed normally. In the sample, polygons are generated and drawn using a method called the Marching cubes method, but due to space limitations, the explanation of implementation will be omitted. For an explanation of the Marching Cubes method, please refer to "Chapter 7 Introduction to the Marching Cubes Method in an Atmosphere" in Unity Graphics Programming Vol.1. Another method is to draw with volume rendering using ray marching. A very easy-to-understand implementation * 1 is introduced on Mr. Utsu's blog, so please refer to it.
+[* 1] Dent Tips http://tips.hecomi.com/entry/2018/01/05/192332
++Figure 5.12: 3D Reaction Diffusion +
+I introduced how to make a creature-like pattern using the Gray-Scott model. You can create a completely different pattern by just changing the parameters of Feed and Kill, so be careful as the time will pass quickly if you get absorbed in it (* There are individual differences)
Also, for works using reaction-diffusion, see Nakama Kouhei's "DIFFUSION" *2 and Kitahara Nobutaka's "Reaction-Diffusion" *3. Why not let yourself be drawn in by the mysterious charm of reaction-diffusion?
[*2] DIFFUSION https://vimeo.com/145251635
[*3] Reaction-Diffusion https://vimeo.com/176261480
|
![]() |
|
In this chapter, we will develop a visualization of a phenomenon called "Strange Attractor" that shows non-linear chaotic behavior by a differential equation or difference equation with a specific state using Unity and GPU arithmetic.
The sample in this chapter is "Strange Attractors" from
https://github.com/IndieVisualLab/UnityGraphicsProgramming3
.
A state in which the motion of a dissipative system (energy non-conservation, an unbalanced system with a specific input and opening) maintains a stable orbit over time is called an "Attractor".
Among them, the one that shows a chaotic behavior by amplifying the slight difference in the initial state with the passage of time is called "Strange Attractor".
In this chapter, I would like to take up " * 1 Lorenz Attractor" and " * 2 Thomas' Cyclically Symmetric Attractor" as the subjects .
+ +Do you know the phenomenon called the butterfly effect? This is a word derived from the title of a lecture given by meteorologist Edward N Lorenz at the American Association for the Advancement of Science in 1972, " * 3 Does the flapping of a single Brazilian butterfly cause a tornado in Texas?"
This term describes a phenomenon in which slight differences in initial values do not always produce mathematically similar results, but are amplified chaotically and behave unpredictably.
Lorenz, who pointed out this mathematical property, published "Lorenz Attractor" in 1963.
+
++Figure 6.1: Initial state of Lorenz attractor +
+
++Figure 6.2: Mid-term Lorenz attractor +
+[*1] Lorenz, E. N.: Deterministic Nonperiodic Flow, Journal of Atmospheric Sciences, Vol.20, pp.130-141, 1963.
[*2] Thomas, René(1999). "Deterministic chaos seen in terms of feedback circuits: Analysis, synthesis, 'labyrinth chaos'". Int. J. Bifurcation and Chaos. 9 (10): 1889–1905.
[*3] http://eaps4.mit.edu/research/Lorenz/Butterfly_1972.pdf
The Lorenz equation is represented by the following nonlinear ODE.
+By setting p = 10, r = 28, b = 8/3 in each variable of p, r, and b in the above equation, it will behave chaotically as "Strange Attractor".
+ +Now let's implement the Lorenz equation with a compute shader. First, define the structure you want to calculate in the compute shader.
StrangeAttractor.cs
+protected struct Params
+{
+ Vector3 emitPos;
+ Vector3 position;
+ Vector3 velocity; // xyz = velocity, w = velocity coef;
+ float life;
+ Vector2 size; // x = current size, y = target size.
+ Vector4 color;
+
+ public Params(Vector3 emitPos, float size, Color color)
+ {
+ this.emitPos = emitPos;
+ this.position = Vector3.zero;
+ this.velocity = Vector3.zero;
+ this.life = 0;
+ this.size = new Vector2(0, size);
+ this.color = color;
+ }
+}
+
+Since this structure will be used universally in multiple Strange Attractors in the future, it is defined in the abstract class StrangeAttractor.cs.
Next, the Compute Buffer is initialized.
LorenzAttrator.cs
+protected sealed override void InitializeComputeBuffer()
+{
+ if (cBuffer != null) cBuffer.Release();
+
+ cBuffer = new ComputeBuffer(instanceCount, Marshal.SizeOf(typeof(Params)));
+ Params[] parameters = new Params[cBuffer.count];
+ for (int i = 0; i < instanceCount; i++)
+ {
+ var normalize = (float)i / instanceCount;
+ var color = gradient.Evaluate(normalize);
+ parameters[i] = new Params(Random.insideUnitSphere *
+ emitterSize * normalize, particleSize, color);
+ }
+ cBuffer.SetData(parameters);
+}
+
+The abstract method InitializeComputeBuffer defined in the abstract class StrangeAttractor.cs is implemented in LorenzAttrator.cs.
Since I want to adjust the gradation, emitter size, and particle size in Unity's inspector, initialize the Params structure with the gradient, emitterSize, and particleSize exposed in the inspector, and setData to the ComputeBuffer variable, cBuffer.
This time, I want to apply the velocity vector by delaying it little by little in the order of particle id, so I add the gradation color in order of particle id.
In "Strange Attractor", the initial position is greatly related to the subsequent behavior depending on the thing, so I would like you to try various initial positions, but in this sample, the sphere is the initial shape.
Then pass the LorenzAttrator variables p, r, b to the compute shader.
LorenzAttrator.cs
+// Lorenz-equation coefficients, tweakable in the inspector.
+[SerializeField, Tooltip("Default is 10")]
+float p = 10f;
+[SerializeField, Tooltip("Default is 28")]
+float r = 28f;
+[SerializeField, Tooltip("Default is 8/3")]
+float b = 2.666667f;
+
+// Cached shader property ids (cheaper than string lookups every frame).
+private int pId, rId, bId;
+private string pProp = "p", rProp = "r", bProp = "b";
+
+// Resolves the uniform names to ids once, up front.
+protected override void InitializeShaderUniforms()
+{
+ pId = Shader.PropertyToID(pProp);
+ rId = Shader.PropertyToID(rProp);
+ bId = Shader.PropertyToID(bProp);
+}
+
+// Pushes the current inspector values to the compute shader each update.
+protected override void UpdateShaderUniforms()
+{
+ computeShaderInstance.SetFloat(pId, p);
+ computeShaderInstance.SetFloat(rId, r);
+ computeShaderInstance.SetFloat(bId, b);
+}
+
+Next, initialize the state of particles at the time of emission on the compute shader side.
LorenzAttractor.compute
+#pragma kernel Emit
+#pragma kernel Iterator
+
+// Thread-group layout: 128 threads on X, one particle per thread.
+#define THREAD_X 128
+#define THREAD_Y 1
+#define THREAD_Z 1
+// Fixed integration step, deliberately independent of frame time
+// (see the accompanying text: frame-rate-dependent dt destabilizes the attractor).
+#define DT 0.022
+
+// Must match the C# Params struct layout byte-for-byte.
+struct Params
+{
+ float3 emitPos;
+ float3 position;
+ float3 velocity; //xyz = velocity
+ float life;
+ float2 size; // x = current size, y = target size.
+ float4 color;
+};
+
+RWStructuredBuffer<Params> buf;
+
+// Resets a particle to its emission state.
+[numthreads(THREAD_X, THREAD_Y, THREAD_Z)]
+void Emit(uint id : SV_DispatchThreadID)
+{
+ Params p = buf[id];
+ // Slightly negative initial life gives each id a tiny start delay so
+ // particles don't all trace the same trajectory at once.
+ p.life = (float)id * -1e-05;
+ p.position = p.emitPos;
+ // Size 0 hides the particle at the instant it appears.
+ p.size.x = 0.0;
+ buf[id] = p;
+}
+
+Initialization is performed by the Emit method. p.life manages the time since the generation of particles, and provides a small delay for each id at the time of the initial value.
This is to easily prevent the particles from drawing the same trajectory all at once. Also, since the gradation color is set for each id, it is useful for making the color look beautiful.
Here, the particle size p.size is initially set to 0, so that the particles are invisible at the very moment they appear, making their emergence look natural.
Next, let's look at the iteration part.
LorenzAttractor.compute
+// Fixed integration step (see the discussion in the text on why a
+// constant dt is used instead of Unity's Time.deltaTime).
+#define DT 0.022
+
+// Lorenz Attractor parameters
+float p;
+float r;
+float b;
+
+// The arithmetic part of the Lorenz equation.
+// Returns the displacement for one step: (dx/dt, dy/dt, dz/dt) * DT.
+float3 LorenzAttractor(float3 pos)
+{
+ float dxdt = (p * (pos.y - pos.x));
+ float dydt = (pos.x * (r - pos.z) - pos.y);
+ float dzdt = (pos.x * pos.y - b * pos.z);
+ return float3(dxdt, dydt, dzdt) * DT;
+}
+
+// Advances one particle by one fixed time step.
+[numthreads(THREAD_X, THREAD_Y, THREAD_Z)]
+void Iterator(uint id : SV_DispatchThreadID)
+{
+ // NOTE(review): this local "p" shadows the uniform "float p" inside the
+ // kernel body; LorenzAttractor() above still sees the uniform.
+ Params p = buf[id];
+ p.life.x += DT; // .x on a scalar float is a valid HLSL swizzle
+ // Clamp the vector length of the velocity vector from 0 to 1 and multiply it by the size to make the start look natural.
+ p.size.x = p.size.y * saturate(length(p.velocity));
+ // Particles only start integrating once their per-id delay has elapsed.
+ if (p.life.x > 0)
+ {
+ p.velocity = LorenzAttractor(p.position);
+ p.position += p.velocity;
+ }
+ buf[id] = p;
+}
+
+The LorenzAttractor method above is the arithmetic part of the "Lorenz equation". The velocity vector of x, y, z with a small amount of delta time is calculated, and finally the delta time is multiplied to derive the amount of movement.
From experience, when performing derivative operations related to the shape in the compute shader, it is better to use a fixed value delta time independent of the frame rate difference instead of sending the delta time from Unity to maintain a stable shape.
This is because if the frame rate drops too much, the value of Unity's Time.deltaTime may become too large for differential operations. The larger the delta width, the rougher the calculation result will be compared to the previous one.
Another reason is that, depending on the equation, the "Strange Attractor" may completely converge or diverge infinitely depending on how the delta time is taken.
For these two reasons, DT is using the predefined ones this time.
Next, I would like to implement the "Thomas' Cyclically Symmetric Attractor" announced by biologist René Thomas.
It is not affected by the initial value, it becomes stable over time, and the shape is very unique.
++Figure 6.3: Thomas' Cyclically Symmetric Attractor Stable Period +
+The equation is represented by the following nonlinear ODE.
+For the variable b in the above equation, setting b \simeq 0.208186 produces chaotic behavior as a "Strange Attractor", while letting b approach 0 makes the trajectory drift freely through space.
+ +Now let's implement the "Thomas' Cyclically Symmetric equation" with a compute shader.
Since there is a part in common with the implementation of "Lorenz Attractor" mentioned above, the parameter structure and procedural part are inherited and only the necessary part is taken up.
First, override the color and initial position on the CPU side.
ThomasAttractor.cs
+// Thomas-attractor variant of the buffer setup: identical mechanics to the
+// Lorenz version, but the sphere of initial positions is meant to show a
+// radial gradient from the inside out (color indexed by particle id).
+protected sealed override void InitializeComputeBuffer()
+{
+ // Release the previous GPU buffer before re-allocating.
+ if (cBuffer != null) cBuffer.Release();
+
+ cBuffer = new ComputeBuffer(instanceCount, Marshal.SizeOf(typeof(Params)));
+ Params[] parameters = new Params[cBuffer.count];
+ for (int i = 0; i < instanceCount; i++)
+ {
+ var normalize = (float)i / instanceCount;
+ var color = gradient.Evaluate(normalize);
+ parameters[i] = new Params(Random.insideUnitSphere *
+ emitterSize * normalize, particleSize, color);
+ }
+ cBuffer.SetData(parameters);
+}
+
+This time, in order to make the colors look beautiful, the initial position is a sphere with a mantle-like gradation color from the inside to the outside.
Next, let's look at the compute shader methods at the time of emission and iteration.
ThomasAttractor.compute
+//Thomas Attractor parameters
+float b;
+
+// One step of Thomas' cyclically symmetric equation; note the cyclic
+// x->y->z->x symmetry of the sin() terms.
+float3 ThomasAttractor(float3 pos)
+{
+ float dxdt = -b * pos.x + sin(pos.y);
+ float dydt = -b * pos.y + sin(pos.z);
+ float dzdt = -b * pos.z + sin(pos.x);
+ return float3(dxdt, dydt, dzdt) * DT;
+}
+
+// Resets a particle. Unlike the Lorenz Emit, the size starts at the target
+// size immediately, because the initial sphere should be visible here.
+[numthreads(THREAD_X, THREAD_Y, THREAD_Z)]
+void Emit(uint id : SV_DispatchThreadID)
+{
+ Params p = buf[id];
+ // Per-id negative life gives a small staggered start delay.
+ p.life = (float)id * -1e-05;
+ p.position = p.emitPos;
+ p.size.x = p.size.y;
+ buf[id] = p;
+}
+
+// Fixed-step integration, same structure as the Lorenz Iterator.
+[numthreads(THREAD_X, THREAD_Y, THREAD_Z)]
+void Iterator(uint id : SV_DispatchThreadID)
+{
+ Params p = buf[id];
+ p.life.x += DT;
+ if (p.life.x > 0)
+ {
+ p.velocity = ThomasAttractor(p.position);
+ p.position += p.velocity;
+ }
+ buf[id] = p;
+}
+
+The ThomasAttractor method becomes the arithmetic part of the "Thomas' Cyclically Symmetric equation".
Also, unlike LorenzAttrator, the implementation at Emit is set from the initial size to the target size because I want to show the initial position this time.
Others have almost the same implementation.
In this chapter, we introduced an example of implementing "Strange Attractor" on GPU using a compute shader.
There are various types of "Strange Attractor", and even in the implementation, it shows chaotic behavior with relatively few operations, so it may be a useful accent in graphics as well.
There are many other types, such as a two-dimensional motion called " * 4 Ueda Attractor" and a spin motion like " * 5 Aizawa Attractor", so if you are interested, please try it.
[*4] http://www-lab23.kuee.kyoto-u.ac.jp/ueda/Kambe-Bishop_ver3-1.pdf
[*5] http://www.algosome.com/articles/aizawa-attractor-chaos.html
|
![]() |
|
Do you know the game Portal * 1 ? It is a puzzle action game released by Valve in 2007 — a masterpiece. Its signature feature is a hole called a portal: two holes behave as if connected by a wormhole, so you can see the scenery on the other side through one, and objects and your character can warp through them. In essence, it is an "Anywhere Door". You place a portal on a flat surface with a gun called a portal gun, and use this mechanic to progress through the game. This chapter is an attempt to implement this portal mechanic in Unity, kept as simple as possible. The sample is "PortalGateSystem" in
https://github.com/IndieVisualLab/UnityGraphicsProgramming3
.
[*1] https://ja.wikipedia.org/wiki/Portal_(%E3%82%B2%E3%83%BC%E3%83%A0)
Think about the necessary elements as a place to play on the portal.
+I want a hit. Your character may be visible on the other side of the portal, so it's a first-person perspective, but you'll need a full-body model. This time, I used the Adam * 2 model distributed by Unity . Also, I want to see the objects warping other than my character, so I can launch a red ball with the E button.
+[*2] https://assetstore.unity.com/packages/essentials/tutorial-projects/adam-character-pack-adam-guard-lu-74842
The operation method is as follows.
+From now on, the hole to warp is called a gate. The class name is PortalGate in the source code .
+The player character was created by modifying Unity's Standard Assets * 3 — it uses the ThirdPersonCharacter animations while adapting the FirstPersonCharacter controls. If the main camera rendered the player model itself, its polygons would intersect the near plane and look broken, so the character is placed on a dedicated Player layer and that layer is excluded from the main camera's CullingMask so the model is not drawn.
+[*3] https://assetstore.unity.com/packages/essentials/asset-packs/standard-assets-32351
For the field, I used unity3d-jp 's level design asset playGROWnd * 4 . Somehow the atmosphere is like Portal. This time, the field is a rectangular parallelepiped room to simplify the rest of the process. Collisions are not used as they are, but transparent collisions are placed on each side of the rectangular parallelepiped. The floor collision is wider than the room to prevent it from falling, as the post-warp object may be partially outside the room. This is the Stage Coll layer.
+[*4] https://github.com/unity3d-jp/playgrownd
Now let's implement the gate. The gate this time is oriented to spread on the XY plane in the local coordinate system and pass through the Z + direction.
+
++Figure 7.1: Gate coordinate system +
+Gate placement follows the original Portal: clicking the mouse opens a gate on the surface you are looking at, with the left-click and right-click gates connected as a pair. If a gate of that type already exists, the old one disappears on the spot and a new one opens — internally, the existing gate object is simply moved to the new location, which is fastest.
+PortalGun.cs
+// Fires the portal gun: raycasts forward against the stage geometry and
+// opens (or moves) the gate of the given pair slot at the hit point.
+// idx selects which of the two gates (left/right click) is being placed.
+void Shot(int idx)
+{
+ RaycastHit hit;
+ // Only surfaces on the "StageColl" layer can host a gate.
+ if (Physics.Raycast(transform.position,
+ transform.forward,
+ out hit,
+ float.MaxValue,
+ LayerMask.GetMask(new[] { "StageColl" })))
+ {
+ var gate = gatePair [idx];
+ if (gate == null)
+ {
+ // First shot for this slot: instantiate the gate and, if the
+ // other slot already exists, link the two as a pair.
+ var go = Instantiate(gatePrefab);
+ gate = gatePair[idx] = go.GetComponent<PortalGate>();
+
+ var pair = gatePair[(idx + 1) % 2];
+ if (pair != null)
+ {
+ gate.SetPair(pair);
+ pair.SetPair(gate);
+ }
+ }
+
+ // Remember which stage collider the gate sits on; used later by
+ // PortalObj to disable collision while passing through.
+ gate.hitColl = hit.collider;
+
+ var trans = gate.transform;
+ var normal = hit.normal;
+ // On ceilings (normal.y < 0) transform.up would flip the gate's
+ // front/back, so use the gun's forward as the up vector instead.
+ var up = normal.y >= 0f ? transform.up : transform.forward;
+
+ // Lift slightly off the surface to avoid Z-fighting.
+ trans.position = hit.point + normal * gatePosOffset;
+ trans.rotation = Quaternion.LookRotation(-normal, up);
+
+ gate.Open();
+ }
+}
+
+By specifying only the StageColl layer transform.forward, the ray is skipped in the direction and the hit is confirmed. If there is a hit, the gate operation is processed. First, check if there is an existing gate, and if not, generate it. Pairing is also done here. PortalGate.hitCollSet the collider that Ray collided with for later use , and ask for the position and orientation. The position is slightly lifted in the normal direction from the plane where it collided, and Z-fighting measures are taken. Did you notice that the way to find the orientation is a little strange? The specification of the up vector of Quaternion.LookRotation () is changed by the positive or negative of normal.y. Normally, transform.up is fine, but when the gate is put out on the ceiling, the front and back (Y direction of PortalGate) will be reversed and it will feel strange, so I did it like this. I think the original Portal also behaved like this.
++Figure 7.2: Without up-vector processing +
+
++Figure 7.3: Up-vector processing +
+When the gate opens, you can see the other side of another paired gate (hereinafter referred to as pair gate), so you need to implement this drawing somehow. I took the approach of "preparing another camera (Virtual Camera), capturing it on the Render Texture, pasting it on the Portal Gate, and drawing with the main camera " to draw the "other side" .
+Virtual Camera is a camera that captures pictures on the other side of the gate.
+
++Figure 7.4: VirtualCamera +
+PortalGate.OnWillRenderObject()Is called for each camera, so if Virtual Camera is required at that timing, it will be generated.
PortalGate.cs
+// Called by Unity once per camera that is about to render this gate.
+// Lazily creates a VirtualCamera for that camera, up to maxGeneration
+// levels deep (listing abridged in the book; elisions kept as-is).
+private void OnWillRenderObject ()
+{
+~ Omitted ~
+ VirtualCamera pairVC;
+ // No virtual camera registered for this rendering camera yet?
+ if (!pairVCTable.TryGetValue(cam, out pairVC))
+ {
+ // Only spawn a new generation if we are still under the cap
+ // (vc == null means "cam" is a real camera, generation 0).
+ if ((vc == null) || vc.generation < maxGeneration)
+ {
+ pairVC = pairVCTable[cam] = CreateVirtualCamera(cam, vc);
+ return;
+ }
+ }
+
+~ Omitted ~
+}
+
+When the gates face each other, the gate is reflected in the scenery on the other side, and the gate is also on the other side of the gate.
+
++Figure 7.5: Facing gates +
+in this case,
+If you implement it honestly, you will need an infinite number of Virtual Cameras. This is not the case, so PortalGate.maxGenerationI will limit the number of generations, and although it is not an accurate picture, I will substitute it by pasting the texture one frame before to the gate.
PortalGate.cs
+// Creates a VirtualCamera one generation deeper than parentVC (or a
+// first-generation one when parentVC is null), parents it to this gate,
+// and returns it.
+// parentCam: the camera whose picture this virtual camera extends.
+// parentVC: the virtual camera parentCam belongs to, or null for a real camera.
+VirtualCamera CreateVirtualCamera(Camera parentCam, VirtualCamera parentVC)
+{
+ // The root is the real (main) camera the whole chain originates from;
+ // generation counts how deep in the facing-mirrors recursion we are.
+ var rootCam = parentVC?.rootCamera ?? parentCam;
+ var generation = parentVC?.generation + 1 ?? 1;
+
+ var go = Instantiate(virtualCameraPrefab);
+ go.name = rootCam.name + "_virtual" + generation;
+ go.transform.SetParent(transform);
+
+ var vc = go.GetComponent<VirtualCamera>();
+ vc.rootCamera = rootCam;
+ vc.parentCamera = parentCam;
+ vc.parentGate = this;
+ vc.generation = generation;
+
+ vc.Init();
+
+ // Fixed: the printed listing said "return you;" -- a machine-translation
+ // artifact; the freshly created VirtualCamera is what must be returned.
+ return vc;
+}
+
+VirtualCamera.rootCameraIs the main camera that dates back to the generation of Virtual Camera. In addition, the parent camera, target gate, generation, etc. are set.
VirtualCamera.cs
+// Copies the rendering parameters from the root (real) camera.
+// CopyFrom() is deliberately not used because it also copies CommandBuffers,
+// which conflicted with PostProcessingStack (see accompanying text).
+public void Init()
+{
+ camera_.aspect = rootCamera.aspect;
+ camera_.fieldOfView = rootCamera.fieldOfView;
+ camera_.nearClipPlane = rootCamera.nearClipPlane;
+ camera_.farClipPlane = rootCamera.farClipPlane;
+ // Add the Player layer back: the character should be visible through gates
+ // even though the main camera culls it.
+ camera_.cullingMask |= LayerMask.GetMask(new[] { PlayerLayerName });
+ // Render before the parent so its picture is ready when the parent draws.
+ camera_.depth = parentCamera.depth - 1;
+
+ // Start capturing into the first of the double-buffered render textures.
+ camera_.targetTexture = tex0;
+ currentTex0 = true;
+}
+
+In VirtualCamera.Init(), the parameters are inherited from the root camera. Since the player character should be visible in the Virtual Camera, the Player layer is added back into its CullingMask. Also, because the virtual camera must capture its picture before its parent camera renders, its depth is set to parentCamera.depth - 1.
Camera.CopyFrom()I used it at the beginning , but it seems that CommandBuffer is also copied, and an error occurred when using it together with PostProcessingStack * 5 used for post effect, so I copied it for each property.
[*5] https://github.com/Unity-Technologies/PostProcessing
VirtualCamera PortalGate.maxGenerationcan do more as the processing is lighter, so I pay a little attention to performance so as not to waste processing.
VirtualCamera.cs
+// Per-frame upkeep of a virtual camera: destroy it when orphaned, disable
+// it when the gate is off-screen, otherwise mirror the parent camera's
+// pose through the gate pair and refresh the projection.
+private void LateUpdate()
+{
+ // PreviewCamera etc. seems to be null at this timing, so check
+ if (parentCamera == null)
+ {
+ Destroy(gameObject);
+ return;
+ }
+
+ // Only render while the parent camera can actually see the gate.
+ camera_.enabled = parentGate.IsVisible(parentCamera);
+ if (camera_.enabled)
+ {
+ var parentCamTrans = parentCamera.transform;
+
+ // Place this camera at the pair gate so it films "the other side"
+ // from the same relative pose the parent camera has to this gate.
+ // (Removed an unused local "parentGateTrans" from the original listing.)
+ parentGate.UpdateTransformOnPair(
+ transform,
+ parentCamTrans.position,
+ parentCamTrans.rotation
+ );
+
+ UpdateCamera();
+ }
+}
+
+I will follow this code in detail.
+ +If the parent camera does not show the gate, you do not need to prepare the picture on the other side, so disable the camera of Virtual Camera.
+PortalGate.cs
+// Returns true when the given camera can potentially see the front face
+// of this gate: the camera must be on the front side (dot > 0) and the
+// gate's bounds must intersect the camera's view frustum.
+public bool IsVisible(Camera camera)
+{
+ var ret = false;
+
+ var pos = transform.position;
+ var camPos = camera.transform.position;
+
+ // Cheap back-face rejection before the frustum test.
+ var camToGateDir = (pos - camPos).normalized;
+ var dot = Vector3.Dot(camToGateDir, transform.forward);
+ if (dot > 0f)
+ {
+ var planes = GeometryUtility.CalculateFrustumPlanes(camera);
+ ret = GeometryUtility.TestPlanesAABB(planes, coll.bounds);
+ }
+
+ return ret;
+}
+
+The visibility judgment is as follows.
+parentGate.UpdateTransformOnPair() In, "From the position and orientation of the parent camera with respect to the parent gate, find the position and orientation of the parent pair with respect to the gate and update the transform".
PortalGate.cs
+// Given a world pose relative to THIS gate, computes the equivalent pose
+// relative to the PAIR gate (rotated 180 degrees by gateRot so you come
+// out facing the right way) and writes it to trans.
+// trans: transform to reposition (a virtual camera or a warping object).
+// worldPos/worldRot: the pose to mirror, in world space.
+public void UpdateTransformOnPair(
+ Transform trans,
+ Vector3 worldPos,
+ Quaternion worldRot
+ )
+{
+ // Express the pose in this gate's local space...
+ var localPos = transform.InverseTransformPoint(worldPos);
+ var localRot = Quaternion.Inverse(transform.rotation) * worldRot;
+
+ // ...then re-express it in the pair gate's space, flipped by gateRot.
+ var pairGateTrans = pair.transform;
+ var gateRot = pair.gateRot;
+ var pos = pairGateTrans.TransformPoint(gateRot * localPos);
+ var rot = pairGateTrans.rotation * gateRot * localRot;
+
+ trans.SetPositionAndRotation(pos, rot);
+}
+
+The implementation looks like this,
+It is the procedure. gateRot
+public Quaternion gateRot { get; } = Quaternion.Euler(0f, 180f, 0f);
+
+And, I rotate it 180 degrees on the Y axis, but since the Z value should be inverted
+public Quaternion gateRot { get; } = Quaterion.Euler(180f, 0f, 0f);
+
+Even an implementation like this should not break down. However, since the upward direction is reversed between the front and the back of the gate, when you pass through the gate, your character's head will be on the ground side, which makes you feel uncomfortable, so Y-axis rotation seems to be good.
+ +VirtualCamera.cs
+// Narrows this virtual camera's viewport to just the pair gate's mesh and
+// sets an oblique projection so the near clip plane coincides with the
+// pair gate -- nothing between the camera and the gate gets drawn.
+void UpdateCamera()
+{
+ var pair = parentGate.pair;
+ var pairTrans = pair.transform;
+ // Pair-gate mesh vertices in world space; TargetCameraUtility shrinks
+ // Camera.rect so the frustum just covers them.
+ var mesh = pair.GetComponent<MeshFilter>().sharedMesh;
+ var vtxList = mesh.vertices
+ .Select(vtx => pairTrans.TransformPoint(vtx)).ToList();
+
+ TargetCameraUtility.Update(camera_, vtxList);
+
+ // Oblique
+ // Draw only the back of pairGate = match nearClipPlane with pairGate
+ var pairGateTrans = parentGate.pair.transform;
+ var clipPlane = CalcPlane(camera_,
+ pairGateTrans.position,
+ -pairGateTrans.forward);
+
+ camera_.projectionMatrix = camera_.CalculateObliqueMatrix(clipPlane);
+}
+
+// Builds the plane through pos with the given normal, expressed in the
+// camera's view space, packed as (normal.xyz, distance) -- the Vector4
+// form expected by Camera.CalculateObliqueMatrix.
+Vector4 CalcPlane(Camera cam, Vector3 pos, Vector3 normal)
+{
+ var viewMat = cam.worldToCameraMatrix;
+
+ // MultiplyVector for the normal (no translation), MultiplyPoint for pos.
+ var normalOnView = viewMat.MultiplyVector(normal).normalized;
+ var posOnView = viewMat.MultiplyPoint (pos);
+
+ // w = -n.p is the signed distance term of the plane equation n.x + w = 0.
+ return new Vector4(
+ normalOnView.x,
+ normalOnView.y,
+ normalOnView.z,
+ -Vector3.Dot(normalOnView, posOnView)
+ );
+}
+
+Virtual Camera wants to be as light as possible, so make the view frustum as narrow as possible. Since it is only necessary to draw the range of the pair gate seen through VirtualCamera, the vertices of the pair gate mesh are set to world coordinates, and the TargetCameraUtility.Update()view frustum is Camera.rectchanged so that the vertices fit in .
Also, since the object between the Virtual Camera and the pair gate is not drawn, make the near clip surface of the camera the same plane as the pair gate. Camera.CalculateObliqueMatrix()You can do this with . Since there is not much documentation, it will be judged from the sample code etc., but it seems that the near clip plane is passed by Vector4 with the normal to xyz and the distance to w in the view coordinate system.
What is drawn is different according to the state, but it is done with a single shader.
+PortalGate.maxGenerationIf you reach and there is no Virtual Camera, paste the picture one frame before to PortalGate.
++Figure 7.6: The background is moody when there is no pair gate +
+PortalGate.shader
+GrabPass
+{
+ "_BackgroundTexture"
+}
+
+First , capture the background with GrabPass * 6 .
+[*6] https://docs.unity3d.com/ja/current/Manual/SL-GrabPass.html
PortalGate.shader
+// Vertex shader: computes the clip position for the camera currently
+// rendering, plus a second screen position as seen from the MAIN camera
+// (used to sample the RenderTexture the virtual camera captured).
+v2f vert(appdata_img In)
+{
+ v2f o;
+
+ float3 posWorld = mul(unity_ObjectToWorld, float4(In.vertex.xyz, 1)).xyz;
+ // Clip position for the camera doing this draw call.
+ float4 clipPos = mul(UNITY_MATRIX_VP, float4(posWorld, 1));
+ // Clip position for the main camera (uniform set from C#).
+ float4 clipPosOnMain = mul(_MainCameraViewProj, float4(posWorld, 1));
+
+ o.pos = clipPos;
+ o.uv = In.texcoord;
+ o.sposOnMain = ComputeScreenPos(clipPosOnMain);
+ // Dedicated helper for GrabPass coordinates (handles platform flips).
+ o.grabPos = ComputeGrabScreenPos (o.pos);
+ return o;
+}
+
+The vertex shader looks like this. We are looking for two positions in the screen coordinate system, one for the current camera and one for clipPosthe main camera clipPosOnMain. The former is used for normal rendering, and the latter is used for referencing RenderTexture captured by Virtual Camera. Also, when using GrabPass, there is a dedicated position calculation function, so use this.
PortalGate.shader
+float2 uv = In.uv.xy; +uv = (uv - 0.5) * 2; // map 0~1 to -1~1 +float insideRate = (1 - length(uv)) * _OpenRate; ++
insideRate(Inside ratio of the circle) is calculated. The center of the circle is 1, the circumference is 0, and the outside is negative. _OpenRateYou can change the opening degree of the circle with. It is controlled by PortalGate.Open () .
PortalGate.shader
+// background +float4 grabUV = In.grabPos; +float2 grabOffset = float2( + snoise(float3(uv, _Time.y )), + snoise(float3(uv, _Time.y + 10)) +); +grabUV.xy += grabOffset * 0.3 * insideRate; +float4 bgColor = tex2Dproj(_BackgroundTexture, grabUV); ++
It is generating a moody background. snoiseIs a function defined in the included Noise.cginc and is SimplexNoise. The grab UV is rocking with the uv value and time. By multiplying the insideRate, the fluctuation becomes larger toward the center.
PortalGate.shader
+// portal other side +float2 sUV = In.sposOnMain.xy / In.sposOnMain.w; +float4 sideColor = tex2D(_MainTex, sUV); ++
It is a picture of the other side of the gate. _MainTexContains the texture captured by the Virutual Camera and is referenced by the UV value of the main camera.
PortalGate.shader
+// color +float4 col = lerp(bgColor, sideColor, _ConnectRate); ++
bgColorsideColorI mix (walls and floors) and (beyond the gate). _ConnectRateTransitions from 0 to 1 when a pair gate is created and remains at 1 thereafter.
PortalGate.shader
+// frame +float frame = smoothstep(0, 0.1, insideRate); +float frameColorRate = 1 - abs(frame - 0.5) * 2; +float mixRate = saturate(grabOffset.x + grabOffset.y); +float3 frameColor = lerp(_FrameColor0, _FrameColor1, mixRate); +col.xyz = lerp(col.xyz, frameColor, frameColorRate); + +col.a = frame; ++
Finally, the frame is calculated. insideRateThe edges of are _FrameColor0,_FrameColor1displayed by mixing them appropriately.
The appearance is completed so far. Next, let's focus on the physical behavior.
+ +Changed to process around warp in PortalObj component . GameObjects with this will be able to warp.
+ +The plane on which the gate is installed cannot pass through, that is, there is a collision. This must be disabled when passing through the gate. Actually, the gate is equipped with a collider that pops out rather large in the front and back as a trigger. PortalObj uses this collider as a trigger to invalidate the collision with the plane.
+
++Figure 7.7: Gate Collider +
+PortalObj.cs
+// While inside a gate's (oversized) trigger volume, disable collision with
+// the stage surface the gate sits on so the object can pass through.
+// Stay (not Enter) is used: the pair may be created after Enter has fired.
+private void OnTriggerStay(Collider other)
+{
+ var gate = other.GetComponent<PortalGate>();
+ // Only act once per gate, and only when the gate actually has a pair.
+ if ((gate != null) && !touchingGates.Contains(gate) && (gate.pair != null))
+ {
+ touchingGates.Add(gate);
+ Physics.IgnoreCollision(gate.hitColl, collider_, true);
+ }
+}
+
+// Leaving the trigger volume re-enables collision with the gate's surface.
+private void OnTriggerExit(Collider other)
+{
+ var gate = other.GetComponent<PortalGate>();
+ if (gate != null)
+ {
+ touchingGates.Remove(gate);
+ Physics.IgnoreCollision(gate.hitColl, collider_, false);
+ }
+}
+
+The reason OnTriggerStay() is used instead of OnTriggerEnter() is that, when only one gate exists with no pair yet, the Enter event fires first and the pair is created afterwards. First, the gate that fired the trigger is registered in touchingGates. The PortalGate.hitColl mentioned above finally comes into play: Physics.IgnoreCollision() is set so that this object's collider ignores collisions with it.
OnTriggerExit()The collision is enabled again with. As many of you may have noticed, since PortalGate.hitCollis a collider on the entire plane, it can actually pass through even outside the frame of the Portal Gate. The condition "as long as you keep OnTriggerStay ()" is attached, so it is not very noticeable, but it seems that a little more complicated processing is required to collide in the form of a proper gate.
PortalObj.cs
+// Each frame: warp through the first touched gate whose plane the object's
+// "center" has crossed, and restore gravity a short while after a warp.
+private void Update()
+{
+ // In gate-local space, z > 0 means "center" is behind the gate plane,
+ // i.e. the object has passed through.
+ var passedGate = touchingGates.FirstOrDefault(gate =>
+ {
+ var posOnGate = gate.transform.InverseTransformPoint(center.position);
+ return posOnGate.z > 0f;
+ });
+
+
+ if (passedGate != null)
+ {
+ PassGate(passedGate);
+ }
+
+ // Gravity is suspended briefly after a warp (see PassGate) to avoid
+ // oscillating between facing gates; re-enable it once the timer expires.
+ if ((rigidbody_ != null) && !rigidbody_.useGravity)
+ {
+ if ((Time.time - ignoreGravityStartTime) > ignoreGravityTime)
+ {
+ rigidbody_.useGravity = true;
+ }
+ }
+}
+
+centerIs a Transform used to determine if it has passed the gate. Basically, the GameObject with PortalObj component is fine, but I want to warp my character when the camera passes, not the center of the character, so I can set it manually. center.positionWe are checking z > 0fif there is a gate with (behind the gate) touchingGates. If such a gate is found PassGate()(warp processing).
Also, as will be described later, Portal Obj disables gravity immediately after passing through the gate. This is done to make the object behave with a little inertia after passing because if you open a gate that connects to another floor under the object that is falling on the ground, the object will vibrate back and forth between the gates. I have.
+PortalObj.cs
+// Performs the warp: teleports this object's transform to the pair gate
+// (reusing the same mirroring math as the virtual cameras), redirects its
+// velocity, and temporarily disables gravity for a bit of inertia.
+void PassGate(PortalGate gate)
+{
+ // NOTE(review): single-argument overload of UpdateTransformOnPair --
+ // presumably mirrors the transform's own pose; confirm against PortalGate.
+ gate.UpdateTransformOnPair(transform);
+
+ if (rigidbody_ != null)
+ {
+ // Rotate the velocity into the pair gate's frame as well.
+ rigidbody_.velocity = gate.UpdateDirOnPair(rigidbody_.velocity);
+ rigidbody_.useGravity = false;
+ ignoreGravityStartTime = Time.time;
+ }
+
+ // Same treatment for the first-person controller's move direction.
+ if (fpController != null)
+ {
+ fpController.m_MoveDir = gate.UpdateDirOnPair(fpController.m_MoveDir);
+ fpController.InitMouseLook();
+ }
+}
+
+The warp process looks like this. I also used it to find the position of the Virtual PortalGate.UpdateTransformOnPair()Camera and warp the Transform. RigidBodyIf you have, change the direction of speed as well. fpControllerThe same applies to (script for own character operation). As this area becomes larger, there will be objects that need more support, so it may be better to prepare each script callback and notify it.
There was a point that I had to implement a warp this time and pack some more.
+ +I wanted to somehow nullify the collision after the physics engine made a collision detection and before extrusion, but I couldn't find a good way. OnTriggerEnter(), OnCollisionEnter()The inner Physics.IgnoreCollision()seems to be referred to are disabled from after a collision once. I think On~Enter()it Physics.IgnoreCollision()'s probably a little late to reflect what is called after extrusion . For this reason, the range of the trigger is made to protrude considerably so that the frame that enters the trigger and the frame that collides with the wall are different. However, this method has its limitations and is not compatible with Portal Obj, which moves at a higher speed. If anyone says "There is such a way!", Please contact me!
I implemented the warp by "rewriting the position of the object" , but strictly speaking, there should be a state where it is half in front and half behind while passing through the gate. If you want to put out a large object, it will be noticeable, so you need to think about this as well. In addition, it needs to be affected by collisions both in front and behind, and more strictly, I feel that we have to intervene in the solver in the physics engine. It seems to be strict with Unity, so I feel that it is realistic to cheat well.
+ +I tried to reproduce Portal that I wanted to try from before with Unity. I tried it comfortably for the first time by stacking the cameras, but I found that it was more difficult than I expected. Among CG and game technologies, those that are closer to the real world are in high demand and are becoming more and more standardized. When it becomes easier to create a sense of reality, Anywhere Door-like "ideas that used to be common but unrealistic and slept" may come to life as a new experience.
+ +
|
![]() |
|
++Figure 8.1: Sphere deforms +
+When expressing the softness of an object, we sometimes imitate a spring or calculate a simulation of a fluid or soft body, but here we do not make such an exaggerated calculation, but express the soft deformation of the object. to watch. As shown in the figure, it is a transformation like a hand-drawn animation.
+The sample in this chapter is "Over Reaction" from
https://github.com/IndieVisualLab/UnityGraphicsProgramming3
.
In the "Over Reaction" scene, you can see the basic transformation. Watch the object move and transform from the manipulator or Inspector.
+In the "Physics Scene" scene, you can apply force to the object with the up, down, left, and right keys. If you place some objects on the scene, you can see how they transform depending on the situation.
+ +There are many possible transformation rules, but there are probably only three basic rules.
+Here, we will especially consider when the object moves, so first we will detect the direction and magnitude of the movement. Although different from the term used in the laws of physics, this parameter moveEnergyis called "kinetic energy" for convenience . Since kinetic energy is a parameter expressed by direction and magnitude, it can be expressed by a vector.
* Kinetic energy is the correct physics term. Here, it is named "move energy" because it is an energy that considers only movement.
+This is taken for granted in game programming and hardly worth spelling out, but the movement of an object is detected simply as a change in its coordinates. The only point worth noting is that FixedUpdate is used instead of Update.
OverReaction.cs
+// Fixed-timestep driver: measures this step's movement, updates the two
+// energy terms, deforms the mesh, then stores state for the next step.
+// FixedUpdate (not Update) so PhysX-driven motion is sampled consistently.
+protected void FixedUpdate()
+{
+ // Movement is simply the position delta since the previous step.
+ this.crntMove = this.transform.position - this.prevPosition;
+
+ UpdateMoveEnergy();
+ UpdateDeformEnergy();
+ DeformMesh();
+
+ this.prevPosition = this.transform.position;
+ this.prevMove = this.crntMove;
+}
+
+FixedUpdate is a method called at fixed time intervals, so it is clearly different in nature from Update, whose call rate varies with the frame rate. The details of this difference are beyond the main subject, but FixedUpdate was adopted here because we want to support object movement driven by Unity's PhysX (physics) engine. There is no particular need to deform the mesh as frequently as Update is called.
Now that the movement of the object can be calculated from the change in coordinates, let's calculate the kinetic energy. The calculation of kinetic energy is UpdateMoveEnergyimplemented in the method.
OverReaction.cs
+// Updates the "kinetic energy" vector by processing each axis
+// independently through the scalar UpdateMoveEnergy overload.
+protected void UpdateMoveEnergy()
+{
+ this.moveEnergy = new Vector3()
+ {
+ x = UpdateMoveEnergy
+ (this.crntMove.x, this.prevMove.x, this.moveEnergy.x),
+
+ y = UpdateMoveEnergy
+ (this.crntMove.y, this.prevMove.y, this.moveEnergy.y),
+
+ z = UpdateMoveEnergy
+ (this.crntMove.z, this.prevMove.z, this.moveEnergy.z),
+ };
+}
+
+Kinetic energy is calculated by decomposing into each component in the X, Y, and Z directions. The following UpdateMoveEnergyprocesses will be explained step by step.
First, consider the case where there is no current movement. When there is no movement, the existing kinetic energy decays.
+OverReaction.cs
+// Scalar per-axis energy update (listing abridged in the book).
+// crntMove/prevMove: this and the previous step's movement on one axis.
+// moveEnergy: the axis' accumulated energy from previous steps.
+protected float UpdateMoveEnergy
+(float crntMove, float prevMove, float moveEnergy)
+{
+ int crntMoveSign = Sign(crntMove);
+ int prevMoveSign = Sign(prevMove);
+ int moveEnergySign = Sign(moveEnergy);
+
+ // No movement this step: existing energy simply decays.
+ if (crntMoveSign == 0)
+ {
+ return moveEnergy * this.undeformPower;
+ }
+…
+}
+
+// Three-way sign: -1, 0 or +1. Note the exact float == 0 comparison --
+// deliberate here, since a coordinate delta of exactly zero means "no move".
+public static int Sign(float value)
+{
+ return value == 0 ? 0 : (value > 0 ? 1 : -1);
+}
+
+When the current movement and the previous movement are reversed, the kinetic energy is reversed.
+OverReaction.cs
+if (crntMoveSign != prevMoveSign)
+{
+ return moveEnergy - crntMove;
+}
+
+When the current movement and the kinetic energy are reversed, reduce the kinetic energy.
+OverReaction.cs
+if (crntMoveSign != moveEnergySign)
+{
+ return moveEnergy + crntMove;
+}
+
+In cases other than the above, when the current movement and the kinetic energy are in the same direction, the current movement and the existing kinetic energy are compared and the larger one is adopted.
+However, the kinetic energy decays and becomes smaller. In addition, the new kinetic energy generated by the current movement is increased by multiplying it by any parameter so that it can easily produce deformation.
+OverReaction.cs
+if (crntMoveSign < 0)
+{
+ return Mathf.Min(crntMove * this.deformPower,
+ moveEnergy * this.undeformPower);
+}
+else
+{
+ return Mathf.Max(crntMove * this.deformPower,
+ moveEnergy * this.undeformPower);
+}
+
+With this, the kinetic energy to be used for deformation could be calculated.
+ +It then converts the calculated kinetic energy into a parameter that determines the deformation. For convenience, this parameter, deformEnergy, is called "deformation energy". The deformation energy is updated by the UpdateDeformEnergy method.
The magnitude of the deformation energy can be defined as the magnitude of the kinetic energy as it is, but if there is a discrepancy between the direction of the deformation energy and the direction in which the object is moving, the deformation energy is not fully transmitted to the object. It is also possible that the direction of the deformation energy and the direction in which the object is moving are reversed.
+Therefore, the amount of deformation energy transmitted is calculated from the inner product of the deformation energy and the current movement. If the directions are exactly the same, the inner product of the unit vectors will be 1, and will gradually approach 0 depending on the magnitude of the deviation. When it is further inverted, it becomes a negative value.
+OverReaction.cs
+protected void UpdateDeformEnergy()
+{
+ float deformEnergyVertical
+ = this.moveEnergy.magnitude
+ * Vector3.Dot(this.moveEnergy.normalized,
+ this.crntMove.normalized);
+ …
+
+Now that we have calculated the force that deforms the object in the vertical direction, it deforms in the horizontal direction by the amount of change in the vertical direction. In other words, if the object stretches vertically, it will shrink horizontally. On the contrary, when it shrinks in the vertical direction, it should stretch in the horizontal direction.
+The amount of deformation in the vertical direction is calculated by "the magnitude of the deformation in the vertical direction / the maximum magnitude of the deformation". After that, it is calculated so that it deforms in the horizontal direction as much as it deforms in the vertical direction.
+Assuming that the deformation is +0.8 in the vertical direction, the deformation should be -0.8 in the horizontal direction, so the deformation energy in the horizontal direction is 1-0.8 = 0.2. Also, as the coefficient for actual deformation, * 0.8 is small, so add 1 to make it * 1.8.
+OverReaction.cs
+protected void UpdateDeformEnergy()
+{
+…
+ float deformEnergyHorizontalRatio
+ = deformEnergyVertical / this.maxDeformScale;
+
+ float deformEnergyHorizontal
+ = 1 - deformEnergyHorizontalRatio;
+…
+ deformEnergyVertical = 1 + deformEnergyVertical;
+}
+
+Finally, consider the case where the object collapses in the direction of travel. The case where the object collapses in the direction of travel is when the kinetic energy and the current movement are reversed, that is, when the previous "inner product of the kinetic energy and the current movement" is negative.
+When the value of the inner product is negative, the deformation energy in the vertical direction and the deformation energy in the horizontal direction are reversed.
+To help you understand the case, the following code is a continuation of the previous steps. When the dot product value is negative, deformEnergyHorizontal becomes a positive value greater than 1, and deformEnergyVertical, whose value is reversed relative to deformEnergyHorizontal, becomes a positive value less than 1.
OverReaction.cs
+protected void UpdateDeformEnergy()
+{
+ float deformEnergyVertical
+ = this.moveEnergy.magnitude
+ * Vector3.Dot(this.moveEnergy.normalized,
+ this.crntMove.normalized);
+
+ float deformEnergyHorizontalRatio
+ = deformEnergyVertical / this.maxDeformScale;
+
+ float deformEnergyHorizontal
+ = 1 - deformEnergyHorizontalRatio;
+
+ if (deformEnergyVertical < 0)
+ {
+ deformEnergyVertical = deformEnergyHorizontalRatio;
+ }
+
+ deformEnergyVertical = 1 + deformEnergyVertical;
+…
+
+Finally, correct the value so that the deformation energy falls within the arbitrarily set range, and complete the calculation of the deformation energy.
+OverReaction.cs
+deformEnergyVertical = Mathf.Clamp(deformEnergyVertical, + this.minDeformScale, + this.maxDeformScale); + +deformEnergyHorizontal = Mathf.Clamp(deformEnergyHorizontal, + this.minDeformScale, + this.maxDeformScale); + +this.deformEnergy = new Vector3(deformEnergyHorizontal, + deformEnergyVertical, + deformEnergyHorizontal); ++
Here, the mesh is transformed with a script for explanation and generalization. The transformation of the mesh is DeformMeshimplemented in the method.
* Since it is a matrix operation, it is often better to process it on the GPU using a shader for practical purposes.
+The obtained deformation energy deformEnergy is a vector representing expansion and contraction when facing the direction of the kinetic energy moveEnergy. Therefore, when transforming, it is necessary to match the coordinates before transforming. First, prepare the parameters required for that purpose: the rotation matrix of the current object and its inverse, and the rotation matrix of the kinetic energy moveEnergy and its inverse.
OverReaction.cs
+protected void DeformMesh()
+{
+Vector3[] deformedVertices = new Vector3[this.baseVertices.Length];
+
+Quaternion crntRotation = this.transform.localRotation;
+Quaternion crntRotationI = Quaternion.Inverse(crntRotation);
+
+Quaternion moveEnergyRotation
+= Quaternion.FromToRotation(Vector3.up, this.moveEnergy.normalized);
+Quaternion moveEnergyRotationI = Quaternion.Inverse(moveEnergyRotation);
+…
+
+Scale the vertices according to deformEnergy. If you give an appropriate deformation energy in an easy-to-understand manner and comment out the source code line by line, the processing procedure will be easier to understand.
OverReaction.cs
+for (int i = 0; i < this.baseVertices.Length; i++)
+{
+ deformedVertices[i] = this.baseVertices[i];
+ deformedVertices[i] = crntRotation * deformedVertices[i];
+ deformedVertices[i] = moveEnergyRotationI * deformedVertices[i];
+ deformedVertices[i] = new Vector3(
+ deformedVertices[i].x * this.deformEnergy.x,
+ deformedVertices[i].y * this.deformEnergy.y,
+ deformedVertices[i].z * this.deformEnergy.z);
+ deformedVertices[i] = moveEnergyRotation * deformedVertices[i];
+ deformedVertices[i] = crntRotationI * deformedVertices[i];
+}
+
+this.baseMesh.vertices = deformedVertices;
+
+I was able to transform the object with a very simple implementation. Although it is so easy to implement, the impression it gives to the appearance changes greatly.
+Although the calculation cost is higher, it is possible to support the rotation and enlargement / reduction of the object, move the center of gravity of the deformation, and support the skin mesh animation as an advanced form.
\ No newline at end of file diff --git a/html-translated/vol3/Chapter 8 _ Easily express soft deformation_files/01.png b/html-translated/vol3/Chapter 8 _ Easily express soft deformation_files/01.png new file mode 100644 index 0000000..41c931e Binary files /dev/null and b/html-translated/vol3/Chapter 8 _ Easily express soft deformation_files/01.png differ diff --git a/html-translated/vol3/Chapter 8 _ Easily express soft deformation_files/cleardot.gif b/html-translated/vol3/Chapter 8 _ Easily express soft deformation_files/cleardot.gif new file mode 100644 index 0000000..1d11fa9 Binary files /dev/null and b/html-translated/vol3/Chapter 8 _ Easily express soft deformation_files/cleardot.gif differ diff --git a/html-translated/vol3/Chapter 8 _ Easily express soft deformation_files/element_main.js b/html-translated/vol3/Chapter 8 _ Easily express soft deformation_files/element_main.js new file mode 100644 index 0000000..4c5de3c --- /dev/null +++ b/html-translated/vol3/Chapter 8 _ Easily express soft deformation_files/element_main.js @@ -0,0 +1,486 @@ +(function(){/* + + Copyright The Closure Library Authors. + SPDX-License-Identifier: Apache-2.0 +*/ +var aa='" style="background-image:url(',ba="-disabled",ca="-document.getElementById('",da="/translate_a/t",ea="/translate_suggestion?client=",fa='
|
![]() |
|
A person who makes interactive art in Unity. Freelance. We look forward to your work => hi@sugi.cc
+ + +A student freelance engineer who likes physics. I am addicted to the TA business of VFX production, interactive engineering, and VR related business. Please feel free to contact DM on Twitter!
+Interaction engineer. In the field of video expression such as installation, signage, stage production, music video, concert video, VJ, etc., we are producing content that makes use of real-time and procedural characteristics. I have been active several times in a unit called Aqueduct with sugi-cho and mattatz.
+A programmer who creates installations, signage, the Web (front-end / back-end), smartphone apps, etc. I am interested in video expression and design tool development.
+ + +An interactive engineer who works in an atmosphere. I often post Gene videos on Twitter. I do VJ once in a while.
+Former technical artist of a game development company. I like art, design and music, so I turned to interactive art. My hobbies are samplers, synths, musical instruments, records, and equipment. I started Twitter.
+ + +Former game developer, programmer making interactive art. I like the design and development of moderately complicated mechanisms and libraries. Night Type.
+ + +It is inevitable to keep up with it, and I am living somehow while becoming tattered. Please also use "Unity Shader Programming" for getting started with shaders.
+ \ No newline at end of file diff --git a/html-translated/vol3/Contributors_files/cleardot.gif b/html-translated/vol3/Contributors_files/cleardot.gif new file mode 100644 index 0000000..1d11fa9 Binary files /dev/null and b/html-translated/vol3/Contributors_files/cleardot.gif differ diff --git a/html-translated/vol3/Contributors_files/element_main.js b/html-translated/vol3/Contributors_files/element_main.js new file mode 100644 index 0000000..4c5de3c --- /dev/null +++ b/html-translated/vol3/Contributors_files/element_main.js @@ -0,0 +1,486 @@ +(function(){/* + + Copyright The Closure Library Authors. + SPDX-License-Identifier: Apache-2.0 +*/ +var aa='" style="background-image:url(',ba="-disabled",ca="-document.getElementById('",da="/translate_a/t",ea="/translate_suggestion?client=",fa='
|
![]() |
|
This book is the third volume of the "Unity Graphics Programming" series, which explains the technology related to graphics programming by Unity. This series provides introductory content and applications for beginners, as well as tips for intermediate and above, on a variety of topics that the authors are interested in.
+The source code explained in each chapter is published in the github repository ( https://github.com/IndieVisualLab/UnityGraphicsProgramming3 ), so you can read this manual while executing it at hand.
+The difficulty level varies depending on the article, and depending on the amount of knowledge of the reader, some content may be unsatisfactory or too difficult. Depending on your knowledge, it's a good idea to read articles on the topic you are interested in. For those who usually do graphics programming at work, I hope it will lead to more effect drawers, and students are interested in visual coding, I have touched Processing and openFrameworks, but I still have 3DCG. For those who are feeling a high threshold, I would be happy if it would be an opportunity to introduce Unity and learn about the high expressiveness of 3DCG and the start of development.
+IndieVisualLab is a circle created by colleagues (& former colleagues) in the company. In-house, we use Unity to program the contents of exhibited works in the category generally called media art, and we are using Unity, which is a bit different from the game system. In this book, knowledge that is useful for using Unity in the exhibited works may be scattered.
+ +Some of the contents explained in this manual use Compute Shader, Geometry Shader, etc., and the execution environment in which DirectX 11 operates is recommended, but there are also chapters where the contents are completed by the program (C #) on the CPU side.
+I think that the behavior of the sample code released may not be correct due to the difference in environment, but please take measures such as reporting an issue to the github repository and replacing it as appropriate.
+ +If you have any impressions, concerns, or other requests regarding this book (such as wanting to read the explanation about 〇〇), please feel free to use the Web form ( https://docs.google.com/forms/d/e/1FAIpQLSdxeansJvQGTWfZTBN_2RTuCK_kRqhA6QHTZKVXHCijQnC8zw/ Please let us know via viewform ) or email (lab.indievisual@gmail.com).
\ No newline at end of file diff --git a/html-translated/vol3/Preface_files/cleardot.gif b/html-translated/vol3/Preface_files/cleardot.gif new file mode 100644 index 0000000..1d11fa9 Binary files /dev/null and b/html-translated/vol3/Preface_files/cleardot.gif differ diff --git a/html-translated/vol3/Preface_files/element_main.js b/html-translated/vol3/Preface_files/element_main.js new file mode 100644 index 0000000..4c5de3c --- /dev/null +++ b/html-translated/vol3/Preface_files/element_main.js @@ -0,0 +1,486 @@ +(function(){/* + + Copyright The Closure Library Authors. + SPDX-License-Identifier: Apache-2.0 +*/ +var aa='" style="background-image:url(',ba="-disabled",ca="-document.getElementById('",da="/translate_a/t",ea="/translate_suggestion?client=",fa='
|
![]() |
|
This chapter introduces the GPU implementation of the Space Colonization Algorithm, an algorithm that generates a shape that blanchs along a point cloud, and its application examples.
+The sample in this chapter is "Space Colonization" at
https://github.com/IndieVisualLab/UnityGraphicsProgramming4
.
++Figure 1.1: SkinnedAnimation.scene +
+The Space Colonization Algorithm was developed by Adam et al. * 1 as a tree modeling method.
+[*1] http://algorithmicbotany.org/papers/colonization.egwnp2007.html
A method of generating a branching shape from a given point cloud,
+It has the following features.
+This chapter introduces the GPU implementation of this algorithm and application examples combined with skinning animation.
+ +First, I will explain the Space Colonization Algorithm. The general steps of the algorithm are divided as follows.
+In the initialization phase, the point cloud is prepared as an attraction (point that will be the seed of the branch). Place one or more Nodes (branch branch points) in the attraction. This first placed Node will be the starting point for your branch.
+In the figure below, Attraction is represented by a round dot and Node is represented by a square dot.
+
++Figure 1.2: Setup --Attraction and Node Initialization Round dots represent Attraction and square dots represent Node. +
+For each attraction, find the closest Node within the influence distance.
+
++Figure 1.3: Search-Search for the nearest Node in the area of influence from each Attraction +
+For each Node, determine the direction to extend the branch based on the attraction within the range of influence, and the point beyond the extension by the growth distance is the candidate point (Candidate) for the point to generate a new Node. )will do.
+
++Figure 1.4: Attract-Extend a branch from each Node and determine candidate points to generate new Nodes +
+Create a new Node at the Candidate position and connect the original Node with Edge to extend the branch.
+
++Figure 1.5: Connect-Connecting a new node to an existing node to extend a branch +
+Deletes an attraction that is within the kill distance from the node.
+
++Figure 1.6: Remove --Search Node for attractions within the removal range +
+
++Figure 1.7: Remove-Removes Attraction found within the removal range +
+Grow Node and go back to Step.2.
+The general flow of the entire algorithm is shown in the figure below.
+
++Figure 1.8: Rough flow of the algorithm +
+Now, I will explain the concrete implementation of the algorithm.
+ +As an element that increases or decreases in the Space Colonization Algorithm
+Is required, but in order to express these on GPGPU, Append / ConsumeStructuredBuffer is used for some elements.
+Append / ConsumeStructuredBuffer is explained in Unity Graphics Programming vol.3 "GPU-Based Cellular Growth Simulation".
+ +The structure of Attraction is defined as follows.
+Attraction.cs
+public struct Attraction {
+ public Vector3 position; // position
+ public int nearest; // index of the nearest Node
+ public uint found; // Whether a nearby Node was found
+ public uint active; // Whether it is a valid attraction (1 is valid, 0 is deleted)
+}
+
+The increase / decrease of attraction is expressed by determining whether it is a deleted attraction by the active flag.
+In Space Colonization, it is necessary to prepare a point cloud of Attraction in the initialization phase. In the sample SpaceColonization.cs, point clouds are randomly scattered inside the sphere and used as the position of Attraction.
+SpaceColonization.cs
+// Randomly sprinkle points inside the sphere to generate an attraction + var attractions = GenerateSphereAttractions(); + count = attractions.Length; + + // Initialize the Attraction buffer + attractionBuffer = new ComputeBuffer( + count, + Marshal.SizeOf(typeof(Attraction)), + ComputeBufferType.Default + ); + attractionBuffer.SetData(attractions); ++
The structure of Node is defined as follows.
+Node.cs
+public struct Node {
+ public Vector3 position; // position
+ public float t; // Growth rate (0.0 ~ 1.0)
+ public float offset; // Distance from Root (Node depth)
+ public float mass; // mass
+ public int from; // index of branch source Node
+ public uint active; // Whether it is a valid Node (1 is valid)
+}
+
+Node resources
+It is managed by two buffers.
+SpaceColonization.cs
+// Actual Node data + nodeBuffer = new ComputeBuffer( + count, + Marshal.SizeOf(typeof(Node)), + ComputeBufferType.Default + ); + + // Object pool + nodePoolBuffer = new ComputeBuffer( + count, + Marshal.SizeOf(typeof(int)), + ComputeBufferType.Append + ); + nodePoolBuffer.SetCounterValue(0); ++
The structure of Candidate is defined as follows.
+Candidate.cs
+public struct Candidate
+{
+ public Vector3 position; // position
+ public int node; // index of the original Node of the candidate point
+}
+
+Candidate is represented by Append / ConsumeStructuredBuffer.
+SpaceColonization.cs
+candidateBuffer = new ComputeBuffer( + count, + Marshal.SizeOf(typeof(Candidate)), + ComputeBufferType.Append + ); + candidateBuffer.SetCounterValue(0); ++
The structure of Edge is defined as follows.
+Edge.cs
+public struct Edge {
+ public int a, b; // index of two Nodes connected by Edge
+}
+
+Edge is represented by Append / Consume Structured Buffer like Candidate.
+SpaceColonization.cs
+edgeBuffer = new ComputeBuffer( + count * 2, + Marshal.SizeOf(typeof(Edge)), + ComputeBufferType.Append + ); + edgeBuffer.SetCounterValue(0); ++
Now that we have the necessary resources, we will implement each step of the algorithm in GPGPU with Compute Shader.
+ +In the initialization phase
+to hold.
+Pick up some from the prepared Attraction and generate an initial Node at that position.
+SpaceColonization.cs
+ var seeds = Enumerable.Range(0, seedCount).Select((_) => {
+ return attractions[Random.Range(0, count)].position;
+ }).ToArray();
+ Setup(seeds);
+
+SpaceColonization.cs
+protected void Setup(Vector3[] seeds)
+{
+ var kernel = compute.FindKernel("Setup");
+ compute.SetBuffer(kernel, "_NodesPoolAppend", nodePoolBuffer);
+ compute.SetBuffer(kernel, "_Nodes", nodeBuffer);
+ GPUHelper.Dispatch1D(compute, kernel, count);
+
+ ...
+}
+
+The Setup kernel initializes the object pool. Store the index in the Node's object pool and turn off the active flag for that Node.
+SpaceColonization.compute
+void Setup (uint3 id : SV_DispatchThreadID)
+{
+ uint idx = id.x;
+ uint count, stride;
+ _Nodes.GetDimensions(count, stride);
+ if (idx >= count)
+ return;
+
+ _NodesPoolAppend.Append(idx);
+
+ Node n = _Nodes[idx];
+ n.active = false;
+ _Nodes[idx] = n;
+}
+
+This will turn off the active flags for all Nodes and create an object pool with Node indexes.
+Now that the object pool has been initialized, it's time to create the initial seed node.
+The initial node is generated by executing the Seed kernel with the seed position (Vector3 []) prepared earlier as input.
+SpaceColonization.cs
+...
+
+// seedBuffer is automatically disposed when it goes out of scope
+using(
+ ComputeBuffer seedBuffer = new ComputeBuffer(
+ seeds.Length,
+ Marshal.SizeOf(typeof(Vector3))
+ )
+)
+{
+ seedBuffer.SetData(seeds);
+ kernel = compute.FindKernel("Seed");
+ compute.SetFloat("_MassMin", massMin);
+ compute.SetFloat("_MassMax", massMax);
+ compute.SetBuffer(kernel, "_Seeds", seedBuffer);
+ compute.SetBuffer(kernel, "_NodesPoolConsume", nodePoolBuffer);
+ compute.SetBuffer(kernel, "_Nodes", nodeBuffer);
+ GPUHelper.Dispatch1D(compute, kernel, seedBuffer.count);
+}
+
+// Initialize the number of Nodes and Edges
+nodesCount = nodePoolBuffer.count;
+edgesCount = 0;
+
+...
+
+The Seed kernel takes a position from the Seeds buffer and creates a Node at that position.
+SpaceColonization.compute
+void Seed (uint3 id : SV_DispatchThreadID)
+{
+ uint idx = id.x;
+
+ uint count, stride;
+ _Seeds.GetDimensions(count, stride);
+ if (idx >= count)
+ return;
+
+ Node n;
+
+ // Create a new Node (see below)
+ uint i = CreateNode(n);
+
+ // Set Seed position to Node position
+ n.position = _Seeds[idx];
+ n.t = 1;
+ n.offset = 0;
+ n.from = -1;
+ n.mass = lerp(_MassMin, _MassMax, nrand(id.xy));
+ _Nodes[i] = n;
+}
+
+Create a new Node with the CreateNode function. Extracts the index from the object pool ConsumeStructuredBuffer and returns the initialized Node.
+SpaceColonization.compute
+uint CreateNode(out Node node)
+{
+ uint i = _NodesPoolConsume.Consume();
+ node.position = float3(0, 0, 0);
+ node.t = 0;
+ node.offset = 0;
+ node.from = -1;
+ node.mass = 0;
+ node.active = true;
+ return i;
+}
+
+This is the end of the initialization phase.
+Each step of the looping algorithm shown in Figure 1.8 is performed within the Step function.
+SpaceColonization.cs
+protected void Step(float dt)
+{
+ // Do not run when the object pool is empty
+ if (nodesCount > 0)
+ {
+ Search(); // Step.2
+ Attract(); // Step.3
+ Connect(); // Step.4
+ Remove(); // Step.5
+
+ // Get the number of data that Append / ConsumeStructuredBuffer has
+ CopyNodesCount();
+ CopyEdgesCount();
+ }
+ Grow(dt); // Step.6
+}
+
+
+From each attraction, find the closest Node within the influence distance.
+SpaceColonization.cs
+protected void Search()
+{
+ var kernel = compute.FindKernel("Search");
+ compute.SetBuffer(kernel, "_Attractions", attractionBuffer);
+ compute.SetBuffer(kernel, "_Nodes", nodeBuffer);
+ compute.SetFloat("_InfluenceDistance", unitDistance * influenceDistance);
+ GPUHelper.Dispatch1D(compute, kernel, count);
+}
+
+The GPU kernel implementation is as follows.
+SpaceColonization.compute
+void Search (uint3 id : SV_DispatchThreadID)
+{
+ uint idx = id.x;
+ uint count, stride;
+ _Attractions.GetDimensions(count, stride);
+ if (idx >= count)
+ return;
+
+ Attraction attr = _Attractions[idx];
+
+ attr.found = false;
+ if (attr.active)
+ {
+ _Nodes.GetDimensions(count, stride);
+
+ // Search for Nodes closer than influence distance
+ float min_dist = _InfluenceDistance;
+
+ // index of the nearest Node
+ uint nearest = -1;
+
+ // Execute a loop for all Nodes
+ for (uint i = 0; i < count; i++)
+ {
+ Node n = _Nodes[i];
+
+ if (n.active)
+ {
+ float3 dir = attr.position - n.position;
+ float d = length(dir);
+ if (d < min_dist)
+ {
+ // Update the nearest Node
+ min_dist = d;
+ nearest = i;
+
+ // Set the index of the neighboring Node
+ attr.found = true;
+ attr.nearest = nearest;
+ }
+ }
+ }
+
+ _Attractions[idx] = attr;
+ }
+}
+
+For each Node, determine the direction to extend the branch based on the attraction within the range of influence, and the point beyond the extension by the growth distance is the candidate point (Candidate) for the point to generate a new Node. )will do.
+SpaceColonization.cs
+protected void Attract()
+{
+ var kernel = compute.FindKernel("Attract");
+ compute.SetBuffer(kernel, "_Attractions", attractionBuffer);
+ compute.SetBuffer(kernel, "_Nodes", nodeBuffer);
+
+ candidateBuffer.SetCounterValue (0); // Initialize the buffer that stores the candidate points
+ compute.SetBuffer(kernel, "_CandidatesAppend", candidateBuffer);
+
+ compute.SetFloat("_GrowthDistance", unitDistance * growthDistance);
+
+ GPUHelper.Dispatch1D(compute, kernel, count);
+}
+
+The GPU kernel implementation is as follows. Please refer to the code contents and comments of the Attract kernel for the calculation method of the position of the candidate point.
+SpaceColonization.compute
+void Attract (uint3 id : SV_DispatchThreadID)
+{
+ uint idx = id.x;
+ uint count, stride;
+ _Nodes.GetDimensions(count, stride);
+ if (idx >= count)
+ return;
+
+ Node n = _Nodes[idx];
+
+ // Node is valid and
+ // Create a new Node if the growth rate (t) is greater than or equal to the threshold (1.0)
+ if (n.active && n.t >= 1.0)
+ {
+ // Accumulation variable to extend the branch
+ float3 dir = (0.0).xxx;
+ uint counter = 0;
+
+ // Run a loop for all attractions
+ _Attractions.GetDimensions(count, stride);
+ for (uint i = 0; i < count; i++)
+ {
+ Attraction attr = _Attractions[i];
+ // Search for the attraction whose node is the nearest neighbor
+ if (attr.active && attr.found && attr.nearest == idx)
+ {
+ // Normalize the vector from Node to Attraction and add it to the accumulation variable
+ float3 dir2 = (attr.position - n.position);
+ dir += normalize(dir2);
+ counter++;
+ }
+ }
+
+ if (counter > 0)
+ {
+ Candidate c;
+
+ // Take the average of the unit vectors from Node to Attraction
+ // Set it as the position of the candidate point extended from the Node by the growth distance
+ dir = dir / counter;
+ c.position = n.position + (dir * _GrowthDistance);
+
+ // Set the index of the original Node that extends to the candidate point
+ c.node = idx;
+
+ // Add to candidate point buffer
+ _CandidatesAppend.Append(c);
+ }
+ }
+}
+
+Create a new Node based on the candidate point buffer generated by the Attract kernel, and extend the branch by connecting the Nodes with Edge.
+In the Connect function, the number of kernel executions is determined by comparing the remaining number of object pool entries (nodesCount) with the size of the candidate point buffer, so that data retrieval (Consume) is not executed when the Node object pool (nodePoolBuffer) is empty.
+SpaceColonization.cs
+protected void Connect()
+{
+ var kernel = compute.FindKernel("Connect");
+ compute.SetFloat("_MassMin", massMin);
+ compute.SetFloat("_MassMax", massMax);
+ compute.SetBuffer(kernel, "_Nodes", nodeBuffer);
+ compute.SetBuffer(kernel, "_NodesPoolConsume", nodePoolBuffer);
+ compute.SetBuffer(kernel, "_EdgesAppend", edgeBuffer);
+ compute.SetBuffer(kernel, "_CandidatesConsume", candidateBuffer);
+
+ // The number of data (nodeCount) of the Node object pool acquired by CopyNodeCount
+ // Restrict so that it does not exceed
+ var connectCount = Mathf.Min(nodesCount, CopyCount(candidateBuffer));
+ if (connectCount > 0)
+ {
+ compute.SetInt("_ConnectCount", connectCount);
+ GPUHelper.Dispatch1D(compute, kernel, connectCount);
+ }
+}
+
+Below is the implementation of the GPU kernel.
+SpaceColonization.compute
+void Connect (uint3 id : SV_DispatchThreadID)
+{
+ uint idx = id.x;
+ if (idx >= _ConnectCount)
+ return;
+
+ // Extract candidate points from the candidate point buffer
+ Candidate c = _CandidatesConsume.Consume();
+
+ Node n1 = _Nodes [c.node];
+ Node n2;
+
+ // Generate Node at the position of the candidate point
+ uint idx2 = CreateNode(n2);
+ n2.position = c.position;
+ n2.offset = n1.offset + 1.0; // Set the distance from Root (original Node + 1.0)
+ n2.from = c.node; // Set the index of the original Node
+ n2.mass = lerp(_MassMin, _MassMax, nrand(float2(c.node, idx2)));
+
+ // Update Node buffer
+ _Nodes[c.node] = n1;
+ _Nodes[idx2] = n2;
+
+ // Connect two Nodes with Edge (see below)
+ CreateEdge(c.node, idx2);
+}
+
+The CreateEdge function creates an Edge based on the indexes of the two Nodes passed and adds it to the Edge buffer.
+SpaceColonization.compute
+void CreateEdge(int a, int b)
+{
+ Edge e;
+ e.a = a;
+ e.b = b;
+ _EdgesAppend.Append(e);
+}
+
+Remove the Attraction that is within the kill distance from the Node.
+SpaceColonization.cs
+protected void Remove()
+{
+ var kernel = compute.FindKernel("Remove");
+ compute.SetBuffer(kernel, "_Attractions", attractionBuffer);
+ compute.SetBuffer(kernel, "_Nodes", nodeBuffer);
+ compute.SetFloat("_KillDistance", unitDistance * killDistance);
+ GPUHelper.Dispatch1D(compute, kernel, count);
+}
+
+The GPU kernel implementation is as follows.
+SpaceColonization.compute
+void Remove(uint3 id : SV_DispatchThreadID)
+{
+ uint idx = id.x;
+ uint count, stride;
+ _Attractions.GetDimensions(count, stride);
+ if (idx >= count)
+ return;
+
+ Attraction attr = _Attractions[idx];
+ // Do not execute if the attraction has been deleted
+ if (!attr.active)
+ return;
+
+ // Execute a loop for all Nodes
+ _Nodes.GetDimensions(count, stride);
+ for (uint i = 0; i < count; i++)
+ {
+ Node n = _Nodes[i];
+ if (n.active)
+ {
+ // If there is a Node within the deletion range, turn off the active flag of Attraction and delete it
+ float d = distance(attr.position, n.position);
+ if (d < _KillDistance)
+ {
+ attr.active = false;
+ _Attractions[idx] = attr;
+ return;
+ }
+ }
+ }
+}
+
+Grow Node.
+When generating candidate points with the Attract kernel, whether the growth rate (t) of a Node is above the threshold value is used as a condition (if it is below the threshold, no candidate points are generated); that growth rate parameter is incremented in this Grow kernel.
+SpaceColonization.cs
+protected void Grow(float dt)
+{
+ var kernel = compute.FindKernel("Grow");
+ compute.SetBuffer(kernel, "_Nodes", nodeBuffer);
+
+ var delta = dt * growthSpeed;
+ compute.SetFloat("_DT", delta);
+
+ GPUHelper.Dispatch1D(compute, kernel, count);
+}
+
+SpaceColonization.compute
+void Grow (uint3 id : SV_DispatchThreadID)
+{
+ uint idx = id.x;
+ uint count, stride;
+ _Nodes.GetDimensions(count, stride);
+ if (idx >= count)
+ return;
+
+ Node n = _Nodes[idx];
+
+ if (n.active)
+ {
+ // Disperse the growth rate with the mass parameter randomly set for each Node
+ n.t = saturate(n.t + _DT * n.mass);
+ _Nodes[idx] = n;
+ }
+}
+
+Now that we have a blanched shape with the above implementation, let's talk about how to render that shape.
+ +First, simply render using Line Mesh.
+Generate a simple Line Topology Mesh to draw a Line that represents a single Edge.
+SpaceColonization.cs
+protected Mesh BuildSegment()
+{
+ var mesh = new Mesh ();
+ mesh.hideFlags = HideFlags.DontSave;
+ mesh.vertices = new Vector3[2] { Vector3.zero, Vector3.up };
+ mesh.uv = new Vector2[2] { new Vector2(0f, 0f), new Vector2(0f, 1f) };
+ mesh.SetIndices(new int[2] { 0, 1 }, MeshTopology.Lines, 0);
+ return mesh;
+}
+
+
++Figure 1.9: Line Topology Mesh with only two simple vertices +
+Display the branches generated by rendering a Segment (line segment) with only two vertices using GPU instancing for the number of Edges.
+SpaceColonization.cs
+// Generate a buffer that determines the number of meshes to render, required for GPU instancing
+protected void SetupDrawArgumentsBuffers(int count)
+{
+ if (drawArgs[1] == (uint)count) return;
+
+ drawArgs[0] = segment.GetIndexCount(0);
+ drawArgs[1] = (uint)count;
+
+ if (drawBuffer != null) drawBuffer.Dispose();
+ drawBuffer = new ComputeBuffer(
+ 1,
+ sizeof(uint) * drawArgs.Length,
+ ComputeBufferType.IndirectArguments
+ );
+ drawBuffer.SetData(drawArgs);
+}
+
+...
+
+// Perform rendering with GPU instancing
+protected void Render(float extents = 100f)
+{
+ block.SetBuffer("_Nodes", nodeBuffer);
+ block.SetBuffer("_Edges", edgeBuffer);
+ block.SetInt("_EdgesCount", edgesCount);
+ block.SetMatrix("_World2Local", transform.worldToLocalMatrix);
+ block.SetMatrix("_Local2World", transform.localToWorldMatrix);
+ Graphics.DrawMeshInstancedIndirect(
+ segment, 0,
+ material, new Bounds(Vector3.zero, Vector3.one * extents),
+ drawBuffer, 0, block
+ );
+}
+
+The shader for rendering (Edge.shader) generates an animation of the branch extending from the branch point by controlling the length of the Edge according to the growth rate parameter (t) of the Node.
+Edge.shader
+v2f vert(appdata IN, uint iid : SV_InstanceID)
+{
+ v2f OUT;
+ UNITY_SETUP_INSTANCE_ID(IN);
+ UNITY_TRANSFER_INSTANCE_ID(IN, OUT);
+
+ // Get the corresponding Edge from the instance ID
+ Edge e = _Edges[iid];
+
+ // Get 2 Nodes from the index of Edge
+ Node a = _Nodes[e.a];
+ Node b = _Nodes[e.b];
+
+ float3 ap = a.position;
+ float3 bp = b.position;
+ float3 dir = bp - ap;
+
+ // Determine the length of Edge from a to b according to the growth rate (t) of Node b
+ bp = ap + normalize(dir) * length(dir) * b.t;
+
+ // Since the vertex ID (IN.vid) is 0 or 1, if it is 0, it refers to the node of a, and if it is 1, it refers to the position of Node of b.
+ float3 position = lerp(ap, bp, IN.vid);
+
+ float4 vertex = float4(position, 1);
+ OUT.position = UnityObjectToClipPos(vertex);
+ OUT.uv = IN.uv;
+
+ // If Node is inactive or the instance ID is outside the total number of Edges, set alpha to 0 and do not draw
+ OUT.alpha = (a.active && b.active) && (iid < _EdgesCount);
+
+ return OUT;
+}
+
+With these implementations, the shape obtained by the Space Colonization Algorithm can be rendered using Line Topology. You can get the following picture by executing Line.scene.
+
++Figure 1.10: Line.scene --Example of rendering with Edge.shader +
+By converting the Line Topology Segment to a Capsule shape with the Geometry Shader, you can draw thick lines.
+
++Figure 1.11: Convert Line Topology Segment to Capsule shape with Geometry Shader +
+The vertex shader is almost the same as Edge.shader, and Geometry Shader builds the Capsule shape. Only the important Geometry Shader implementations are listed below.
+TubularEdge.shader
+...
+[maxvertexcount(64)]
+void geom(line v2g IN[2], inout TriangleStream<g2f> OUT) {
+ v2g p0 = IN[0];
+ v2g p1 = IN[1];
+
+ float alpha = p0.alpha;
+
+ float3 t = normalize(p1.position - p0.position);
+ float3 n = normalize(p0.viewDir);
+ float3 bn = cross(t, n);
+ n = cross(t, bn);
+
+ float3 tp = lerp(p0.position, p1.position, alpha);
+ float thickness = _Thickness * alpha;
+
+ // Definition of Capsule mesh resolution
+ static const uint rows = 6, cols = 6;
+ static const float rows_inv = 1.0 / rows, cols_inv = 1.0 / (cols - 1);
+
+ g2f o0, o1;
+ o0.uv = p0.uv; o0.uv2 = p0.uv2;
+ o1.uv = p1.uv; o1.uv2 = p1.uv2;
+
+ // Build aspects of the Capsule
+ for (uint i = 0; i < cols; i++) {
+ float r = (i * cols_inv) * UNITY_TWO_PI;
+
+ float s, c;
+ sincos(r, s, c);
+ float3 normal = normalize(n * c + bn * s);
+
+ float3 w0 = p0.position + normal * thickness;
+ float3 w1 = p1.position + normal * thickness;
+ o0.normal = o1.normal = normal;
+
+ o0.position = UnityWorldToClipPos(w0);
+ OUT.Append(o0);
+
+ o1.position = UnityWorldToClipPos(w1);
+ OUT.Append(o1);
+ }
+ OUT.RestartStrip();
+
+ // Construction of Capsule tip (hemispherical)
+ uint row, col;
+ for (row = 0; row < rows; row++)
+ {
+ float s0 = sin((row * rows_inv) * UNITY_HALF_PI);
+ float s1 = sin(((row + 1) * rows_inv) * UNITY_HALF_PI);
+ for (col = 0; col < cols; col++)
+ {
+ float r = (col * cols_inv) * UNITY_TWO_PI;
+
+ float s, c;
+ sincos(r, s, c);
+
+ float3 n0 = normalize(n * c * (1.0 - s0) + bn * s * (1.0 - s0) + t * s0);
+ float3 n1 = normalize(n * c * (1.0 - s1) + bn * s * (1.0 - s1) + t * s1);
+
+ o0.position = UnityWorldToClipPos(float4(tp + n0 * thickness, 1));
+ o0.normal = n0;
+ OUT.Append(o0);
+
+ o1.position = UnityWorldToClipPos(float4(tp + n1 * thickness, 1));
+ o1.normal = n1;
+ OUT.Append(o1);
+ }
+ OUT.RestartStrip();
+ }
+}
+
+...
+
+The result looks like this: (TubularEdge.scene)
+
++Figure 1.12: TubularEdge.scene --Example of rendering with TubularEdge.shader +
+Now that you can render Edge with a thick mesh, you can add lighting and so on.
+ +With the above, we have realized the GPU implementation of Space Colonization. In this section, we will introduce the cooperation with skinning animation as an application example.
+With this application, it is possible to realize an expression that branches along the animated model shape.
+ +The cooperation with skinning animation is developed according to the following flow.
+The structure of the previous example
+Make changes to have the Bone index.
+In this application, the number of bones that affect each node is limited to one. Originally, skinning animation could have multiple bones affecting each vertex, but in this example we simply limit it to only one.
+SkinnedAttraction.cs
+public struct SkinnedAttraction {
+ public Vector3 position;
+ public int bone; // boneのindex
+ public int nearest;
+ public uint found;
+ public uint active;
+}
+
+SkinnedNode.cs
+public struct SkinnedNode {
+ public Vector3 position;
+ public Vector3 animated; // Node position after skinning animation
+ public int index0; // boneのindex
+ public float t;
+ public float offset;
+ public float mass;
+ public int from;
+ public uint active;
+}
+
+SkinnedCandidate.cs
+public struct SkinnedCandidate
+{
+ public Vector3 position;
+ public int node;
+ public int bone; // boneのindex
+}
+
+Prepare the animation model you want to link.
+In this example, the model downloaded from Clara.io * 2 is used (the number of polygons is reduced by reduction with MeshLab * 3 ), and the animation is generated by mixamo * 4 .
+[*2] https://clara.io/view/d49ee603-8e6c-4720-bd20-9e3d7b13978a
[*3] http://www.meshlab.net/
[*4] https://mixamo.com
In order to get the position of Attraction from the model volume, we use a package called VolumeSampler * 5 that generates a point cloud in the model volume .
+[*5] https://github.com/mattatz/unity-volume-sampler
VolumeSampler acquires the volume of the model using the technique explained in Unity Graphics Programming vol.2 "Real-Time GPU-Based Voxelizer". First, the volume inside the mesh is acquired as Voxel, and Poisson Disk Sampling is executed based on it to generate a point cloud that fills the inside of the mesh.
+To generate a point cloud asset using VolumeSampler, click Window → VolumeSampler from the Unity toolbar to display Window, and as shown in the figure below.
+If you set and click the asset generation button, the Volume asset will be generated in the specified path.
+
++図1.13: VolumeSamplerWindow +
+Generate a SkinnedAttraction array from the point cloud asset (Volume class) generated from VolumeSampler and apply it to ComputeBuffer.
+SkinnedSpaceColonization.cs
+protected void Start() {
+ ...
+ // Generate a Skinned Attraction array from the point cloud of Volume
+ attractions = GenerateAttractions(volume);
+ count = attractions.Length;
+ attractionBuffer = new ComputeBuffer(
+ count,
+ Marshal.SizeOf(typeof(SkinnedAttraction)),
+ ComputeBufferType.Default
+ );
+ attractionBuffer.SetData(attractions);
+ ...
+}
+
+Bone information of the nearest vertex from each position is applied to Skinned Attraction generated from the volume of Mesh.
+The SetupSkin function prepares the vertices of the mesh and the bone buffer, and assigns the bone index to all Skinned Attraction on the GPU.
+SkinnedSpaceColonization.cs
+protected void Start() {
+ ...
+ SetupSkin();
+ ...
+}
+
+...
+
+protected void SetupSkin()
+{
+ var mesh = skinnedRenderer.sharedMesh;
+ var vertices = mesh.vertices;
+ var weights = mesh.boneWeights;
+ var indices = new int[weights.Length];
+ for(int i = 0, n = weights.Length; i < n; i++)
+ indices[i] = weights[i].boneIndex0;
+
+ using (
+ ComputeBuffer
+ vertBuffer = new ComputeBuffer(
+ vertices.Length,
+ Marshal.SizeOf(typeof(Vector3))
+ ),
+ boneBuffer = new ComputeBuffer(
+ weights.Length,
+ Marshal.SizeOf(typeof(uint))
+ )
+ )
+ {
+ vertBuffer.SetData(vertices);
+ boneBuffer.SetData(indices);
+
+ var kernel = compute.FindKernel("SetupSkin");
+ compute.SetBuffer(kernel, "_Vertices", vertBuffer);
+ compute.SetBuffer(kernel, "_Bones", boneBuffer);
+ compute.SetBuffer(kernel, "_Attractions", attractionBuffer);
+ GPUHelper.Dispatch1D(compute, kernel, attractionBuffer.count);
+ }
+}
+
+Below is the implementation of the GPU kernel.
+SkinnedSpaceColonization.compute
+void SetupSkin (uint3 id : SV_DispatchThreadID)
+{
+ uint idx = id.x;
+ uint count, stride;
+ _Attractions.GetDimensions(count, stride);
+ if (idx >= count)
+ return;
+
+ SkinnedAttraction attr = _Attractions[idx];
+
+ // Get the index of the nearest vertex from the position of Skinned Attraction
+ float3 p = attr.position;
+ uint closest = -1;
+ float dist = 1e8;
+ _Vertices.GetDimensions(count, stride);
+ for (uint i = 0; i < count; i++)
+ {
+ float3 v = _Vertices[i];
+ float l = distance(v, p);
+ if (l < dist)
+ {
+ dist = l;
+ closest = i;
+ }
+ }
+
+ // Set the bone index of the nearest vertex to Skinned Attraction
+ attr.bone = _Bones[closest];
+ _Attractions[idx] = attr;
+}
+
+In this application, some GPU kernels are modified to get the bone information needed for skinning animation in each step of the Space Colonization Algorithm.
+The contents of the GPU kernel are almost the same, but for the generated SkinnedNode, it is necessary to obtain Bone information (Bone index) from the nearest Skinned Attraction, so
+In the two GPU kernels, neighborhood search logic has been added.
+SkinnedSpaceColonization.compute
+void Seed (uint3 id : SV_DispatchThreadID)
+{
+ uint idx = id.x;
+
+ uint count, stride;
+ _Seeds.GetDimensions(count, stride);
+ if (idx >= count)
+ return;
+
+ SkinnedNode n;
+ uint i = CreateNode(n);
+ n.position = n.animated = _Seeds[idx];
+ n.t = 1;
+ n.offset = 0;
+ n.from = -1;
+ n.mass = lerp(_MassMin, _MassMax, nrand(id.xy));
+
+ // Search for the nearest Skinned Attraction and
+ // Copy the Bone index
+ uint nearest = -1;
+ float dist = 1e8;
+ _Attractions.GetDimensions(count, stride);
+ for (uint j = 0; j < count; j++)
+ {
+ SkinnedAttraction attr = _Attractions[j];
+ float l = distance(attr.position, n.position);
+ if (l < dist)
+ {
+ nearest = j;
+ dist = l;
+ }
+ }
+ n.index0 = _Attractions[nearest].bone;
+
+ _Nodes[i] = n;
+}
+
+...
+
+void Attract (uint3 id : SV_DispatchThreadID)
+{
+ uint idx = id.x;
+ uint count, stride;
+ _Nodes.GetDimensions(count, stride);
+ if (idx >= count)
+ return;
+
+ SkinnedNode n = _Nodes[idx];
+
+ if (n.active && n.t >= 1.0)
+ {
+ float3 dir = (0.0).xxx;
+ uint counter = 0;
+
+ float dist = 1e8;
+ uint nearest = -1;
+
+ _Attractions.GetDimensions(count, stride);
+ for (uint i = 0; i < count; i++)
+ {
+ SkinnedAttraction attr = _Attractions[i];
+ if (attr.active && attr.found && attr.nearest == idx)
+ {
+ float3 dir2 = (attr.position - n.position);
+ dir += normalize(dir2);
+ counter++;
+
+ // Search for the nearest Skinned Attraction
+ float l2 = length(dir2);
+ if (l2 < dist)
+ {
+ dist = l2;
+ nearest = i;
+ }
+ }
+ }
+
+ if (counter > 0)
+ {
+ SkinnedCandidate c;
+ dir = dir / counter;
+ c.position = n.position + (dir * _GrowthDistance);
+ c.node = idx;
+ // Set the bone index of the nearest Skinned Attraction
+ c.bone = _Attractions[nearest].bone;
+ _CandidatesAppend.Append(c);
+ }
+ }
+}
+
+
+With the above implementation, you can now execute the Space Colonization Algorithm while setting the Bone information for Node.
+After that, you can apply skinning animation to Node by getting the required Bone matrix from SkinnedMeshRenderer and moving the position of SkinnedNode on the GPU according to the deformation of Bone.
+SkinnedSpaceColonization.cs
+protected void Start() {
+ ...
+ // Create a buffer for the bind pose matrix
+ var bindposes = skinnedRenderer.sharedMesh.bindposes;
+ bindPoseBuffer = new ComputeBuffer(
+ bindposes.Length,
+ Marshal.SizeOf(typeof(Matrix4x4))
+ );
+ bindPoseBuffer.SetData(bindposes);
+ ...
+}
+
+protected void Animate()
+{
+ // Create a buffer representing the SkinnedMeshRenderer's Bone matrix that is updated as the animation plays
+ var bones = skinnedRenderer.bones.Select(bone => {
+ return bone.localToWorldMatrix;
+ }).ToArray();
+ using (
+ ComputeBuffer boneMatrixBuffer = new ComputeBuffer(
+ bones.Length,
+ Marshal.SizeOf(typeof(Matrix4x4))
+ )
+ )
+ {
+ boneMatrixBuffer.SetData(bones);
+
+ // Pass the Bone and Node buffers and perform GPU skinning
+ var kernel = compute.FindKernel("Animate");
+ compute.SetBuffer(kernel, "_BindPoses", bindPoseBuffer);
+ compute.SetBuffer(kernel, "_BoneMatrices", boneMatrixBuffer);
+ compute.SetBuffer(kernel, "_Nodes", nodeBuffer);
+ GPUHelper.Dispatch1D(compute, kernel, count);
+ }
+}
+
+SkinnedSpaceColonization.compute
+void Animate (uint3 id : SV_DispatchThreadID)
+{
+ uint idx = id.x;
+ uint count, stride;
+ _Nodes.GetDimensions(count, stride);
+ if (idx >= count)
+ return;
+
+ SkinnedNode node = _Nodes[idx];
+ if (node.active)
+ {
+ // Perform skinning
+ float4x4 bind = _BindPoses[node.index0];
+ float4x4 m = _BoneMatrices[node.index0];
+ node.animated = mul(mul(m, bind), float4(node.position, 1)).xyz;
+ _Nodes[idx] = node;
+ }
+}
+
+The shaders for rendering are almost the same, except that the Edge is drawn by referring to the animated position after skinning animation, not the original position of the SkinnedNode.
+SkinnedTubularEdge.hlsl
+v2g vert(appdata IN, uint iid : SV_InstanceID)
+{
+ ...
+ Edge e = _Edges[iid];
+
+ // Refer to the position after applying skinning animation
+ SkinnedNode a = _Nodes[e.a], b = _Nodes[e.b];
+ float3 ap = a.animated, bp = b.animated;
+
+ float3 dir = bp - ap;
+ bp = ap + normalize(dir) * length(dir) * b.t;
+ float3 position = lerp(ap, bp, IN.vid);
+ OUT.position = mul(unity_ObjectToWorld, float4(position, 1)).xyz;
+ ...
+}
+
+With the above implementation, you can get the capture picture shown at the beginning. ( Fig. 1.1 SkinnedAnimation.scene)
+ +In this chapter, we introduced the GPU implementation of the Space Colonization Algorithm that generates a branching shape along a point cloud, and an application example that combines it with skinning animation.
+With this technique,
+You can control the density of branches with these three parameters, but you can generate more diverse models by changing these parameters locally or with time.
+Also, in the sample, Attraction runs the algorithm only with what was generated during initialization, but you should be able to generate more different patterns by dynamically increasing Attraction.
+If you are interested, try various applications of this algorithm to find interesting patterns.
+ +
|
![]() |
|
++Figure 2.1: +
+
++Figure 2.2: +
+In this chapter, we will introduce how to draw the limit set of Kleinian groups with a shader and animate the resulting fractal figure. Speaking of fractal animation, it is interesting to see a self-similar figure by scaling it up or down, but with this method, you can see a characteristic movement in which straight lines and circumferences transition more smoothly.
+The sample in this chapter is "Kleinian Group" of
https://github.com/IndieVisualLab/UnityGraphicsProgramming4
.
First, I will introduce the inversion of figures. I think it is familiar that a figure that is inverted like a mirror image with a straight line as a boundary is line-symmetrical, and if it is inverted around a point, it is point-symmetrical. There is also a reversal of circles. It is an operation to switch the inside and outside of the circle on the two-dimensional plane.
+
++Figure 2.3: Circle Inversion P \ rightarrow P' +
+Inversion with respect to a circle with center O and radius r is the operation that moves a point P to P' — keeping the direction from O unchanged — so that \left| OP\right| and \left| OP'\right| satisfy the following equation.
+\left| OP\right| \cdot \left| OP'\right| =r^{2}
+
+In the vicinity of the circumference, the inside and outside appear to be interchanged like a distorted line symmetry, and the infinity far away from the circumference and the center of the circle are interchanged. What is interesting is the case where the straight line on the outside of the circle is inverted, and when it is close to the circle, it moves to the inside across the circumference and continues to infinity as it moves away from it. Become.
+
++Figure 2.4: Straight line inversion +
+It will appear as a small circle inside the circle. If you think of a straight line as a circle with an infinite radius, you can say that circle inversion is an operation that swaps the circles inside and outside the circle.
+ +The formula for the circle inversion of the unit circle on the complex plane is as follows.
+z \rightarrow \dfrac {1}{\overline {z}}
+
+z is a complex number and \overline {z} is its complex conjugate.
+Transforming the formula as follows, you can see that this is an operation that scales z by 1 divided by the square of its length.
+z\rightarrow \dfrac {1}{\overline {z}}=\dfrac {1}{x-iy}=\dfrac {x+iy}{\left( x-iy\right) \left( x+iy\right) }=\dfrac {x+iy}{x^{2}+y^{2}}=\dfrac {z}{\left| z\right| ^{2}}
+
+As a graphic operation on the complex plane,
+I think that many people are aware that, but this is where operations including division are newly added.
+ +The Mobius transformation * 1 is a generalized form that includes division in the transformation on the complex plane .
+[* 1] It is derived from the mathematician August Ferdinand Mobius, who is familiar with Mobius strip.
z \rightarrow \dfrac {az+b}{cz+d}
+
+a, b, c, d are all complex numbers.
+ +Consider creating a fractal figure by repeatedly using the Mobius transformations.
+Prepare four sets of circles D_A , D_a , D_B , D_b that do not intersect each other . First of which focuses on two sets of circle D_A the outside of D_a inside, D_A the inside of D_a Mobius transformation transferred to outside a create a. Similarly, make a Mobius transformation b from two other sets of circles D_B and D_b . Also , prepare the inverse transformations A and B respectively .
+
++Figure 2.5: +
+The entire transformation (for example, aaBAbbaB ) obtained by synthesizing the four Mobius transformations a , A , b , and B in any order is called " Schottky group * 2 based on a , b ".
+[* 2] It is derived from the mathematician Friedrich Hermann Schottky who first devised such a group.
This is further generalized, and the discrete group consisting of Mobius transformations is called the Kleinian group . I have the impression that this name is more widely used.
+ +When you display the image of the Schottky group, you will see a circle inside the circle, a circle inside it, and so on. These sets are called " the limit set of Schottky groups in a and b ". The purpose of this chapter is to draw this limit set .
+ +I will introduce how to draw the limit set with a shader. It is difficult to implement it honestly because the combination of conversions continues infinitely, but Jos Lays has published an algorithm for this * 5, so I will try to follow it.
+First, prepare two Mobius transformations.
+a: z\rightarrow \dfrac {tz-i}{-iz}
+
+b: z\rightarrow z+2 ++
t is the complex number u + iv . The shape of the figure can be changed by changing this value as a parameter.
+If you take a closer look at transformation a ,
+a: z \rightarrow \dfrac {tz-i}{-iz} = \dfrac {t}{-i} + \dfrac {1}{z} = \dfrac {1}{z} + (-v + iu)
+
+\dfrac {1}{z}=\dfrac {1}{x+iy}=\dfrac {x-iy}{(x+iy)(x-iy)}=\dfrac {x-iy}{x^{2}+y^{2}}=\dfrac {x-iy}{\left|z\right|^{2}}
+
+Therefore,
+a: z\rightarrow \dfrac {tz-i}{-iz}=\dfrac{x-iy}{\left|z\right|^{2}}+(-v+iu)
+
+So
+You can see that it is an operation.
+The limit set using transformations a and b and their inverse transformations is the following strip-shaped figure in which large and small circles are connected.
+
++Figure 2.6: Limit set +
+Let's take a closer look at the features of this shape.
+
++Figure 2.7: +
+It has a band shape of 0 \ leq y \ leq u , and the parallelograms separated by Lines 1 and 2 repeat in the left-right direction. Line1 is a straight line that passes through points (1,0) and points (1-v, u) , and Line2 is a straight line that passes through points (-1,0) and points (-1-v, u) . Line3 becomes a line that divides the figure into the upper limit, and the upper and lower figures divided by this line in the parallelogram are point symmetric at the point z =-\ dfrac {v} {2} + \ dfrac {iu} {2} It has become.
+ +Determines if any point is included in the limit set. Utilizing the fact that parallelogram regions repeat on the left and right, and point symmetry on the top and bottom with Line 3 as the boundary, the judgment at each point is finally brought to the judgment of the figure in the lower half of the center. ..
+We will process a certain point as follows.
+The largest circle tangent to the line y = 0 is the image of the line y = u inverted in the unit circle. When the transformation a is applied to points inside it, y < 0 and they leave the band 0 \leq y \leq u. Therefore,
++When the transformation a is applied to a point and the result has y < 0, the point is included in this largest circle, and therefore it is included in the limit set.
Judgment is made as.
+On the contrary, what if it is not included? Even if the above procedure is repeated, the band of 0 \ leq y \ leq u cannot be removed, and finally the movement of two points across Line 3 will be repeated. Therefore, if the points are the same as two points before, it can be judged that the points are not included in the limit set.
+In summary, it will be processed as follows.
+Let's take a look at the code.
+KleinianGroup.cs
+private void OnRenderImage(RenderTexture source, RenderTexture destination)
+{
+ material.SetColor("_HitColor", hitColor);
+ material.SetColor("_BackColor", backColor);
+ material.SetInt("_Iteration", iteration);
+ material.SetFloat("_Scale", scale);
+ material.SetVector("_Offset", offset);
+ material.SetVector("_Circle", circle);
+
+ Vector2 uv = kleinUV;
+ if ( useUVAnimation)
+ {
+ uv = new Vector2(
+ animKleinU.Evaluate(time),
+ animKleinV.Evaluate(time)
+ );
+ }
+ material.SetVector("_KleinUV", uv);
+ Graphics.Blit(source, destination, material, pass);
+}
+
+On the C # side, it is OnRenderImage()only a process to draw the material while passing the parameters set in the inspector in KleinianGroup.cs .
Let's take a look at the shader.
+KleinianGroup.shader
+#pragma vertex vert_img ++
The Vertex shader uses Unity standard vert_img. The main is the Fragment shader. There are 3 Fragment shaders, each with a different path. The first is the standard one, the second is the one with the blurring process and the appearance is a little cleaner, and the third is the one with the further circle inversion described later. You can now choose which path to use in KleinianGroup.cs. Let's look at the first one here.
+KleinianGroup.shader
+fixed4 frag (v2f_img i) : SV_Target
+{
+ float2 pos = i.uv;
+ float aspect = _ScreenParams.x / _ScreenParams.y;
+ pos.x *= aspect;
+ pos += _Offset;
+ pos *= _Scale;
+
+ bool hit = josKleinian(pos, _KleinUV);
+ return hit ? _HitColor : _BackColor;
+}
+
+_ScreenParamsThe aspect ratio is calculated from and multiplied by pos.x. Now the area on the screen represented by pos is 0 ≤ y ≤ 1, and x is the range according to the aspect ratio. Furthermore , the position and range to be displayed can be adjusted by applying _Offset, passed from the C # side _Scale. josKleinian()The color to be output is determined by judging whether or not the limit set is possible.
josKleinian()Let's take a closer look.
KleinianGroup.shader
+bool josKleinian(float2 z, float2 t)
+{
+ float u = t.x;
+ float v = t.y;
+
+ float2 lz=z+(1).xx;
+ float2 llz=z+(-1).xx;
+
+ for (uint i = 0; i < _Iteration ; i++)
+ {
+ ~
+
+A function that receives the point z and the Mobius transformation parameter t and determines whether z is included in the limit set. lz and llz are variables for judging "the same point as before" indicating that they are outside the set. For the time being, the values are initialized so that they are different from z at the start and also different from each other. _IterationIs the maximum number of times to repeat the procedure. I think that it is enough that the value is not so large unless you look at the details in a magnified manner.
KleinianGroup.shader
+// wrap if outside of Line1,2 +float offset_x = abs (v) / u * zy; +z.x += offset_x; +z.x = wrap(z.x, 2, -1); +z.x -= offset_x; ++
KleinianGroup.shader
+float wrap(float x, float width, float left_side){
+ x -= left_side;
+ return (x - width * floor(x/width)) + left_side;
+}
+
+Here is
++Move to the center parallelogram if it is to the right of Line1 and to the left of Line2
It will be the part of.
+wrap()Is a function that receives the position of the point, the width of the rectangle, and the coordinates of the left edge of the rectangle, and stores the points that extend to the left and right. It is a process to convert a parallelogram to a rectangle with wrap()offset_x, put it within the range with offset_x, and return it to a parallelogram again with offset_x.
Next is the judgment of Line3.
+KleinianGroup.shader
+//if above Line3, inverse at (-v/2, u/2)
+float separate_line = u * 0.5
+ + sign(v) *(2 * u - 1.95) / 4 * sign(z.x + v * 0.5)
+ * (1 - exp(-(7.2 - (1.95 - u) * 15)* abs(z.x + v * 0.5)));
+
+if (z.y >= separate_line)
+{
+ z = float2 (-v, u) - z;
+}
+
+separate_lineThe part that asks for is the conditional expression of Line3. I don't know how to derive this part, and I think it is roughly calculated from the symmetry of the figure. Depending on the value of t that has been squeezed into a complicated figure, the upper and lower figures may mesh with each other in a jagged manner, and this conditional expression may not be sufficient to divide it properly, but this time it is effective in a general form. Will be used as it is.
KleinianGroup.shader
+z = TransA (z, t); ++
KleinianGroup.shader
+float2 TransA(float2 z, float2 t){
+ return float2(z.x, -z.y) / dot(z,z) + float2(-t.y, t.x);
+}
+
+Finally , apply the Mobius transformation a to the point z . Using the above formula transformation to make it easier to code,
+a: z\rightarrow \dfrac {tz-i}{-iz}=\dfrac{x-iy}{\left|z\right|^{2}}+(-v+iu)
+
+I'm implementing this.
+KleinianGroup.shader
+//hit!
+if (z.y<0) { return true; }
+
+As a result of conversion, if y <0, limit set judgment,
+KleinianGroup.shader
+//2cycle
+if(length(z-llz) < 1e-6) {break;}
+
+llz=lz;
+lz=z;
+
+If the value is almost the same as the point two points before, it is judged that it is not the limit set. Also, _Iterationif the judgment result is not obtained even if it is repeated, it is judged that it is not the limit set.
This completes the shader implementation. The parameter t is (2,0) is the most with a typical value of (1.94,0.02) is likely to be interesting shape in the vicinity. The sample project can be edited in the inspector of KleinianGroupDemo.cs, so please play with it.
+ +That's all for displaying the limit set, but to make it interesting as an animation, we will add a powerful topping at the end. josKleinian()Invert the position in a circle before passing it over. Circle inversion swaps the infinitely expanding area outside the circle with the inside, and the circle is transferred as a circle. And the limit set is made up of innumerable circles. By moving this inverted circle or changing the radius, you can create a mysterious appearance that you cannot predict while taking advantage of the fun of fractals.
KleinianGroup.shader
+float4 calc_color(float2 pos)
+{
+ bool hit = josKleinian(pos, _KleinUV);
+ return hit ? _HitColor : _BackColor;
+}
+
+~
+float4 _Circle;
+
+float2 circleInverse(float2 pos, float2 center, float radius)
+{
+ float2 p = pos - center;
+ p = (p * radius) / dot(p,p);
+ p += center;
+ return p;
+}
+
+fixed4 frag_circle_inverse(v2f_img i) : SV_Target
+{
+ float2 pos = i.uv;
+ float aspect = _ScreenParams.x / _ScreenParams.y;
+ pos.x *= aspect;
+ pos *= _Scale;
+ pos += _Offset;
+
+ int sample_num = 10;
+ float4 sum;
+ for (int i = 0; i < sample_num; ++i)
+ {
+ float2 offset = rand2n(pos, i) * (1/_ScreenParams.y) * 3;
+ float2 p = circleInverse(pos + offset, _Circle.xy, _Circle.w);
+ sum += calc_color(p);
+ }
+
+ return sum / sample_num;
+}
+
+This is the Fragment shader with circle inversion, defined in the third pass. The sample_num loop makes the appearance cleaner by sampling surrounding pixels and blurring slightly. calc_color() is the color calculation described so far, and circleInverse() inverts the position in the circle before it is called.
In the KleinianGroupCircleInverse scene, fractal-like animation works by changing the parameters of this shader with Animator.
+ +In this chapter, we introduced how to draw the limit set of Kleinian groups with a shader and how to make more interesting fractal figures by using circle inversion. Fractal and Mobius transformations were difficult to get to in fields that I wasn't familiar with, but it was very exciting to see unexpected patterns moving one after another. If you like, please try it out!
+ +[*3] https://www.amazon.co.jp/dp/4535783616
[*4] http://userweb.pep.ne.jp/hannyalab/MatheVital/IndrasPearls/IndrasPearlsindex.html
[*5] http://www.josleys.com/articles/Kleinian%20escape-time_3.pdf
[*6] https://www.shadertoy.com/user/JosLeys
[*7] https://www.shadertoy.com/view/MlGfDG
[*8] https://twitter.com/soma_arc
[*9] http://tokyodemofest.jp/2018/
|
![]() |
|
Simulation of the shape of a planar object that is deformed by an external force such as a flag or clothes is called Cloth Simulation , and much research has been done in the CG field as it is essential for animation creation. .. Already implemented in Unity, this chapter introduces a simple cloth simulation theory and GPU implementation for the purpose of learning parallel computing using GPU and understanding the nature of simulation and the meaning of parameters. I will do it.
+
++Figure 3.1: Cloth simulation +
+An object such as a spring, rubber, or cushion that deforms when a force is applied and returns to its original shape when the force is stopped is called an elastic body . Since such elastic bodies cannot be represented by a single position or orientation, they represent an object by points and connections between them, and the movement of each point simulates the entire shape. This point is called a mass point and is considered to be a mass of mass without size. In addition, the connection between mass points has the property of a spring. The method of simulating an elastic body by calculating the expansion and contraction of each spring is called the mass-spring system, and it is a flag by calculating the motion of a set of mass points arranged in a two-dimensional shape. Simulation of clothes etc. is called Cloth Simulation.
+
++Figure 3.2: Mass-Spring system +
+Each spring applies a force to the connected mass points according to the following equation.
+F_{spring} = -k \left( I-I_{0} \right) - b v
+
+Here, I is the current length of the spring (distance between the connected mass points ), and I_ {0} is the natural length of the spring at the start of the simulation (the length when no load is applied to the spring). ). k is a constant that represents the hardness of the spring, v is the velocity of the mass point, and b is the constant that determines the degree of velocity attenuation. This equation means that the spring always exerts a force that tries to return the distance between the connected mass points to the initial natural length of the spring. If the current distance of the spring is significantly different from the natural length of the spring, a larger force will be applied and it will be attenuated in proportion to the current velocity of the mass point.
+
++Figure 3.3: Spring force +
+In this simulation, the springs that make up the basic structure are connected in the horizontal and vertical directions, and the springs are also connected between the mass points located diagonally to prevent extreme deviation in the diagonal direction. They are called Structure Spring and Shear Spring, respectively, and one mass point connects the spring to 12 adjacent mass points, respectively.
+
++Figure 3.4: Spring structure +
+In this simulation, the position of each mass point is computed with the Verlet method, an approach often used in real-time applications. The Verlet method is a numerical solution of Newton's equation of motion, and is also used in molecular dynamics to compute the movement of atoms. Normally the position of an object is obtained by integrating its velocity, but the Verlet method computes the position at the next time step from the current position and the position at the previous time step.
+The derivation of the Verlet integration formula is shown below. Let F be the force applied to the mass point, m the mass of the mass point, v its velocity, x its position, t the time, and \Delta t the time step of the simulation (how much time advances per simulation step). Then the equation of motion of the mass point is
+m\dfrac {d^{2}x\left( t\right) }{dt^{2}}=F
+
+Can be written. If this equation of motion is made into an algebraic equation using the following two Taylor expansions,
+x\left( t+\Delta t\right) =x\left( t\right) +\Delta t\dfrac {dx\left( t\right) }{dt}+\dfrac {1}{2!}\Delta t^{2}\dfrac {dx^{2}\left( t\right) }{dt^{2}}+\dfrac {1}{3!}\Delta t^{3}\dfrac {dx^{3}\left( t\right) }{dt^{3}}+\ldots
+
+x\left( t-\Delta t\right) =x\left( t\right) -\Delta t\dfrac {dx\left( t\right) }{dt}+\dfrac {1}{2!}\Delta t^{2}\dfrac {dx^{2}\left( t\right) }{dt^{2}}-\dfrac {1}{3!}\Delta t^{3}\dfrac {dx^{3}\left( t\right) }{dt^{3}}+\ldots
+
+From these two Taylor expansions, solving for the second-order derivative term and ignoring the remaining higher-order terms in \Delta t as sufficiently small, we can write as follows.
+\dfrac {dx^{2}\left( t\right) }{dt^{2}}=\dfrac {x\left( t+\Delta t\right) -2x\left( t\right) +x\left( t-\Delta t\right) }{\Delta t^{2}}
+
+If the second-order differential term is expressed by mass m and force F from the equation of motion ,
+x\left( t+\Delta t\right) =2x\left( t\right) -x\left( t-\Delta t\right) +\dfrac {\Delta t^{2}}{m}F\left( t\right)
+
+The algebraic equation is obtained. In this way, the formula for calculating the position in the next time step from the current position, the position in the previous time step, the mass, the force, and the value of the time step can be obtained.
+The speed is calculated from the current position and the previous position in time.
+v\left( t\right) =\dfrac {x\left( t\right) -x\left( t-\Delta t\right) }{\Delta t}
+
+The speed obtained by this calculation is not very accurate, but it is not a problem as it is only used to calculate the damping of the spring.
+ +Collision processing is performed in two phases: "collision detection" and "reaction to it".
+Collision detection is performed by the following formula.
+\left\| x\left( t+\Delta t\right) -c\right\| -r < 0
Here, c and r are the center and radius of the sphere, and x\left( t+\Delta t\right) is the position at the next time step obtained by the Verlet method. If a collision is detected, the mass point is moved onto the surface of the sphere so that the sphere does not penetrate the cloth. Specifically, a mass point found inside the sphere is pushed out along the normal direction of the surface at the collision point. The position of the mass point is updated according to the following formula.
+\begin{aligned}
+d=\dfrac {x\left( t+\Delta t\right) -c}{\left\| x\left( t+\Delta t\right) -c\right\|}
+\\
+x^{\prime}\left(t + \Delta t\right) = c + dr
+\end{aligned}
+
+x^{\prime}\left( t+\Delta t\right) is the updated position after the collision. d can be regarded as an approximation, of acceptable accuracy, of the surface normal at the collision point, provided that the mass point does not penetrate deeply.
+
++Figure 3.5: Collision calculation +
+The sample program is in the Assets / GPUClothSimulation folder in the repository below .
+https://github.com/IndieVisualLab/UnityGraphicsProgramming4
+ +This sample program uses the Compute Shader. Please check here for the operating environment of Compute Shader.
+https://docs.unity3d.com/ja/2018.3/Manual/class-ComputeShader.html
+ +It is a schematic diagram showing how each component and code work in relation to each other.
+
++Figure 3.6: Structure and processing flow of each component and code +
+GPUClothSimulation.cs is a C # script that manages the data and processing used for simulation. This script creates and manages position and normal data used for simulation in RenderTexture format. In addition, processing is performed by calling the kernel described in Kernels.compute. The GPUClothRenderer.cs script provides visualization of the calculation results. The Mesh object generated by this script is drawn by transforming the geometry by the processing of ClothSurface.shader that refers to the RenderTexture that stores the position data and normal data that are the calculation results.
+ +A C # script that controls the simulation.
+using System.Collections;
+using System.Collections.Generic;
+using UnityEngine;
+
+public class GPUClothSimulation : MonoBehaviour
+{
+    [Header("Simulation Parameters")]
+    // Time step
+    public float TimeStep = 0.01f;
+    // Number of simulation iterations
+    [Range(1, 16)]
+    public int VerletIterationNum = 4;
+    // Cloth resolution (horizontal, vertical)
+    public Vector2Int ClothResolution = new Vector2Int(128, 128);
+    // Cloth grid spacing (natural length of spring)
+    public float RestLength = 0.02f;
+    // Constants that determine the elasticity of the fabric
+    public float Stiffness = 10000.0f;
+    // Velocity decay constant
+    public float Damp = 0.996f;
+    // Mass of a mass point ("quality" in the translation is a mistranslation of mass)
+    public float Mass = 1.0f;
+    // gravity
+    public Vector3 Gravity = new Vector3(0.0f, -9.81f, 0.0f);
+
+    [Header("References")]
+    // Reference to Transform of collision sphere
+    public Transform CollisionSphereTransform;
+    [Header("Resources")]
+    // Kernel to simulate
+    public ComputeShader KernelCS;
+
+    // Cloth simulation position data buffer
+    private RenderTexture[] _posBuff;
+    // Cloth simulation position data (previous time step) buffer
+    private RenderTexture[] _posPrevBuff;
+    // Cloth simulation normal data buffer
+    private RenderTexture _normBuff;
+
+    // Cloth length (horizontal, vertical)
+    private Vector2 _totalClothLength;
+
+    [Header("Debug")]
+    // Show simulation buffer for debugging
+    public bool EnableDebugOnGUI = true;
+    // Buffer display scale during debug display
+    private float _debugOnGUIScale = 1.0f;
+
+    // Whether the simulation resources have been initialized
+    public bool IsInit { private set; get; }
+
+    // Get the position data buffer
+    public RenderTexture GetPositionBuffer()
+    {
+        return this.IsInit ? _posBuff[0] : null;
+    }
+    // Get a buffer of normal data
+    public RenderTexture GetNormalBuffer()
+    {
+        return this.IsInit ? _normBuff : null;
+    }
+    // Get the resolution of the cloth
+    public Vector2Int GetClothResolution()
+    {
+        return ClothResolution;
+    }
+
+    // Number of X, Y dimensional threads in the Compute Shader kernel
+    const int numThreadsXY = 32;
+
+    void Start()
+    {
+        var w = ClothResolution.x;
+        var h = ClothResolution.y;
+        var format = RenderTextureFormat.ARGBFloat;
+        var filter = FilterMode.Point; // Prevent interpolation between texels
+        // Create RenderTexture to store data for simulation
+        CreateRenderTexture(ref _posBuff, w, h, format, filter);
+        CreateRenderTexture(ref _posPrevBuff, w, h, format, filter);
+        CreateRenderTexture(ref _normBuff, w, h, format, filter);
+        // Reset the data for simulation
+        ResetBuffer();
+        // Set the initialized flag to True
+        IsInit = true;
+    }
+
+    void Update()
+    {
+        // Press the r key to reset the simulation data
+        if (Input.GetKeyUp("r"))
+            ResetBuffer();
+
+        // Perform a simulation
+        Simulation();
+    }
+
+    void OnDestroy()
+    {
+        // Release the RenderTextures that store the simulation data
+        DestroyRenderTexture(ref _posBuff );
+        DestroyRenderTexture(ref _posPrevBuff);
+        DestroyRenderTexture(ref _normBuff );
+    }
+
+    void OnGUI ()
+    {
+        // Draw RenderTexture containing simulation data for debugging
+        DrawSimulationBufferOnGUI();
+    }
+
+    // Reset simulation data
+    void ResetBuffer()
+    {
+        ComputeShader cs = KernelCS;
+        // Get kernel ID
+        int kernelId = cs.FindKernel("CSInit");
+        // Calculate the number of execution thread groups in the Compute Shader kernel
+        int groupThreadsX =
+            Mathf.CeilToInt((float)ClothResolution.x / numThreadsXY);
+        int groupThreadsY =
+            Mathf.CeilToInt((float)ClothResolution.y / numThreadsXY);
+        // Calculation of cloth length (horizontal, vertical)
+        _totalClothLength = new Vector2(
+            RestLength * ClothResolution.x,
+            RestLength * ClothResolution.y
+        );
+        // Set parameters and buffers
+        cs.SetInts ("_ClothResolution",
+            new int[2] { ClothResolution.x, ClothResolution.y });
+        cs.SetFloats("_TotalClothLength",
+            new float[2] { _totalClothLength.x, _totalClothLength.y });
+        cs.SetFloat ("_RestLength", RestLength);
+        cs.SetTexture(kernelId, "_PositionBufferRW", _posBuff[0]);
+        cs.SetTexture(kernelId, "_PositionPrevBufferRW", _posPrevBuff[0]);
+        cs.SetTexture(kernelId, "_NormalBufferRW", _normBuff);
+        // run the kernel
+        cs.Dispatch(kernelId, groupThreadsX, groupThreadsY, 1);
+        // copy the write-side buffers so read/write pairs start identical
+        Graphics.Blit (_posBuff [0], _posBuff [1]);
+        Graphics.Blit (_posPrevBuff [0], _posPrevBuff [1]);
+    }
+
+    // simulation
+    void Simulation()
+    {
+        ComputeShader cs = KernelCS;
+        // Time step advanced by each CSSimulation pass (TimeStep split over the iterations)
+        float timestep = (float)TimeStep / VerletIterationNum;
+        // Get kernel ID
+        int kernelId = cs.FindKernel("CSSimulation");
+        // Calculate the number of execution thread groups in the Compute Shader kernel
+        int groupThreadsX =
+            Mathf.CeilToInt((float)ClothResolution.x / numThreadsXY);
+        int groupThreadsY =
+            Mathf.CeilToInt((float)ClothResolution.y / numThreadsXY);
+
+        // set parameters
+        cs.SetVector("_Gravity", Gravity);
+        cs.SetFloat ("_Stiffness", Stiffness);
+        cs.SetFloat ("_Damp", Damp);
+        cs.SetFloat ("_InverseMass", (float)1.0f / Mass);
+        cs.SetFloat ("_TimeStep", timestep);
+        cs.SetFloat ("_RestLength", RestLength);
+        cs.SetInts ("_ClothResolution",
+            new int[2] { ClothResolution.x, ClothResolution.y });
+
+        // Set the parameters of the collision sphere
+        if (CollisionSphereTransform != null)
+        {
+            Vector3 collisionSpherePos = CollisionSphereTransform.position;
+            float collisionSphereRad =
+                CollisionSphereTransform.localScale.x * 0.5f + 0.01f;
+            cs.SetBool ("_EnableCollideSphere", true);
+            cs.SetFloats("_CollideSphereParams",
+                new float[4] {
+                    collisionSpherePos.x,
+                    collisionSpherePos.y,
+                    collisionSpherePos.z,
+                    collisionSphereRad
+                });
+        }
+        else
+            cs.SetBool("_EnableCollideSphere", false);
+
+        for (var i = 0; i <VerletIterationNum; i ++)
+        {
+            // set the buffer (ping-pong: [0] is read, [1] is written)
+            cs.SetTexture(kernelId, "_PositionBufferRO", _posBuff[0]);
+            cs.SetTexture(kernelId, "_PositionPrevBufferRO", _posPrevBuff[0]);
+            cs.SetTexture(kernelId, "_PositionBufferRW", _posBuff[1]);
+            cs.SetTexture(kernelId, "_PositionPrevBufferRW", _posPrevBuff[1]);
+            cs.SetTexture(kernelId, "_NormalBufferRW", _normBuff);
+            // run thread
+            cs.Dispatch(kernelId, groupThreadsX, groupThreadsY, 1);
+            // Swap the read buffer and write buffer
+            SwapBuffer(ref _posBuff[0], ref _posBuff[1] );
+            SwapBuffer (ref _posPrevBuff [0], ref _posPrevBuff [1]);
+        }
+    }
+
+    // Create RenderTexture to store data for simulation
+    void CreateRenderTexture(ref RenderTexture buffer, int w, int h,
+        RenderTextureFormat format, FilterMode filter)
+    {
+        buffer = new RenderTexture(w, h, 0, format)
+        {
+            filterMode = filter,
+            wrapMode = TextureWrapMode.Clamp,
+            hideFlags = HideFlags.HideAndDontSave,
+            enableRandomWrite = true
+        };
+        buffer.Create();
+    }
+
+    // Create RenderTexture [] to store data for simulation
+    void CreateRenderTexture(ref RenderTexture[] buffer, int w, int h,
+        RenderTextureFormat format, FilterMode filter)
+    {
+        // ~ omitted ~ (body elided in the book listing)
+    }
+
+    // Removed RenderTexture that stores data for simulation
+    void DestroyRenderTexture(ref RenderTexture buffer)
+    {
+        // ~ omitted ~ (body elided in the book listing)
+    }
+
+    // Remove RenderTexture [] to store data for simulation
+    void DestroyRenderTexture(ref RenderTexture[] buffer)
+    {
+        // ~ omitted ~ (body elided in the book listing)
+    }
+
+    // Delete material
+    void DestroyMaterial(ref Material mat)
+    {
+        // ~ omitted ~ (body elided in the book listing)
+    }
+
+    // Swap buffers
+    void SwapBuffer(ref RenderTexture ping, ref RenderTexture pong)
+    {
+        RenderTexture temp = ping;
+        ping = pong;
+        pong = temp;
+    }
+
+    // Draw a buffer for simulation in the OnGUI function for debugging
+    void DrawSimulationBufferOnGUI()
+    {
+        // ~ omitted ~ (body elided in the book listing)
+    }
+}
+
+
+At the beginning, the parameters required for the simulation are declared. In addition, RenderTexture is used to hold the simulation results. The data used and obtained for this simulation
+is.
+ +The InitBuffer function creates a RenderTexture that stores the data needed for the calculation. For the position and the position in the previous time step, the data in the previous time step is used and the calculation is performed based on it, so create two for reading and one for writing. In this way, the method of creating data for reading and data for writing and letting the shader calculate efficiently is called Ping Pong Buffering .
+Regarding the creation of RenderTexture, in format, the precision of the texture (the number of channels and the number of bits of each channel) is set. Generally, the lower the value, the faster the processing, but ARGBHalf (16bit per channel) has low accuracy and the calculation result becomes unstable, so set it to ARGBFloat (32bit per channel). Also, enableRandomWrite should be true to allow ComputeShader to write the calculation result. RenderTexture is not created on the hardware just by calling the constructor, so execute the Create function to make it available in the shader.
+ +The ResetBuffer function initializes the RenderTexture that stores the data needed for the simulation. Get kernel ID, calculate number of thread groups, set various parameters such as cloth length, RenderTexture used for calculation for Compute Shader, and call CSInit kernel written in Kernels.compute for processing. .. The contents of the CSInit kernel are described in the following Kernels.compute details.
+ +The Simulation function simulates the actual cloth. At the beginning, like the ResetBuffer function, get the kernel ID, calculate the number of thread groups, set various parameters used for simulation and RenderTexture. If you calculate with a large time step at a time, the simulation becomes unstable, so in Update (), divide the time step into small values so that the simulation can be calculated stably by dividing it into several times. I will. The number of iterations is set in VerletIterationNum .
+ +Compute Shader that describes processing such as actual simulation.
+This Compute Shader has
+There are two kernels.
+Each kernel performs the following processing.
+ +Calculates the initial values of position and normal. The position of the mass point is calculated so that it is arranged in a grid pattern on the XY plane based on the thread ID (2D).
+ +Perform a simulation. The figure below outlines the processing flow of the CSSimulation kernel.
+
++Figure 3.7: Calculation flow in CSSimulation kernel +
+The code is shown below.
+#pragma kernel CSInit
+#pragma kernel CSSimulation
+
+#define NUM_THREADS_XY 32 // Number of kernel threads
+
+// For reading position data (previous time step)
+Texture2D<float4> _PositionPrevBufferRO;
+// For reading position data
+Texture2D<float4> _PositionBufferRO;
+// For writing position data (previous time step)
+RWTexture2D<float4> _PositionPrevBufferRW;
+// For writing position data
+RWTexture2D<float4> _PositionBufferRW;
+// For writing normal data
+RWTexture2D<float4> _NormalBufferRW;
+
+int2 _ClothResolution; // Cloth resolution (number of particles) (horizontal, vertical)
+float2 _TotalClothLength; // Overall length of the cloth
+
+float _RestLength; // Natural length of the spring
+
+float3 _Gravity; // Gravity
+float _Stiffness; // Constant that determines the degree of expansion and contraction of the cloth
+float _Damp; // Attenuation rate of cloth speed
+float _InverseMass; // 1.0 / mass (the translation's "quality" was a mistranslation of mass)
+
+float _TimeStep; // Size of time step
+
+bool _EnableCollideSphere; // Flag for collision handling
+float4 _CollideSphereParams; // Collision handling parameters (pos.xyz, radius)
+
+// Array of ID offsets (x, y) of nearby particles
+static const int2 m_Directions[12] =
+{
+    int2 (-1, -1), // 0
+    int2 (0, -1), // 1
+    int2 (1, -1), // 2
+    int2 (1, 0), // 3
+    int2 (1, 1), // 4
+    int2 (0, 1), // 5
+    int2 (-1, 1), // 6
+    int2 (-1, 0), // 7
+    int2 (-2, -2), // 8
+    int2 (2, -2), // 9
+    int2 (2, 2), // 10
+    int2 (-2, 2) // 11
+};
+// Returns the offset of the ID of nearby particles
+int2 NextNeigh(int n)
+{
+    return m_Directions[n];
+}
+
+// Kernel that initializes the simulation buffer
+[numthreads(NUM_THREADS_XY, NUM_THREADS_XY, 1)]
+void CSInit(uint3 DTid : SV_DispatchThreadID)
+{
+    uint2 idx = DTid.xy;
+
+    // location: lay the points out in a grid on the XY plane
+    float3 pos = float3(idx.x * _RestLength, idx.y * _RestLength, 0);
+    pos.xy -= _TotalClothLength.xy * 0.5;
+    // normal
+    float3 nrm = float3 (0, 0, -1);
+    // write to buffer
+    _PositionPrevBufferRW[idx] = float4(pos.xyz, 1.0);
+    _PositionBufferRW[idx] = float4(pos.xyz, 1.0);
+    _NormalBufferRW[idx] = float4(nrm.xyz, 1.0);
+}
+
+// Kernel to simulate
+[numthreads(NUM_THREADS_XY, NUM_THREADS_XY, 1)]
+void CSSimulation(uint2 DTid : SV_DispatchThreadID)
+{
+    int2 idx = (int2)DTid.xy;
+    // Cloth resolution (number of particles) (horizontal, vertical)
+    int2 res = _ClothResolution.xy;
+    // read position
+    float3 pos = _PositionBufferRO[idx.xy].xyz;
+    // Read position (previous time step)
+    float3 posPrev = _PositionPrevBufferRO[idx.xy].xyz;
+    // Calculate the speed from the position and the position of the previous time step
+    float3 vel = (pos - posPrev) / _TimeStep;
+
+    float3 normal = (float3)0; // normal
+    float3 lastDiff = (float3) 0; // Variable for storing direction vector used when calculating normal
+    float iters = 0.0; // Variable for adding the number of iterations when calculating normals
+
+    // Substitute the force applied to the particles and the value of gravity as the initial value
+    float3 force = _Gravity.xyz;
+    // 1.0 / mass
+    float invMass = _InverseMass;
+
+    // Pin the top row of the cloth: skip the update so these mass points stay fixed
+    if (idx.y == _ClothResolution.y - 1)
+        return;
+
+    // Calculate for nearby particles (12)
+    [unroll]
+    for (int k = 0; k < 12; k++)
+    {
+        // Offset of ID (coordinates) of neighboring particles
+        int2 neighCoord = NextNeigh(k);
+        // Do not calculate for X-axis, edge particles
+        if (((idx.x+neighCoord.x) < 0) || ((idx.x+neighCoord.x) > (res.x-1)))
+            continue;
+        // Do not calculate for Y-axis, edge particles
+        if (((idx.y + neighCoord.y) <0) || ((idx.y + neighCoord.y)> (res.y-1)))
+            continue;
+        // ID of nearby particles
+        int2 idxNeigh = int2(idx.x + neighCoord.x, idx.y + neighCoord.y);
+        // Position of nearby particles
+        float3 posNeigh = _PositionBufferRO[idxNeigh].xyz;
+        // Difference in the position of nearby particles
+        float3 posDiff = posNeigh - pos;
+
+        // Normal calculation
+        // Direction vector from the base point to nearby particles
+        float3 currDiff = normalize(posDiff);
+        if ((iters > 0.0) && (k < 8))
+        {
+            // If the angle between the current direction vector and the one
+            // to the previously examined neighbor is acute (dot product > 0)
+            float a = dot(currDiff, lastDiff);
+            if (a > 0.0) {
+                // Find and add orthogonal vectors by cross product
+                normal += cross(lastDiff, currDiff);
+            }
+        }
+        lastDiff = currDiff; // Keep for calculation with next neighbor particles
+
+        // Calculate the natural length of the spring with neighboring particles
+        float restLength = length(neighCoord * _RestLength);
+        // Calculate the force of the spring
+        force += (currDiff*(length(posDiff)-restLength))*_Stiffness-vel*_Damp;
+        // Count only the 8 Structure/Shear neighbors toward the normal average
+        if (k < 8) iters += 1.0;
+    }
+    // Calculate normal vector
+    normal = normalize (normal / - (iters - 1.0));
+
+    // acceleration
+    float3 acc = (float3)0.0;
+    // Apply the law of motion (the magnitude of acceleration is proportional to the magnitude of force and inversely proportional to mass)
+    acc = force * invMass;
+
+    // Position calculation by Verlet method
+    float3 tmp = pos;
+    pos = pos * 2.0 - posPrev + acc * (_TimeStep * _TimeStep);
+    posPrev = tmp; // Position of the previous time step
+
+    // Calculate collision
+    if (_EnableCollideSphere)
+    {
+        float3 center = _CollideSphereParams.xyz; // Center position
+        float radius = _CollideSphereParams.w; // radius
+
+        if (length(pos - center) < radius)
+        {
+            // Calculate the unit vector from the center of the collision sphere to the position of the particles on the cloth
+            float3 collDir = normalize(pos - center);
+            // Move the position of particles to the surface of the collision sphere
+            pos = center + collDir * radius;
+        }
+    }
+
+    // write
+    _PositionBufferRW[idx.xy] = float4(pos.xyz, 1.0);
+    _PositionPrevBufferRW[idx.xy] = float4(posPrev.xyz, 1.0);
+    _NormalBufferRW[idx.xy] = float4(normal.xyz, 1.0);
+}
+
+
+In this component
+to hold.
+Please check the sample code for details.
+ +In this shader, the position and normal data obtained by simulation are acquired in the vertex shader, and the shape of the mesh is changed by rewriting the vertices.
+Please check the sample code for details
+ +When you run it, you'll see an object that behaves like a cloth that collides with the sphere. If you change the various parameters used in the simulation, you can see the change in the movement.
+TimeStep is the time of the simulation that progresses when the Update function is executed once. If you increase it, the change in movement will be large, but if you set a value that is too large, the simulation will become unstable and the value will diverge.
+VerletIterationNum is the number of CSSimulation kernels to execute in the Simulation function. Even if the value of the same time step is increased, increasing this value will make the simulation easier to stabilize, but will increase the calculation load.
+ClothResolution is the resolution of the cloth. If you increase it, you will see many details such as wrinkles, but if you increase it too much, the simulation will become unstable. Since the thread size is set to 32 on ComputeShader, it is desirable to be a multiple of 32.
+RestLength is the natural length of the spring. Since it is the distance between the springs, the length of the cloth is Cloth Resolution x Rest Length.
+Stiffness is the hardness of the spring. Increasing this value will reduce the stretch of the fabric, but increasing it too much will make the simulation unstable.
+Damp is the attenuation value of the moving speed of the spring. Increasing this value will make the mass point stagnant faster and less likely to vibrate, but will reduce the change.
+Mass is the mass of the mass point. Increasing this value will cause the cloth to move as if it were heavy.
+Gravity is the gravity on the cloth. The acceleration of the cloth is the combination of this gravity and the force of the spring.
+If you check EnableDebugOnGUI , a texture that stores the position that is the result of the simulation, the position in the previous time step, and the normal data is drawn in the upper left of the screen for confirmation.
+When you press the R key, the cloth returns to its initial state. If the simulation becomes unstable and the values diverge, please return to the initial state.
+
++Figure 3.8: Execution result (calculation result RenderTexture is drawn in screen space) +
+If you open Assets / GPUClothSimulation / Debug / GPUClothSimulationDebugRender.unity , you can see the spring that connects the mass points with particles and lines.
+
++Figure 3.9: Draw mass points and springs with particles and lines +
+The movement obtained by the mass point-spring system simulation changes in a complicated manner depending on how the force is applied, and an interesting shape can be created. The cloth simulation presented in this chapter is very simple. Challenges such as collisions with objects of complex geometry rather than simple things like spheres, collisions between cloths, friction, consideration of the fiber structure of cloths, stability of simulations when taking large time steps, etc. Much research has been done to overcome it. If you are interested in physics engine, why not pursue it?
+ +
|
![]() |
|
+Figure 4.1: Rays extending from bright areas +
+LightLeak, LightStreak, or StarGlow, which stretches when a strong light is reflected, let's express this with a post effect. Here, for convenience, it is called StarGlow.
+This post-effect presented here was presented by Masaki Kawase at GDC 2003.
+The sample in this chapter is "Star Glow" from
https://github.com/IndieVisualLab/UnityGraphicsProgramming4
.
++Figure 4.2: Original image +
+
++Figure 4.3: Image of detecting only high-brightness pixels +
+First, let's create an image (brightness image) that detects only bright areas. The same process is required for general glow. The source code of the shader and script for creating a luminance image is as follows. Note that the shader path is 1.
+StarGlow.cs
+RenderTexture brightnessTex =
+    RenderTexture.GetTemporary(source.width / this.divide,
+                               source.height / this.divide,
+                               source.depth,
+                               source.format);
+…
+base.material.SetVector
+    (this.idParameter, new Vector3(threshold, intensity, attenuation));
+
+Graphics.Blit(source, brightnessTex, base.material, 1);
StarGlow.shader
+#define BRIGHTNESS_THRESHOLD _Parameter.x
+#define INTENSITY _Parameter.y
+#define ATTENUATION _Parameter.z
+…
+fixed4 frag(v2f_img input) : SV_Target
+{
+ float4 color = tex2D(_MainTex, input.uv);
+ return max(color - BRIGHTNESS_THRESHOLD, 0) * INTENSITY;
+}
+
+There are various methods for calculating the brightness, but the calculation method used in the classical glow implementation was used as it is. I also see shaders that perform processing such as grayscale once and then comparing the brightness.
+BRIGHTNESS_THRESHOLD is the threshold for deciding what counts as bright, and INTENSITY is a bias multiplied into the brightness. The larger the value given to color — that is, the brighter the pixel — the larger the returned value. The higher the threshold, the less likely a value greater than 0 is returned; the larger the bias, the stronger the resulting brightness image.
ATTENUATION is not used at this point. Because the overhead of exchanging values between CPU and GPU is smaller when they are passed all at once, the three parameters are packed into a single Vector3 here.
The most important point at this stage is that the luminance image is rendered into a small RenderTexture.
+In general, the higher the resolution of a post effect, the greater the load on the Fragment shader, which increases the number of calls and calculations. Furthermore, with regard to the glow effect, the processing load becomes even greater due to repeated processing. Star Glow is no exception to this example. Therefore, the load is reduced by reducing the resolution of the effect to the required level.
+The iterative process will be described later.
+ +
++Figure 4.4: Diagonally stretched luminance image +
+Blur the brightness image obtained in STEP1 and stretch it. By devising this stretching method, it expresses a sharply extending ray that is different from general glow.
+In the case of general glow, it is stretched by the Gaussian function in all directions, but in the case of star glow, it is stretched in a directional manner.
+StarGlow.cs
+Vector2 offset = new Vector2(-1, -1);
+// (Quaternion.AngleAxis (angle * x + this.angleOfStreak,
+// Vector3.forward) * Vector2.down).normalized;
+
+base.material.SetVector(this.idOffset, offset);
+base.material.SetInt (this.idIteration, 1);
+
+Graphics.Blit(brightnessTex, blurredTex1, base.material, 2);
+
+for (int i = 2; i <= this.iteration; i++)
+{
+ Repeated drawing
+}
+
+This differs from the actual processing, but for this explanation let us simply assume offset = (1, 1). Also note that offset and iteration are passed to the shader.
Next, on the script side, drawing is repeated with shader pass 2, but for simplicity let us first look at the shader. Notice that the drawing uses shader pass 2.
+StarGlow.shader
+int _Iteration;
+float2 _Offset;
+
+struct v2f_starglow
+{
+ …
+ half power : TEXCOORD1;
+ half2 offset : TEXCOORD2;
+};
+
+v2f_starglow vert(appdata_img v)
+{
+    v2f_starglow o;
+ …
+ o.power = pow(4, _Iteration - 1);
+ o.offset = _MainTex_TexelSize.xy * _Offset * o.power;
+ return o;
+}
+
+float4 frag(v2f_starglow input) : SV_Target
+{
+ half4 color = half4(0, 0, 0, 0);
+ half2 uv = input.uv;
+
+ for (int j = 0; j < 4; j++)
+ {
+ color += saturate(tex2D(_MainTex, uv)
+ * pow(ATTENUATION, input.power * j));
+ uv += input.offset;
+ }
+
+ return color;
+}
+
+First, look at the vertex shader. power indicates how strongly the brightness is attenuated as it is stretched, and offset indicates the direction in which the brightness is stretched by the blur. Both are referenced in the fragment shader described later.
These values are computed in the vertex shader because they are common to all fragments; recomputing them in the fragment shader would only increase the number of operations.
+Here _Iteration = 1, therefore power = 4^0 = 1, and offset = (pixel size) * (1, 1).
Everything is now ready to sample pixels that are offset by exactly one pixel.
+Next is the fragment shader. While moving the sampling position uv by offset four times, the pixel values are accumulated. However, each sampled value is multiplied by pow(ATTENUATION, input.power * j).
ATTENUATION is a value that indicates how much each sampled pixel's contribution is attenuated. It affects the degree of blurring and how quickly the streak fades as it is stretched.
If ATTENUATION = 0.7, the first sampled pixel is multiplied by 0.7, and the second by 0.7^2 = 0.49. The figure makes this easy to visualize.
++Figure 4.5: A diagram showing the process of blurring +
+The figure on the left is the original brightness image before attenuation. _MainTexCorresponds to. uvLet's say the pixel now given to the Fragment shader but referenced is START at the bottom left. offset = (1, 1)Therefore, the pixels referenced in the four iterations are up to END in the upper right.
The value in a pixel is the brightness value of that pixel. Three from START are 0s and only END is 1. The attenuation factor of the above source code increases with each iteration, so the image looks exactly like the one in the middle. When this is added up, the final value obtained for the START pixel is color = 0.34.
If the Fragment shader processes each pixel in the same way, you will see the result shown on the right. You get a gradation like a blur. Also offsetit is described in a certain and earlier in the parameter that indicates the direction to stretch. However, as the effect on the appearance, it will extend in the opposite direction to the specified value.
++Figure 4.6: Further stretched luminance image +
+Let's return to the script for a moment. this.iteration has not been explained so far, and _Iteration was said to be 1. In fact, the same process is repeated an arbitrary number of times while swapping the RenderTextures.
StarGlow.cs
+Vector2 offset = new Vector2(-1, -1);
+
+base.material.SetVector(this.idOffset, offset);
+base.material.SetInt (this.idIteration, 1);
+
+Graphics.Blit(brightnessTex, blurredTex1, base.material, 2);
+
+== The explanation from here to above corresponds to the explanation ==
+
+for (int i = 2; i <= this.iteration; i++)
+{
+ base.material.SetInt(this.idIteration, i);
+
+ Graphics.Blit(blurredTex1, blurredTex2, base.material, 2);
+
+ RenderTexture temp = blurredTex1;
+ blurredTex1 = blurredTex2;
+ blurredTex2 = temp;
+}
+
+Since the same process is repeated using the same pass, the effect obtained does not change. However, the value of the shader parameter _Iteration will be higher, which will increase the attenuation in the shader described earlier. Also, the input image will be a blurred image that has already been stretched.
Simply put, this iteration results in blurredTex1 holding a blurry image that is even more stretched than the first.
This process is costly, so in reality I think it will be repeated at most 3 times. Also, there are four iterations in the shader, but this value was suggested in Kawase's announcement.
+ +
++Figure 4.7: Luminance image stretched in another direction +
+I didn't comment on STEP2.5 in the source code, but I chose 2.5 for the sake of explanation. As mentioned earlier, to create rays that extend in multiple directions, let's rotate offset and blur again.
Suppose that, against the ray defined by offset = (1, 1), we define a ray that extends in the opposite direction with offset = (-1, -1). In the actual source code, offset is rotated by the number of rays, but for the sake of explanation we use offset = (-1, -1).
StarGlow.cs
+for (int x = 1; x <= this.numOfStreak; x++)
+{
+ Vector2 offset = Quaternion.AngleAxis(angle * x + this.angleOfStreak,
+ Vector3.forward) * Vector2.down;
+ offset = offset.normalized;
+
+ for (int i = 2; i <= this.iteration; i++) {
+ blurredTex1 is stretched by iterative processing
+ }
+
+ Graphics.Blit(blurredTex1, compositeTex, base.material, 3);
+}
+
+The finally obtained blur image blurredTex1 is output to compositeTex, the image used for compositing. compositeTex is a composite of all the blur images extending in multiple directions.
At this time, the shader path used to combine the blur images is 3.
+StarGlow.shader
+Blend OneMinusDstColor One
+…
+fixed4 frag(v2f_img input) : SV_Target
+{
+ return tex2D(_MainTex, input.uv);
+}
+
+No special processing is done in this pass, but the Blend syntax is used to composite the images. The composition method may be changed depending on the production, but here I settled on OneMinusDstColor One. This is a soft composition method.
++Figure 4.8: Final Blur image +
+After obtaining a blur image that extends in multiple directions, the blur image is combined with the original image and output in the same way as a general glow. You could composite and output using the same Blend syntax as in STEP 2.5 above, but here, to reduce the number of Blit calls and to make the composition method flexible, we use Pass 4, which is dedicated to compositing.
StarGlow.cs
+base.material.EnableKeyword(StarGlow.CompositeTypes[this.compositeType]); +base.material.SetColor(this.idCompositeColor, this.color); +base.material.SetTexture(this.idCompositeTex, compositeTex); + +Graphics.Blit(source, destination, base.material, 4); ++
StarGlow.shader
+#pragma multi_compile _COMPOSITE_TYPE_ADDITIVE _COMPOSITE_TYPE_SCREEN …
+…
+fixed4 frag(v2f_img input) : SV_Target
+{
+ float4 mainColor = tex2D(_MainTex, input.uv);
+ float4 compositeColor = tex2D(_CompositeTex, input.uv);
+
+ #if defined(_COMPOSITE_TYPE_COLORED_ADDITIVE)…
+ || defined(_COMPOSITE_TYPE_COLORED_SCREEN)
+
+ compositeColor.rgb
+ = (compositeColor.r + compositeColor.g + compositeColor.b)
+ * 0.3333 * _CompositeColor;
+
+ #endif
+
+ #if defined(_COMPOSITE_TYPE_SCREEN)…
+ || defined(_COMPOSITE_TYPE_COLORED_SCREEN)
+
+ return saturate(mainColor + compositeColor
+ - saturate(mainColor * compositeColor));
+
+ #elif defined(_COMPOSITE_TYPE_ADDITIVE)…
+ || defined(_COMPOSITE_TYPE_COLORED_ADDITIVE)
+
+ return saturate(mainColor + compositeColor);
+
+ #else
+
+ return compositeColor;
+
+ #endif
+}
+
+Although the Blend syntax is not used, screen composition and additive composition are reproduced as they are. Furthermore, here, by multiplying in an arbitrary color, it is possible to express a star glow with a strong tint.
Release all the resources you have used. There is no special explanation, but just in case it is described in the sample on the source code. If the implementation environment is limited, it may be possible to reuse the reserved resources, but here we will simply release it.
+StarGlow.cs
+base.material.DisableKeyword(StarGlow.CompositeTypes[this.compositeType]); + +RenderTexture.ReleaseTemporary(brightnessTex); +RenderTexture.ReleaseTemporary(blurredTex1); +RenderTexture.ReleaseTemporary(blurredTex2); +RenderTexture.ReleaseTemporary(compositeTex); ++
I explained the basic (as announced by Mr. Kawase) implementation method of Star Glow, but if you are not particular about real-time performance, you can express various rays by switching the calculation method and parameters of the brightness image multiple times. And so on.
+Even within the range described here, if you change the parameters at the timing of the iteration, for example, you will be able to create heterogeneous, more "like" and "tasteful" rays. Or you can use noise to change the parameters over time.
+It is not a physically correct ray, and if you need a more dramatic and advanced expression of rays, it will be realized by a method other than the post effect, but this effect that can be made gorgeous with a relatively simple structure is also available. It's very interesting so please give it a try.
+… It's a little heavy.
+ +
|
![]() |
|
In this chapter, we will explain one of the methods of dividing polygons into triangles, "ear clipping" (hereinafter referred to as the "ear clipping method"). In addition to the usual simple polygon triangulation, we will also explain the triangulation of polygons with holes and polygons that have a hierarchical structure.
+The sample in this chapter is "TriangulationByEarClipping" from
https://github.com/IndieVisualLab/UnityGraphicsProgramming4
.
Run the sample DrawTest scene. Left-click on GameView to make a dot on the screen. Continue left-clicking on another point to connect it with the first point with a line. If you repeat it, you will get a polygon. When drawing lines, be careful not to cross the lines. Right-click to split the polygon into triangles to generate a mesh. If you generate a polygon in the generated mesh, you will get a polygon with holes.
+
++Figure 5.1: Screen of running the sample +
+A simple polygon is a closed polygon that does not intersect at its own line segment.
+
++Figure 5.2: Left: Simple polygon, Right: Non-simple polygon +
+Any simple polygon can be triangulated. Dividing a simple polygon with n vertices into triangles creates n-2 triangles.
+ +There are many methods for dividing polygons into triangles, but this time we will explain the "ear clipping method", which is simple to implement. The ear clipping method performs the division based on the "two ears theorem". Here an "ear" refers to a triangle whose two sides are edges of the polygon and whose remaining side lies inside the polygon, and the theorem states that every simple polygon without holes that has four or more vertices has at least two ears.
+
++Figure 5.3: Ears +
+The "ear trimming method" is an algorithm that searches for this "ear" triangle and removes it from the polygon. This "ear cutting method" is simpler than other division algorithms, but it is slow, so I don't think it can be used very much in situations where speed is required.
+ +First, look for "ears" in the given array of polygon vertices. The conditions for "ears" are the following two points.
+
++Figure 5.4: Ear conditions (within 180 degrees, no other vertices in the triangle) +
+Add the vertex vi that meets the above conditions to the ear list. This is done by the InitializeVertices function in the sample Triangulation.cs. Then create the triangles that make up the ear from the top of the ear list and remove the vertex vi from the vertex array.
Removing the vertex vi changes the shape of the polygon. For the remaining vertices vi-1, vi + 1, perform the above ear judgment again. If vertices vi-1, vi + 1 meet the ear criteria, they will be added to the end of the ear list, but they may also be removed from the ear list. This process corresponds to the CheckVertex function and EarClipping function of the sample Triangulation.cs.
++Figure 5.5: Polygon before and after deleting vertex vi +
+Let's illustrate a series of flows using a simple polygon as an example.
+
++Figure 5.6: A simple polygon +
+First, look for your ears. In this case, the ear list contains vertices 0,1,4,6. Vertices 2 and 5 are excluded because they are not convex vertices, and vertices 3 are excluded because they are contained in triangles 2, 3 and 4.
First, take out the first vertex 0 of the ear list. Make a triangle with vertices 1 and 6 before and after vertex 0. Remove vertex 0 from the vertex array and connect the previous and next vertices 1 and 6 to form a new polygon. Then, the ears are judged for vertices 1 and 6. Originally both were ears, but they remain ears even after the ear judgment. The ear list at this time is 1,4,6.
++Figure 5.7: Polygon with 0 vertices removed +
+Then take vertex 1 from the beginning of the ear list. Make a triangle with vertices 2 and 6 before and after vertex 1. Remove vertex 1 from the vertex array and connect the previous and next vertices 2 and 6 to form a new polygon. Then, the ears are judged for vertices 2 and 6. Since the vertex 1 is gone, the vertex 2 becomes a convex vertex and the ear condition is satisfied, so add it to the ear list. Vertex 6 remains in the ear. The ear list at this time is 4,6,2.
+
++Figure 5.8: Polygon with vertex 1 removed +
+Then take vertex 4 from the top of the ear list. Make a triangle with vertices 3 and 5 before and after vertex 4. Remove vertex 4 from the vertex array and connect the previous and next vertices 3 and 5 to form a new polygon. Then, the ears are judged for vertices 3 and 5. With the disappearance of vertex 4, the triangle created by vertices 2 and 5 before and after vertex 3 no longer contains other vertices, so add vertex 3 to the ear list. Also, since the internal angle of vertex 5 is 180 degrees or less, it becomes a convex vertex and the ear condition is satisfied, so add it to the ear list. The ear list at this time is 6,2,3,5.
+
++Figure 5.9: Polygon with vertex 4 removed +
+Then take vertex 6 from the top of the ear list. Make a triangle with vertices 2 and 5 before and after vertex 6. Remove vertex 6 from the vertex array and connect the previous and next vertices 2 and 5 to form a new polygon. Then, the ears are judged for vertices 2 and 5. Originally both were ears, but they remain ears even after the ear judgment. The ear list at this time is 2,3,5.
+
++Figure 5.10: Polygon with vertex 6 removed +
+Next, I took out vertex 2 from the top of the ear list ... I thought, but since there are only 3 vertices of the polygon left, I made a triangle as it is and the triangle division is finished. The final result of the triangle split is:
+
++Figure 5.11: Triangular division result +
+Next, I will explain the triangular division of a polygon with holes. Originally, the "ear cutting method" cannot be applied to polygons with holes, but if you make a notch in the outer polygon and connect it to the inner polygon as shown in the figure, the inner polygon will be the outer polygon. It will be part of the and you will be able to apply the ear-cutting method. This method is also possible for polygons with multiple holes.
+
++Figure 5.12: Joining inner and outer polygons (figure is a fairly exaggerated representation) +
+As a premise, the order of the vertices of the outer and inner polygons must be reversed. For example, if the outer polygon has vertices aligned clockwise, the inner polygon must align counterclockwise. The flow of joining is explained using the following polygon as an example.
+
++Figure 5.13: Polygon with holes +
+1. If there are multiple holes (inner polygons), look for the polygon with the largest X coordinate (on the right) and its vertices among the inner polygons.
+
++Figure 5.14: Vertex with the highest X coordinate +
+2. Let M be the vertex with the largest X coordinate. Draw a straight line from M to the right.
+
++Figure 5.15: Draw a line to the right from vertex M +
+3. Find the edge and intersection I of the outer polygon that intersects the line extending to the right from vertex M. If it intersects multiple sides, select the side of the intersection closest to vertex M.
+
++Figure 5.16: Vertex M and intersection I +
+4. Select the vertex P with the largest X coordinate among the vertices of the intersecting sides. Check if the triangle connecting the vertices M, I, P contains other vertices.
+
++Figure 5.17: Triangle M, I, P +
+5. If the triangle M, I, P does not contain any other vertices, the polygons can be joined: connect the vertex P of the outer polygon to the vertex M of the inner polygon, and walk around the inner polygon counterclockwise. When connecting from M back to the vertex P of the outer polygon, the vertex M and the vertex P are duplicated as additional vertices (vertices M' and P'). By separating the incoming and outgoing lines, the lines appear to overlap, but in terms of vertex order the result is a simple polygon that does not self-intersect.
+
++Figure 5.18: A diagram connecting the outer polygon and the inner polygon +
+6. If the triangle M, I, P contains another vertex R, select that vertex R; if multiple such vertices are included, select the vertex R with the smallest angle θ formed between the line segment M-I and the line segment M-R, and then perform step 5.
+
++Figure 5.19: Vertex R with the smallest angle θ formed by line segments MI and MR +
+7. Go back to 1 and join with the other inner polygons.
+ +Next, I will explain the triangular division of a nested polygon. Since the process of joining polygons with holes and the process of dividing triangles were explained in the previous section, here we will mainly explain the procedure for building a tree of parent-child relationships of polygons.
+Let's take the following set of polygons as an example.
+
++Figure 5.20: Nested polygons +
+When you create a polygonal parent-child relationship, you get the following tree.
+
++Figure 5.21: Left: Nested polygon Right: Parent-child relationship +
+Extract polygon 1 at the top of the tree (excluding dummies) and polygons 2 and 4 of its children.
+
++Figure 5.22: Extract polygons 1, 2 and 4 +
+Join polygons 1, 2 and 4 in order from the right.
+
++Figure 5.23: Joining polygons 1, 2 and 4 +
+Divide the combined polygon into triangles.
+
++Figure 5.24: Triangulation of combined polygons +
+Remove the polygon that is divided into triangles. The rest of the parent-child relationship tree is 3 and 5. First, take it out from 3.
+
++Figure 5.25: Polygon 3 +
+Since 3 has no children, it is divided into triangles as it is.
+
++Figure 5.26: Polygon 3 divided into triangles +
+Remove the polygon that is divided into triangles. There are only 5 left in the parent-child relationship tree. 5 is a triangle and has no children, so it ends as it is. This completes the triangular division of the nested polygon.
+
++Figure 5.27: Last polygon 5 +
+Let's move on to the explanation of the sample source code that implements all three algorithms explained so far.
+ +First, define a Polygon class that manages an array of polygon vertices. The Polygon class holds information such as the array of vertex coordinates and the direction of the loop, and determines whether a polygon is included in the polygon.
+Polygon.cs
+public class Polygon
+{
+ // Loop direction
+ public enum LoopType
+ {
+ CW, // clockwise
+ CCW, // counterclockwise
+ ERR, // Indefinite (no orientation)
+ }
+
+ public Vector3 [] vertices; // Vertex array
+ public LoopType loopType; // Loop direction
+
+ // ~ omitted ~
+}
+
+This is a Triangulation class that actually divides a polygon into triangles. The main is the Triangulate function of the Triangulation class.
+ +Data structure definition in Triangulation.cs
+// Vertex array +List&lt;Vector3&gt; vertices = new List&lt;Vector3&gt;(); + +// List of vertex numbers (the end and the start are connected) +LinkedList&lt;int&gt; indices = new LinkedList&lt;int&gt;(); + +// Ear vertex list +List&lt;int&gt; earTipList = new List&lt;int&gt;(); ++
It defines vertices that stores the array of vertex coordinates of the polygon to be processed, indices that stores the number (index) of the vertices of the polygon, and earTipList that stores the ears. Since indices need to refer to the vertices before and after, we use LinkedList, which has the property of a bidirectional list.
+ +First, if you are given an array of vertices that make up a polygon from the outside, store it in the list as a Polygon class.
+Polygon list
+// Polygon list
+List<Polygon> polygonList = new List<Polygon>();
+
+public void AddPolygon(Polygon polygon)
+{
+ polygonList.Add(polygon);
+}
+
+At the beginning of the Triangulate function, sort the Polygon list with polygonal data added in descending order of area of the rectangular area.
+Sorted part of Polygon list
+// Sort in descending order of area of rectangular area in polygon list +polygonList.Sort((a, b) => Mathf.FloorToInt( + (b.rect.width * b.rect.height) - (a.rect.width * a.rect.height) + )); ++
Next, we will pack the sorted Polygon list into the TreeNode class that creates the tree structure.
+The part that packs the Polygon list into a TreeNode
+// Create route (empty)
+polygonTree = new TreeNode<Polygon>();
+
+// Create a polygonal hierarchy
+foreach (Polygon polygon in polygonList)
+{
+ TreeNode<Polygon> tree = polygonTree;
+
+ CheckInPolygonTree(tree, polygon, 1);
+}
+
+The TreeNode looks like this: I think it's a common tree structure, but for an empty top-level node, it defines a flag isValue for the existence of its contents.
+TreeNode.cs
+public class TreeNode<T>
+{
+ public TreeNode<T> parent = null;
+ public List<TreeNode<T>> children = new List<TreeNode<T>>();
+
+ public T Value;
+ public bool isValue = false;
+
+ public TreeNode(T val)
+ {
+ Value = val;
+ isValue = true;
+ }
+
+ public TreeNode()
+ {
+ isValue = false;
+ }
+
+ public void AddChild(T val)
+ {
+ AddChild(new TreeNode<T>(val));
+ }
+
+ public void AddChild(TreeNode<T> tree)
+ {
+ children.Add(tree);
+ tree.parent = this;
+ }
+
+ public void RemoveChild(TreeNode<T> tree)
+ {
+ if (children.Contains(tree))
+ {
+ children.Remove(tree);
+ tree.parent = null;
+ }
+ }
+}
+
+Returning to Triangulation.cs, it is the contents of the CheckInPolygonTree function that creates a hierarchical structure of polygons. It checks whether the passed polygon fits inside its own polygon, and recursively determines whether it fits inside its own children. Makes the passed polygon its own child if it is included in itself but not in its children, or if there are no children.
+CheckInPolygonTree function
+bool CheckInPolygonTree(TreeNode<Polygon> tree, Polygon polygon, int lv)
+{
+ // Does it have a polygon?
+ bool isInChild = false;
+ if (tree.isValue)
+ {
+ if (tree.Value.IsPointInPolygon(polygon))
+ {
+ // If it is included in itself, search if it is also included in the child
+ for(int i = 0; i < tree.children.Count; i++)
+ {
+ isInChild |= CheckInPolygonTree(
+ tree.children[i], polygon, lv + 1);
+ }
+
+ // Make it your own child if it is not included in the child
+ if (!isInChild)
+ {
+ // Invert the order of the vertices if necessary
+ // CW because it is Inner when even nesting
+ // CCW because it is Outer when odd nesting
+ if (
+ ((lv % 2 == 0) &&
+ (polygon.loopType == Polygon.LoopType.CW)) ||
+ ((lv % 2 == 1) &&
+ (polygon.loopType == Polygon.LoopType.CCW))
+ )
+ {
+ polygon.ReverseIndices();
+ }
+
+ tree.children.Add(new TreeNode<Polygon>(polygon));
+ return true;
+ }
+ }
+ else
+ {
+ // not included
+ return false;
+ }
+ }
+ else
+ {
+ // Search only for children if they have no value
+ for (int i = 0; i < tree.children.Count; i++)
+ {
+ isInChild |= CheckInPolygonTree(
+ tree.children[i], polygon, lv + 1);
+ }
+
+ // Make it your own child if it is not included in the child
+ if (!isInChild)
+ {
+ // Invert the order of the vertices if necessary
+ // CW because it is Inner when even nesting
+ // CCW because it is Outer when odd nesting
+ if (
+ ((lv % 2 == 0) &&
+ (polygon.loopType == Polygon.LoopType.CW)) ||
+ ((lv % 2 == 1) &&
+ (polygon.loopType == Polygon.LoopType.CCW))
+ )
+ {
+ polygon.ReverseIndices();
+ }
+ tree.children.Add(new TreeNode<Polygon>(polygon));
+ return true;
+ }
+ }
+
+ return isInChild;
+}
+
+If there are multiple inner polygons, select the vertex with the largest X coordinate among the inner polygons and that polygon. At that time, define a class that collects the X coordinate, vertex number, and polygon number information for judgment.
+XMaxData structure
+/// <summary>
+/// X coordinate maximum value and polygon information
+/// </summary>
+struct XMaxData
+{
+ public float xmax; // x coordinate maximum value
+ public int no; // Polygon number
+ public int index; // vertex number of xmax
+
+ public XMaxData(float x, int n, int ind)
+ {
+ xmax = x;
+ no = n;
+ index = ind;
+ }
+}
+
+Next, the actual joining process is divided into two processes: sorting multiple polygons in descending order of X coordinate, and joining. The first is the process of sorting multiple polygons in descending order of X coordinate.
+CombineOuterAndInners function
+Vector3[] CombineOuterAndInners(Vector3[] outer, List<Polygon> inners)
+{
+ List<XMaxData> pairs = new List<XMaxData>();
+
+ // Find the inner polygon with the vertex with the largest X coordinate
+ for (int i = 0; i < inners.Count; i++)
+ {
+ float xmax = inners[i].vertices[0].x;
+ int len = inners[i].vertices.Length;
+ int xmaxIndex = 0;
+ for (int j = 1; j < len; j++)
+ {
+ float x = inners[i].vertices[j].x;
+ if(x > xmax)
+ {
+ xmax = x;
+ xmaxIndex = j;
+ }
+ }
+ pairs.Add(new XMaxData(xmax, i, xmaxIndex));
+ }
+
+ // Sort to the right (in descending order of xmax)
+ pairs.Sort((a, b) => Mathf.FloorToInt(b.xmax - a.xmax));
+
+ // Combine from right
+ for (int i = 0; i < pairs.Count; i++)
+ {
+ outer = CombinePolygon(outer, inners[pairs[i].no], pairs[i].index);
+ }
+
+ return outer;
+}
+
+Next is the join processing part. In the CombinePolygon function, draw a horizontal line from the vertex M with the largest X coordinate of the inner polygon and find the line segment of the outer polygon that intersects that line.
+Early stage of CombinePolygon function
+Vector3[] CombinePolygon(Vector3[] outer, Polygon inner, int xmaxIndex)
+{
+ Vector3 M = inner.vertices[xmaxIndex];
+
+ // Find the intersection
+ Vector3 intersectionPoint = Vector3.zero;
+ int index0 = 0;
+ int index1 = 0;
+
+ if (GeomUtil.GetIntersectionPoint(M,
+ new Vector3(maxX + 0.1f, M.y, M.z),
+ outer, ref intersectionPoint,
+ ref index0, ref index1))
+ {
+ ~ Omitted ~
+
+GeomUtil.GetIntersectionPoint, the function that finds the intersection between the line segment M-I and the line segments of the outer polygon, is as follows. The point is that, since the outer polygon is clockwise, we only consider segments whose starting point is above the line M-I and whose end point is below it. Doing so will prevent the vertices from getting out of order if you select a line segment that connects the outer polygon to the inner polygon in the already joined inner and outer polygons.
+GetIntersectionPoint function
+public static bool GetIntersectionPoint(Vector3 start, Vector3 end,
+ Vector3[] vertices,
+ ref Vector3 intersectionP,
+ ref int index0, ref int index1)
+{
+ float distanceMin = float.MaxValue;
+ bool isHit = false;
+
+ for(int i = 0; i < vertices.Length; i++)
+ {
+ int index = i;
+ int next = (i + 1)% vertices.Length; // Next vertex
+
+ Vector3 iP = Vector3.zero;
+ Vector3 vstart = vertices[index];
+ Vector3 vend = vertices[next];
+
+ // The starting point of the intersecting polygonal line segment must be at least the line segment M, I
+ Vector3 diff0 = vstart - start;
+ if (diff0.y < 0f)
+ {
+ continue;
+ }
+
+ // The end point of the intersecting polygonal line segment is below the line segment M, I
+ Vector3 diff1 = vend - start;
+ if (diff1.y > 0f)
+ {
+ continue;
+ }
+
+ if (IsIntersectLine(start, end, vstart, vend, ref iP))
+ {
+ float distance = Vector3.Distance(start, iP);
+
+ if (distanceMin >= distance)
+ {
+ distanceMin = distance;
+ index0 = index;
+ index1 = next;
+ intersectionP = iP;
+ isHit = true;
+ }
+ }
+
+ }
+
+ return isHit;
+}
+
+After finding the intersection, check if the triangle created from the vertex with the largest X coordinate of the intersecting line segment, vertex M, and intersection I contains other vertices. To determine if a triangle contains vertices, a two-dimensional cross product is used to determine which side of the triangle's line segment the vertices are on. If the vertices are to the right of all lines, they are inside the triangle.
+IsInTriangle and CheckLine functions in GeometryUtil.cs
+/// <summary>
+/// Returns the positional relationship between the line and the vertex
+/// </summary>
+/// <param name="o"></param>
+/// <param name="p1"></param>
+/// <param name="p2"></param>
+/// <returns> +1: Right of line -1: Left of line 0: On line </returns>
+public static int CheckLine(Vector3 o, Vector3 p1, Vector3 p2)
+{
+ double x0 = o.x - p1.x;
+ double y0 = o.y - p1.y;
+ double x1 = p2.x - p1.x;
+ double y1 = p2.y - p1.y;
+
+ double x0y1 = x0 * y1;
+ double x1y0 = x1 * y0;
+ double det = x0y1 - x1y0;
+
+ return (det > 0D ? +1 : (det < 0D ? -1 : 0));
+}
+
+/// <summary>
+/// Triangle (clockwise) and point inside / outside judgment
+/// </summary>
+/// <param name="o"></param>
+/// <param name="p1"></param>
+/// <param name="p2"></param>
+/// <param name="p3"></param>
+/// <returns> +1: outside -1: inside 0: online</returns>
+public static int IsInTriangle(Vector3 o,
+ Vector3 p1,
+ Vector3 p2,
+ Vector3 p3)
+{
+ int sign1 = CheckLine(o, p2, p3);
+ if (sign1 < 0)
+ {
+ return +1;
+ }
+
+ int sign2 = CheckLine(o, p3, p1);
+ if (sign2 < 0)
+ {
+ return +1;
+ }
+
+ int sign3 = CheckLine(o, p1, p2);
+ if (sign3 < 0)
+ {
+ return +1;
+ }
+
+ return (((sign1 != 0) && (sign2 != 0) && (sign3 != 0)) ? -1 : 0);
+}
+
+Now, the continuation of CombinePolygon. After finding the intersection, it is judged whether there are other vertices in the triangle; the winding of the triangle is made clockwise because the inside/outside judgment is made using the cross product.
+Middle 1 of CombinePolygon function
+if (GeomUtil.GetIntersectionPoint(M,
+ new Vector3(maxX + 0.1f, M.y, M.z), outer,
+ ref intersectionPoint, ref index0, ref index1))
+{
+ // Intersection found
+
+ // Get the rightmost vertex of the intersecting line segment
+ int pindex;
+ Vector3[] triangle = new Vector3[3];
+ if (outer[index0].x > outer[index1].x)
+ {
+ pindex = index0;
+ // The triangle will be reversed depending on the vertex of the selected line segment, so adjust it so that it is clockwise.
+ triangle[0] = M;
+ triangle[1] = outer[pindex];
+ triangle[2] = intersectionPoint;
+ }
+ else
+ {
+ pindex = index1;
+ triangle[0] = M;
+ triangle[1] = intersectionPoint;
+ triangle[2] = outer[pindex];
+ }
+
+If the intersection I and the vertex with the largest X coordinate of the line segment are the same, there is nothing to block from the vertex M, so it is not checked whether the triangle contains other vertices. If they are not the same, it is checked whether other vertices are included, but since the vertices included in the triangle are recessed vertices, the inclusion judgment is performed while satisfying that condition. If the triangle contains multiple vertices, the vertex with the smallest angle between the line segments M and I and the line segment M and the corresponding vertex is selected and stored in the finalIndex.
+Middle 2 of CombinePolygon function
+Vector3 P = outer[pindex];
+
+int finalIndex = pindex;
+
+// If the intersection and P are the same, there is nothing to block, so do not check the triangle
+if((Vector3.Distance(intersectionPoint, P) > float.Epsilon))
+{
+ float angleMin = 361f;
+
+ for(int i = 0; i < outer.Length; i++)
+ {
+
+ // Convex vertex / Reflective vertex check
+ int prevIndex = (i == 0) ? outer.Length - 1 : i - 1; // Previous vertex
+ int nextIndex = (i + 1)% outer.Length; // Next vertex
+ int nowIndex = i;
+
+ if (nowIndex == pindex) continue;
+
+ Vector3 outerP = outer[nowIndex];
+
+ if (outerP.x < M.x) continue;
+
+ // Ignore if the coordinates are the same duplicated at the time of division
+ if (Vector3.Distance(outerP, P) <= float.Epsilon) continue;
+
+ Vector3 prevVertex = outer[prevIndex];
+ Vector3 nextVertex = outer[nextIndex];
+ Vector3 nowVertex = outer[nowIndex];
+
+ // Is it a reflection vertex?
+ bool isReflex = !GeomUtil.IsAngleLessPI(nowVertex,
+ prevVertex,
+ nextVertex);
+
+ // Does the triangle contain "reflection vertices"?
+ if ((GeomUtil.IsInTriangle(outerP,
+ triangle[0],
+ triangle[1],
+ triangle[2]) <= 0)&&(isReflex))
+ {
+ // Invisible because the vertices are included in the triangle
+
+ // Find the angle between the M, I and M, outerP line segments (select the vertex with the shallowest angle)
+ float angle = Vector3.Angle(intersectionPoint - M, outerP - M);
+ if (angle < angleMin)
+ {
+ angleMin = angle;
+ finalIndex = nowIndex;
+ }
+ }
+ }
+}
+
+After finding the vertices (finalIndex) to join, join the vertex arrays of the inner and outer polygons.
+Second half of the CombinePolygon function
+Vector3 FinalP = outer[finalIndex];
+
+// Join (create a new polygon)
+List<Vector3> newOuterVertices = new List<Vector3>();
+
+// Add up to Index that divides outer
+for (int i = 0; i <= finalIndex; i++)
+{
+ newOuterVertices.Add(outer[i]);
+}
+
+// Add all inner
+for (int i = xmaxIndex; i < inner.vertices.Length; i++)
+{
+ newOuterVertices.Add(inner.vertices[i]);
+}
+for (int i = 0; i < xmaxIndex; i++)
+{
+ newOuterVertices.Add(inner.vertices[i]);
+}
+
+// Increase two vertices to split
+newOuterVertices.Add(M);
+newOuterVertices.Add(FinalP);
+
+// Add the index of the remaining outer
+for (int i = finalIndex + 1; i < outer.Length; i++)
+{
+ newOuterVertices.Add(outer[i]);
+}
+
+outer = newOuterVertices.ToArray();
+
+When the inner and outer polygons become one polygon, it is finally divided into triangles. First, initialize the index array of vertices and create an ear list.
+InitializeVertices function
+/// <summary>
+/// Initialization
+/// </summary>
+void InitializeVertices(Vector3[] points)
+{
+ vertices.Clear();
+ indices.Clear();
+ earTipList.Clear();
+
+ // Create index array
+ resultTriangulationOffset = resultVertices.Count;
+ for (int i = 0; i < points.Length; i++)
+ {
+ Vector3 nowVertex = points[i];
+ vertices.Add(nowVertex);
+
+ indices.AddLast(i);
+
+ resultVertices.Add(nowVertex);
+ }
+
+ // Search for convex triangles and ears
+ LinkedListNode<int> node = indices.First;
+ while (node != null)
+ {
+ CheckVertex(node);
+ node = node.Next;
+ }
+}
+
+The CheckVertex function that determines if a vertex is an ear looks like this:
+CheckVertex function
+void CheckVertex(LinkedListNode<int> node)
+{
+ // Convex vertex / Reflective vertex check
+ int prevIndex = (node.Previous == null) ?
+ indices.Last.Value :
+ node.Previous.Value; // Previous vertex
+ int nextIndex = (node.Next == null) ?
+ indices.First.Value :
+ node.Next.Value; // Next vertex
+ int nowIndex = node.Value;
+
+ Vector3 prevVertex = vertices[prevIndex];
+ Vector3 nextVertex = vertices[nextIndex];
+ Vector3 nowVertex = vertices[nowIndex];
+
+ bool isEar = false;
+
+ // Is the internal angle within 180 degrees?
+ if (GeomUtil.IsAngleLessPI(nowVertex, prevVertex, nextVertex))
+ {
+ // Ear check
+ // Within 180 degrees, the triangle does not contain other vertices
+ isEar = true;
+ foreach(int i in indices)
+ {
+ if ((i == prevIndex) || (i == nowIndex) || (i == nextIndex))
+ continue;
+
+ Vector3 p = vertices[i];
+
+ // Ignore if the coordinates are the same duplicated at the time of division
+ if (Vector3.Distance(p, prevVertex) <= float.Epsilon) continue;
+ if (Vector3.Distance(p, nowVertex) <= float.Epsilon) continue;
+ if (Vector3.Distance(p, nextVertex) <= float.Epsilon) continue;
+
+ if(GeomUtil.IsInTriangle(p,
+ prevVertex,
+ nowVertex,
+ nextVertex) <= 0)
+ {
+ isEar = false;
+ break;
+ }
+
+ }
+ if (isEar)
+ {
+ if (!earTipList.Contains(nowIndex))
+ {
+ // Add ears
+ earTipList.Add(nowIndex);
+ }
+ }
+ else
+ {
+ // Exclude if it is no longer an ear when it is already an ear
+ if (earTipList.Contains(nowIndex))
+ {
+ // Ear removal
+ earTipList.Remove(nowIndex);
+ }
+ }
+
+ }
+
+}
+
+The actual triangulation is done in the following EarClipping function. As mentioned above, the vertices are taken out from the top of the ear list and the triangle connected to the front and back vertices is output. Then, the procedure of deleting the vertices of the ear from the vertex index array and determining whether the vertices before and after are the ears is repeated.
+EarClipping function
+void EarClipping()
+{
+ int triangleIndex = 0;
+
+ while (earTipList.Count > 0)
+ {
+ int nowIndex = earTipList [0]; // Extract top
+
+ LinkedListNode<int> indexNode = indices.Find(nowIndex);
+ if (indexNode != null)
+ {
+ int prevIndex = (indexNode.Previous == null) ?
+ indices.Last.Value :
+ indexNode.Previous.Value; // Previous vertex
+ int nextIndex = (indexNode.Next == null) ?
+ indices.First.Value :
+ indexNode.Next.Value; // Next vertex
+
+ Vector3 prevVertex = vertices[prevIndex];
+ Vector3 nextVertex = vertices[nextIndex];
+ Vector3 nowVertex = vertices[nowIndex];
+
+ // Triangle creation
+ triangles.Add(new Triangle(
+ prevVertex,
+ nowVertex,
+ nextVertex, "(" + triangleIndex + ")"));
+
+ resultTriangulation.Add(resultTriangulationOffset + prevIndex);
+ resultTriangulation.Add(resultTriangulationOffset + nowIndex);
+ resultTriangulation.Add(resultTriangulationOffset + nextIndex);
+
+ triangleIndex++;
+
+ if (indices.Count == 3)
+ {
+ // End because it is the last triangle
+ break;
+ }
+
+ // Delete ear vertices
+ earTipList.RemoveAt (0); // Remove top
+ indices.Remove(nowIndex);
+
+ // Check the vertices before and after
+ int[] bothlist = { prevIndex, nextIndex };
+ for (int i = 0; i < bothlist.Length; i++)
+ {
+ LinkedListNode<int> node = indices.Find(bothlist[i]);
+ CheckVertex(node);
+ }
+ }
+ else
+ {
+ Debug.LogError("index not found");
+ break;
+ }
+ }
+
+ // UV calculation
+ for (int i = 0; i < vertices.Count; i++)
+ {
+ Vector2 uv2 = CalcUV(vertices[i], uvRect);
+ resultUVs.Add(uv2);
+ }
+}
+
+Make the result of triangle division into Mesh. In the EarClipping function, we prepare the necessary vertex array and index array (resultVertices and resultTriangulation) and pour them into Mesh.
+MakeMesh function
+void MakeMesh()
+{
+ mesh = new Mesh();
+ mesh.vertices = resultVertices.ToArray();
+ mesh.SetIndices(resultTriangulation.ToArray(),
+ MeshTopology.Triangles, 0);
+ mesh.RecalculateNormals();
+ mesh.SetUVs(0, resultUVs);
+
+ mesh.RecalculateBounds();
+
+ MeshFilter filter = GetComponent<MeshFilter>();
+ if(filter != null)
+ {
+ filter.mesh = mesh;
+ }
+}
+
+By the way, I also set the UV coordinates. UV coordinates are assigned within the rectangular area of the polygon.
+CalcUV
+Vector2 CalcUV(Vector3 vertex, Rect uvRect)
+{
+ float u = (vertex.x - uvRect.x) / uvRect.width;
+ float v = (vertex.y - uvRect.y) / uvRect.height;
+
+ return new Vector2(u, v);
+}
+
+I have explained polygon triangulation by the ear clipping method. With it, a shape drawn with the mouse can be turned into a mesh in real time, and the outline data of a font can be converted into a mesh. However, since it is not a particularly fast algorithm (it is computed sequentially on the CPU), performance becomes a problem as the number of vertices increases. Even so, I find it interesting that complex polygons can be divided into triangles with such a simple algorithm.
+ +https://www.geometrictools.com/Documentation/TriangulationByEarClipping.pdf
+
|
![]() |
|
In this chapter, we will explain the function called "Tessellation" that divides polygons on the GPU and how to displace the divided vertices by Displacement map.
The sample in this chapter is "Tessellation" from
https://github.com/IndieVisualLab/UnityGraphicsProgramming4
.
Tessellation is a function that divides polygons on the GPU, which is installed as standard in rendering pipelines such as DirectX, OpenGL, and Metal.
Normally, vertices, normals, tangents, UV information, etc. are transferred from the CPU to the GPU and flow through the rendering pipeline, but when processing high-polygon meshes the transfer bandwidth between the CPU and GPU becomes overloaded and turns into a drawing-speed bottleneck.
Since Tessellation provides the function to divide the mesh on the GPU, it is possible to process polygons that have been reduced to some extent on the CPU, subdivide them on the GPU, and restore them to fine displacement by Displacement map lookup. Will be.
In this book, I will mainly explain the Tessellation function in Unity.
Tessellation adds three stages to the drawing pipeline: "Hull Shader", "Tessellation", and "Domain Shader". Three stages will be added, but there are only two programmable stages, "Hull Shader" and "Domain Shader".
+
++Figure 6.1: Tessellation pipeline Source: Microsoft +
+[*1] https://docs.microsoft.com/en-us/windows/desktop/direct3d11/direct3d-11-advanced-stages-tessellation
Understanding the details of each stage and implementing the Hull Shader and Domain Shader yourself is one way to deepen your understanding of Tessellation, but Unity provides a very convenient wrapper that lets Tessellation be incorporated into Surface Shaders.
First, let's perform Tessellation and Displacement based on this Surface Shader.
I will explain about Tessellation supported by Surface Shader with comments in the comments.
+TessellationSurface.Shader
+Shader "Custom/TessellationDisplacement"
+{
+ Properties
+ {
+ _EdgeLength ("Edge length", Range(2,50)) = 15
+ _MainTex ("Base (RGB)", 2D) = "white" {}
+ _DispTex ("Disp Texture", 2D) = "black" {}
+ _NormalMap ("Normalmap", 2D) = "bump" {}
+ _Displacement ("Displacement", Range(0, 1.0)) = 0.3
+ _Color ("Color", color) = (1,1,1,0)
+ _SpecColor ("Spec color", color) = (0.5,0.5,0.5,0.5)
+ _Specular ("Specular", Range(0, 1) ) = 0
+ _Gloss ("Gloss", Range(0, 1) ) = 0
+ }
+ SubShader
+ {
+ Tags { "RenderType"="Opaque" }
+ LOD 300
+
+ CGPROGRAM
+
+ // tessellate: Specify a function that defines the number of patch divisions and method as tessEdge
+ // As vertex: disp, specify disp for the function that performs displacement.
+ // Called inside the Domain Shader inside the Wrapper
+ #pragma surface surf BlinnPhong addshadow fullforwardshadows
+ vertex:disp tessellate:tessEdge nolightmap
+ #pragma target 4.6
+ #include "Tessellation.cginc"
+
+ struct appdata
+ {
+ float4 vertex : POSITION;
+ float4 tangent : TANGENT;
+ float3 normal: NORMAL;
+ float2 texcoord : TEXCOORD0;
+ };
+
+ sampler2D _DispTex;
+ float _Displacement;
+ float _EdgeLength;
+ float _Specular;
+ float _Gloss;
+
+ // A function that specifies the number of divisions and the division method
+ // This function is called per patch, not per vertex
+ // Specify the number of edge divisions of the patch consisting of 3 vertices in xyz,
+ // Specify the number of divisions inside the patch in w and return it
+ float4 tessEdge (appdata v0, appdata v1, appdata v2)
+ {
+ //Tessellation.cginc has a function that defines three types of splitting methods
+
+ // Tessellation according to the distance from the camera
+ //UnityDistanceBasedTess
+
+ // Tessellation according to the edge length of the mesh
+ //UnityEdgeLengthBasedTess
+
+ // Culling function in UnityEdgeLengthBasedTess function
+ //UnityEdgeLengthBasedTessCull
+
+ return UnityEdgeLengthBasedTessCull(
+ v0.vertex, v1.vertex, v2.vertex,
+ _EdgeLength, _Displacement * 1.5f
+ );
+ }
+
+ // This is the disp function specified in the Displacement processing function.
+ // This function is in Wrapper after Tessellator
+ // Called in the Domain Shader.
+ // All the elements defined in appdata in this function are accessible, so
+ // Displacement and other processing such as vertex modulation are performed here.
+ void disp (inout appdata v)
+ {
+ // Here, we are performing vertex modulation in the normal direction using the Displacement map.
+ float d = tex2Dlod(
+ _DispTex,
+ float4(v.texcoord.xy,0,0)
+ ).r * _Displacement;
+ v.vertex.xyz += v.normal * d;
+ }
+
+ struct Input
+ {
+ float2 uv_MainTex;
+ };
+
+ sampler2D _MainTex;
+ sampler2D _NormalMap;
+ fixed4 _Color;
+
+ void surf (Input IN, inout SurfaceOutput o)
+ {
+ half4 c = tex2D (_MainTex, IN.uv_MainTex) * _Color;
+ o.Albedo = c.rgb;
+ o.Specular = _Specular;
+ o.Gloss = _Gloss;
+ o.Normal = UnpackNormal(tex2D(_NormalMap, IN.uv_MainTex));
+ }
+ ENDCG
+ }
+ FallBack "Diffuse"
+}
+
+Displacement processing using Surface Shader is realized with the above Shader. You can get great benefits with a very cheap implementation.
+ +Implementation when writing each Tessellation stage in Vertex / Fragment Shader.
+ +The Hull Shader is a programmable stage, called immediately after the Vertex Shader. Here, we mainly define "division method" and "how many divisions".
The Hull Shader consists of two functions, a "control point function" and a "patch constant function", which are processed in parallel by the GPU. The control point is the control point of the division source, and the patch has the topology to divide. For example, if you want to form a patch for each triangular polygon and divide it with a Tessellator, there are 3 control points and 1 patch.
The control point function works per control point, and the patch constant function works per patch.
Tessellation.Shader
+#pragma hull hull_shader
+
+// Structure used as input of hull shader system
+struct InternalTessInterp_appdata
+{
+ float4 vertex : INTERNALTESSPOS;
+ float4 tangent : TANGENT;
+ float3 normal: NORMAL;
+ float2 texcoord : TEXCOORD0;
+};
+
+// Tessellation coefficient structure defined and returned by the patch constant function
+struct TessellationFactors
+{
+ float edge[3] : SV_TessFactor;
+ float inside : SV_InsideTessFactor;
+};
+
+// hull constant shader (patch constant function)
+TessellationFactors hull_const (InputPatch<InternalTessInterp_appdata, 3> v)
+{
+ TessellationFactors o;
+ float4 tf;
+
+ // Split Utility function explained in the comment at the time of Tessellation on Surface shader
+ tf = UnityEdgeLengthBasedTessCull(
+ v[0].vertex, v[1].vertex, v[2].vertex,
+ _EdgeLength, _Displacement * 1.5f
+ );
+
+ // Set the number of edge divisions
+ o.edge [0] = tf.x;
+ o.edge[1] = tf.y;
+ o.edge [2] = tf.z;
+ // Set the number of divisions in the center
+ o.inside = tf.w;
+ return o;
+}
+
+// hull shader (control point function)
+
+// Triangular polygon with split primitive type tri
+[UNITY_domain("tri")]
+// Select the division ratio from integer, fractional_odd, fractional_even
+[UNITY_partitioning("fractional_odd")]
+// Topology after division triangle_cw is a clockwise triangle polygon Counterclockwise is triangle_ccw
+[UNITY_outputtopology("triangle_cw")]
+// Specify the patch constant function name
+[UNITY_patchconstantfunc("hull_const")]
+// Output control point. 3 outputs for triangular polygons
+[UNITY_outputcontrolpoints(3)]
+InternalTessInterp_appdata hull_shader (
+ InputPatch<InternalTessInterp_appdata,3> v,
+ uint id : SV_OutputControlPointID
+)
+{
+ return v[id];
+}
+
+
+Here, the patch is divided according to the tessellation factor (Tessellation Factors structure) returned by the Hull shader.
The Tessellation Stage is not programmable, so you cannot write a function.
Domain Shader is a programmable stage that reflects positions such as vertices, normals, tangents, and UVs based on the processing results of the Tessellation Stage.
A semantic parameter called SV_DomainLocation is input to the Domain Shader, so this parameter will be used to reflect the coordinates.
Also, if you want to perform displacement processing, describe it in Domain Shader. After Domain Shader, the process flows to Fragment Shader and the final drawing process is performed, but if the Geometry Shader function is specified in #pragma, it can also be sent to Geometry Shader.
Tessellation.Shader
+#pragma domain domain_shader
+
+struct v2f
+{
+ UNITY_POSITION(pos);
+ float2 uv_MainTex : TEXCOORD0;
+ float4 tSpace0 : TEXCOORD1;
+ float4 tSpace1 : TEXCOORD2;
+ float4 tSpace2 : TEXCOORD3;
+};
+
+sampler2D _MainTex;
+float4 _MainTex_ST;
+sampler2D _DispTex;
+float _Displacement;
+
+v2f vert_process (appdata v)
+{
+ v2f o;
+ UNITY_INITIALIZE_OUTPUT(v2f,o);
+ o.pos = UnityObjectToClipPos(v.vertex);
+ o.uv_MainTex.xy = TRANSFORM_TEX(v.texcoord, _MainTex);
+ float3 worldPos = mul(unity_ObjectToWorld, v.vertex).xyz;
+ float3 worldNormal = UnityObjectToWorldNormal(v.normal);
+ fixed3 worldTangent = UnityObjectToWorldDir(v.tangent.xyz);
+ fixed tangentSign = v.tangent.w * unity_WorldTransformParams.w;
+ fixed3 worldBinormal = cross(worldNormal, worldTangent) * tangentSign;
+ o.tSpace0 = float4 (
+ worldTangent.x, worldBinormal.x, worldNormal.x, worldPos.x
+ );
+ o.tSpace1 = float4 (
+ worldTangent.y, worldBinormal.y, worldNormal.y, worldPos.y
+ );
+ o.tSpace2 = float4 (
+ worldTangent.z, worldBinormal.z, worldNormal.z, worldPos.z
+ );
+ return o;
+}
+
+void disp (inout appdata v)
+{
+ float d = tex2Dlod(_DispTex, float4(v.texcoord.xy,0,0)).r * _Displacement;
+ v.vertex.xyz -= v.normal * d;
+}
+
+// Domain shader function
+[UNITY_domain("tri")]
+v2f domain_shader (
+ TessellationFactors tessFactors,
+ const OutputPatch<InternalTessInterp_appdata, 3> vi,
+ float3 bary : SV_DomainLocation
+)
+{
+ appdata v;
+ UNITY_INITIALIZE_OUTPUT(appdata,v);
+ // Set each coordinate based on the SV_DomainLocation semantics calculated in the Tessellation stage.
+ v.vertex =
+ vi[0].vertex * bary.x +
+ vi[1].vertex * bary.y +
+ vi[2].vertex * bary.z;
+ v.tangent =
+ vi[0].tangent * bary.x +
+ vi[1].tangent * bary.y +
+ vi[2].tangent * bary.z;
+ v.normal =
+ vi[0].normal * bary.x +
+ vi[1].normal * bary.y +
+ vi[2].normal * bary.z;
+ v.texcoord =
+ vi[0].texcoord * bary.x +
+ vi[1].texcoord * bary.y +
+ vi[2].texcoord * bary.z;
+
+ // This is the best place to do Displacement processing.
+ disp (v);
+
+ // Finally, describe the process just before passing to the fragment shader.
+ v2f o = vert_process (v);
+ return o;
+}
+
+The above is the process when incorporating Tessellation into Vertex / Fragment Shader.
+Finally, I will attach an example. In this example, the fluid RenderTexture output of the grid method described in "Unity Graphics Programming vol.1" is used as the Height map, and the Plane mesh originally included in Unity is subjected to Tessellation and Displacement processing.
Originally it is a Plane mesh with a limited number of vertices, but you can see that the mesh follows with a high particle size without breaking.
++Figure 6.2: Fluid Displacement +
+In this chapter, we introduced "Tessellation".
Tessellation is by now a mature, well-established technology, but I think it is easy to put to use for everything from performance optimization to creative expression, so I hope you will use it where you need it.
|
![]() |
|
This chapter provides an overview of Poisson Disk Sampling (hereinafter referred to as "PDS") and an explanation of the CPU implementation algorithm.
+Fast Poisson Disk Sampling in Arbitrary Dimensions (fast Poisson disk sampling in any dimension: hereinafter, "FPDS") is adopted for the implementation on the CPU. This algorithm was proposed by Robert Bridson of the University of British Columbia in a 2007 paper submitted to SIGGRAPH.
+ +Some people may not know what PDS is in the first place, so this section will explain PDS.
+First, plot a large number of points in a suitable space. Next, consider a distance d greater than 0 . At this time, as shown in Fig . 7.1 , the distribution in which all the points are at random positions but all the points are separated by at least d or more is called the Poisson-disk distribution. In other words, even if two points are randomly selected from Figure 7.1 , the distance between them will not always be less than d in any combination . Sampling such a Poisson-disk distribution, in other words, generating a Poisson-disk distribution by calculation, is called PDS.
+With this PDS, you can get a random point cloud with uniformity. Therefore, it is used for sampling in filtering processing such as antialiasing, and sampling for determining composite pixels in texture composition processing.
+
++Figure 7.1: Plot random points +
+The biggest feature of FPDS is that it is dimension-independent. Most of the methods proposed so far are based on the assumption of two-dimensional calculation, and it has not been possible to efficiently perform three-dimensional sampling. Therefore, FPDS was proposed to perform high-speed calculations in any dimension.
+FPDS can be calculated in O (N) and can be implemented based on the same algorithm in any dimension. This section describes the FPDS process step by step.
+ +Three parameters are used in the calculation of FPDS.
+These parameters can be freely entered by the user. The above parameters are also used in the following explanations.
+ +Divide the sampling space into a grid to speed up the calculation of the distance between points. Here, the number of dimensions of the sampling space is n, and the size of each divided space (hereinafter referred to as "cell") is \frac{r}{\sqrt{n}}. As shown in Figure 7.2, \sqrt{n} is the magnitude of the vector whose component along each of the n axes is 1.
+
++Figure 7.2: When n = 3 +
+Then, each sampled point belongs to the cell that contains its coordinates. Since the size of each cell is \frac{r}{\sqrt{n}}, the center-to-center distance between adjacent cells is also \frac{r}{\sqrt{n}}. In other words, by dividing the space by \frac{r}{\sqrt{n}}, as you can see from Figure 7.3, at most one point belongs to each cell. Furthermore, when searching the neighborhood of a certain cell, by examining the surrounding cells within \pm{n} cells along each axis, all the cells within the minimum distance r can be searched.
+
++Figure 7.3: Partition of a set at n = 2 +
+Also, this Grid can be represented by an n-dimensional array, so it is very easy to implement. By assigning the sampled point to the cell corresponding to its coordinates, it is possible to easily search for other points that exist nearby.
++
// Get a 3D array that represents a 3D grid
+Vector3?[, ,] GetGrid(Vector3 bottomLeftBack, Vector3 topRightForward
+ , float min, int iteration)
+{
+ // Sampling space
+ var dimension = (topRightForward - bottomLeftBack);
+ // Multiply the minimum distance by the reciprocal of √3 (avoiding a division). Note: the constant is named InvertRootTwo, but for a 3D grid the factor should be 1/√3 — verify its value.
+ var cell = min * InvertRootTwo;
+
+ return new Vector3?[
+ Mathf.CeilToInt(dimension.x / cell) + 1,
+ Mathf.CeilToInt(dimension.y / cell) + 1,
+ Mathf.CeilToInt(dimension.z / cell) + 1
+ ];
+}
+
+// Get the Index of the cell corresponding to the coordinates
+Vector3Int GetGridIndex(Vector3 point, Settings set)
+{
+ // Calculate Index by dividing the distance from the reference point by the cell size
+ return new Vector3Int(
+ Mathf.FloorToInt((point.x - set.BottomLeftBack.x) / set.CellSize),
+ Mathf.FloorToInt((point.y - set.BottomLeftBack.y) / set.CellSize),
+ Mathf.FloorToInt((point.z - set.BottomLeftBack.z) / set.CellSize)
+ );
+}
+
+Calculate the first sample point that will be the starting point for the calculation. At this point, no matter which coordinates are sampled, there is no other point closer than the distance r, so one completely random coordinate is determined.
+Based on the coordinates of this calculated sample point, it belongs to the corresponding cell and is added to the active list and sampling list. This active list is a list that stores the starting points for sampling. Sampling will be performed sequentially based on the points saved in this active list.
++
// Find one random coordinate
+void GetFirstPoint(Settings set, Bags bags)
+{
+ var first = new Vector3(
+ Random.Range(set.BottomLeftBack.x, set.TopRightForward.x),
+ Random.Range(set.BottomLeftBack.y, set.TopRightForward.y),
+ Random.Range(set.BottomLeftBack.z, set.TopRightForward.z)
+ );
+ var index = GetGridIndex(first, set);
+
+ bags.Grid[index.x, index.y, index.z] = first;
+ // Sampling list, eventually returning this List as a result
+ bags.SamplePoints.Add(first);
+ // Active list, sampling around this List
+ bags.ActivePoints.Add(first);
+}
+
+Randomly select Index i from the active list and let the coordinates stored in i be x_i . (Of course, at the very beginning , i is 0 because it is only the point generated in "7.3.3 Calculate the initial sample points" .)
+With this x_i as the center, other points will be sampled with respect to nearby coordinates. By repeating this, the entire space can be sampled.
+
++Figure 7.4: Select initial sampling point +
++
// Randomly select points from the active list +var index = Random.Range(0, bags.ActivePoints.Count); +var point = bags.ActivePoints[index]; ++
Random coordinates x ^ {\ prime} _i are calculated within an n-dimensional sphere (a circle for 2D and a sphere for 3D) with a radius of r or more and 2r or less centered on x_i . Next, check if there are other points with a distance closer than r around the calculated x ^ {\ prime} _i .
+Here, the distance calculation for all other points is a process with a higher load as the number of other points increases. Therefore, in order to solve this problem, the Grid generated in "7.3.2 Dividing the sampling space into Grid" is used, and the calculation is performed by examining only the cells around the cell to which x ^ {\ prime} _i belongs. Reduce the amount. If there are other points in the surrounding cells, x ^ {\ prime} _i is discarded, and if there are no other points, x ^ {\ prime} _i belongs to the corresponding cell, and the active list and sampling list Add to.
+
++Figure 7.5: Sampling other points relative to the initial sampling point +
++
// Find the next sampling point based on the point coordinates
+private static bool GetNextPoint(Vector3 point, Settings set, Bags bags)
+{
+ // Find a random point in the range r ~ 2r around the point coordinates
+ var p = point +
+ GetRandPosInSphere(set.MinimumDistance, 2f * set.MinimumDistance);
+
+ // If it is out of the sampling space, it will be treated as sampling failure.
+ if(set.Dimension.Contains(p) == false) { return false; }
+
+ var minimum = set.MinimumDistance * set.MinimumDistance;
+ var index = GetGridIndex(p, set);
+ var drop = false;
+
+ // Calculate the range of Grid to search
+ var around = 3;
+ var fieldMin = new Vector3Int(
+ Mathf.Max(0, index.x - around), Mathf.Max(0, index.y - around),
+ Mathf.Max(0, index.z - around)
+ );
+ var fieldMax = new Vector3Int(
+ Mathf.Min(set.GridWidth, index.x + around),
+ Mathf.Min(set.GridHeight, index.y + around),
+ Mathf.Min(set.GridDepth, index.z + around)
+ );
+
+ // Check if there are other points in the surrounding Grid
+ for(var i = fieldMin.x; i <= fieldMax.x && drop == false; i++)
+ {
+ for(var j = fieldMin.y; j <= fieldMax.y && drop == false; j++)
+ {
+ for(var k = fieldMin.z; k <= fieldMax.z && drop == false; k++)
+ {
+ var q = bags.Grid[i, j, k];
+ if(q.HasValue && (q.Value - p).sqrMagnitude <= minimum)
+ {
+ drop = true;
+ }
+ }
+ }
+ }
+
+ if(drop == true) { return false; }
+
+ // Adopted because there are no other points in the vicinity
+ bags.SamplePoints.Add(p);
+ bags.ActivePoints.Add(p);
+ bags.Grid[index.x, index.y, index.z] = p;
+ return true;
+}
+
+With x_i at the center, only one point was sampled in "7.3.5 Sampling", but this is repeated k times. If not even one point could be sampled around x_i after k repetitions, remove x_i from the active list.
+
++Figure 7.6: When k = 7
+Then, when the repetition of k is finished, it returns to "7.3.4 Select a reference point from the active list" . You can sample the entire space by repeating this until the active list reaches zero.
++
// Repeat sampling
+public static List<Vector3> Sampling(Vector3 bottomLeft, Vector3 topRight,
+ float minimumDistance, int iterationPerPoint)
+{
+ var settings = GetSettings (
+ bottomLeft,
+ topRight,
+ minimumDistance,
+ iterationPerPoint <= 0 ?
+ DefaultIterationPerPoint : iterationPerPoint
+ );
+ var bags = new Bags()
+ {
+ Grid = new Vector3?[
+ settings.GridWidth + 1,
+ settings.GridHeight + 1,
+ settings.GridDepth + 1
+ ],
+ SamplePoints = new List<Vector3>(),
+ ActivePoints = new List<Vector3>()
+ };
+ GetFirstPoint(settings, bags);
+
+ do
+ {
+ var index = Random.Range(0, bags.ActivePoints.Count);
+ var point = bags.ActivePoints[index];
+
+ var found = false;
+ for(var k = 0; k < settings.IterationPerPoint; k++)
+ {
+ found = found | GetNextPoint(point, settings, bags);
+ }
+
+ if(found == false) { bags.ActivePoints.RemoveAt(index); }
+ }
+ while(bags.ActivePoints.Count > 0);
+
+ return bags.SamplePoints;
+}
+
+In other words, if you briefly explain the overall flow
+That is the whole flow. Since parallelization is not proposed in this algorithm, a certain amount of calculation time is required if the sampling space R^n is wide or the minimum distance r is small, but being able to sample easily in any dimension is a very attractive advantage.
+ +By the processing up to this point , the sampling result can be obtained as shown in Fig. 7.7 . This image is a circle created and placed by Geometry Shader at the sampled coordinates. You can see that the circles do not overlap and are filled to the full extent.
+
++Figure 7.7: Visualization of sampling results +
+As I mentioned at the beginning, Poisson disc sampling is used in a wide range of places, from antialiasing and image composition to image effects such as Blur and evenly spaced objects. It doesn't give you a clear visual result on its own, but it's often used behind the high-quality visuals we usually see. It can be said that it is one of the algorithms that is worth knowing when doing visual programming.
+ +
|
![]() |
|
-Nakamura will reach / @mattatz
+A programmer who creates installations, signage, the Web (front-end / back-end), smartphone apps, etc. I am interested in video expression and design tool development.
+ + +Former game developer, programmer making interactive art. I like the design and development of moderately complicated mechanisms and libraries. I've been sleeping well lately.
+ + +Interaction engineer. In the field of video expression such as installation, signage, stage production, music video, concert video, VJ, etc., we are producing content that makes use of real-time and procedural characteristics. I have been active several times in a unit called Aqueduct with sugi-cho and mattatz.
+It is inevitable to keep up with it, and I am living somehow while becoming tattered. Please also use "Unity Shader Programming" for getting started with shaders.
+ + +An interactive engineer who works in an atmosphere. I often post Gene videos on Twitter. I do VJ once in a while. Recently I'm interested in VR.
+ + +Former technical artist of a game development company. I like art, design and music, so I turned to interactive art. My hobbies are samplers, synths, musical instruments, records, and equipment. I started Twitter.
+ + +Interaction engineer. I am interested in visualization of simulations by CG, and I would like to make visualizations that shake people's emotions more, rather than visualizing them accurately. I like to make it, but I find it more fun to know more than that. My favorite school classroom is the drawing room or the library.
+ \ No newline at end of file diff --git a/html-translated/vol4/Contributors_files/cleardot.gif b/html-translated/vol4/Contributors_files/cleardot.gif new file mode 100644 index 0000000..1d11fa9 Binary files /dev/null and b/html-translated/vol4/Contributors_files/cleardot.gif differ diff --git a/html-translated/vol4/Contributors_files/element_main.js b/html-translated/vol4/Contributors_files/element_main.js new file mode 100644 index 0000000..4c5de3c --- /dev/null +++ b/html-translated/vol4/Contributors_files/element_main.js @@ -0,0 +1,486 @@ +(function(){/* + + Copyright The Closure Library Authors. + SPDX-License-Identifier: Apache-2.0 +*/ +var aa='" style="background-image:url(',ba="-disabled",ca="-document.getElementById('",da="/translate_a/t",ea="/translate_suggestion?client=",fa='
|
![]() |
|
This book is the fourth volume of the "Unity Graphics Programming" series, which explains the technology related to graphics programming by Unity. This series provides introductory content and applications for beginners, as well as tips for intermediate and above, on a variety of topics that the authors are interested in.
+The source code explained in each chapter is published in the github repository ( https://github.com/IndieVisualLab/UnityGraphicsProgramming4 ), so you can read this manual while executing it at hand.
+The difficulty level varies depending on the article, and depending on the amount of knowledge of the reader, some content may be unsatisfactory or too difficult. Depending on your knowledge, it's a good idea to read articles on the topic you are interested in. For those who usually do graphics programming at work, I hope it will lead to more effect drawers, and students are interested in visual coding, I have touched Processing and openFrameworks, but I still have 3DCG. For those who are feeling a high threshold, I would be happy if it would be an opportunity to introduce Unity and learn about the high expressiveness of 3DCG and the start of development.
+IndieVisualLab is a circle created by colleagues (& former colleagues) in the company. In-house, we use Unity to program the contents of exhibited works in the category generally called media art, and we are using Unity, which is a bit different from the game system. In this book, knowledge that is useful for using Unity in the exhibited works may be scattered.
+ +Some of the contents explained in this manual use Compute Shader, Geometry Shader, etc., and the execution environment in which DirectX 11 operates is recommended, but there are also chapters where the contents are completed by the program (C #) on the CPU side.
+I think that the behavior of the sample code released may not be correct due to the difference in environment, but please take measures such as reporting an issue to the github repository and replacing it as appropriate.
+ +If you have any impressions, concerns, or other requests regarding this book (such as wanting to read the explanation about 〇〇), please feel free to use the Web form ( https://docs.google.com/forms/d/e/1FAIpQLSdxeansJvQGTWfZTBN_2RTuCK_kRqhA6QHTZKVXHCijQnC8zw/ Please let us know via viewform ) or email (lab.indievisual@gmail.com).
\ No newline at end of file diff --git a/html-translated/vol4/Preface_files/cleardot.gif b/html-translated/vol4/Preface_files/cleardot.gif new file mode 100644 index 0000000..1d11fa9 Binary files /dev/null and b/html-translated/vol4/Preface_files/cleardot.gif differ diff --git a/html-translated/vol4/Preface_files/element_main.js b/html-translated/vol4/Preface_files/element_main.js new file mode 100644 index 0000000..4c5de3c --- /dev/null +++ b/html-translated/vol4/Preface_files/element_main.js @@ -0,0 +1,486 @@ +(function(){/* + + Copyright The Closure Library Authors. + SPDX-License-Identifier: Apache-2.0 +*/ +var aa='" style="background-image:url(',ba="-disabled",ca="-document.getElementById('",da="/translate_a/t",ea="/translate_suggestion?client=",fa='
|
![]() |
|