YourKit Java Profiler Open Source License Request
Laurent Bourgès
bourges.laurent at gmail.com
Tue May 7 13:02:27 UTC 2013
Andrew,
thanks so much for advertising oprofile: it works like a charm !
Apparently, I understood the documentation and the default CPU_CLK_UNHALTED
setting is perfect.
Moreover, it is able to annotate java source code (dtrace like) and it is
so easy:
opannotate --source -o src/ -t 0.1
--search-dirs=/home/bourgesl/libs/openjdk/pisces/src/sun/java2d/pisces/
PS: It would be great if there were a NetBeans plugin (apparently an
Eclipse one exists in the Fedora packages) ...
Here is a part of the annotated Java code (2 major hotspot methods):
/* int sun.java2d.pisces.Renderer$ScanlineIterator.next() total: 4380416
34.2926 */
: int next() {
: final float[] _edges = rdr.edges;
86967 0.6808 : final int[] _edgesInt = rdr.edgesInt; /* int
sun.java2d.pisces.Renderer$ScanlineIterator.next() total: 4380416 34.2926 */
2653 0.0208 : final int cury = nextY++;
89373 0.6997 : final int bucket = cury - rdr.boundsMinY;
24478 0.1916 : int count = this.edgeCount;
1101 0.0086 : int[] ptrs = this.edgePtrs;
19457 0.1523 : final int bucketcount =
rdr.edgeBucketCounts[bucket];
:
33547 0.2626 : if ((bucketcount & 0x1) != 0) {
16662 0.1304 : final int offYmax = YMAX;
: int newCount = 0;
72462 0.5673 : for (int i = 0, ecur; i < count; i++) {
81526 0.6382 : ecur = ptrs[i];
99950 0.7825 : if (_edgesInt[ecur + offYmax] > cury) {
209365 1.6390 : ptrs[newCount++] = ecur;
: }
: }
25606 0.2005 : count = newCount;
: }
:
: int ptrLen = bucketcount >> 1;
33571 0.2628 : if (ptrs.length < count + ptrLen) {
50928 0.3987 : boolean ptrInitial = (ptrs ==
edgePtrs_initial);
: this.edgePtrs = ptrs =
Helpers.widenArray(rdrCtx, ptrs, count, ptrLen, arrayMaxUsed);
7024 0.0550 : if (ptrInitial && doCleanDirty) {
: IntArrayCache.fill(edgePtrs_initial, 0,
arrayMaxUsed, 0);
: }
: }
:
: final int nul = NULL;
26781 0.2097 : for (int ecur = rdr.edgeBuckets[bucket]; ecur
!= nul; ecur = _edgesInt[ecur /* + NEXT */]) {
340270 2.6638 : ptrs[count++] = ecur;
: // REMIND: Adjust start Y if necessary
: }
:
21188 0.1659 : this.edgeCount = count;
:// if ((count & 0x1) != 0) {
:// System.out.println("ODD NUMBER OF
EDGES!!!!");
:// }
:
15596 0.1221 : int[] xings = this.crossings;
31884 0.2496 : if (xings.length < count) {
10004 0.0783 : if (crossings == crossings_initial) {
: IntArrayCache.fill(crossings, 0,
arrayMaxUsed, 0);
: } else {
: rdrCtx.putIntArray(crossings,
arrayMaxUsed); // last known value for arrayMaxUsed
: }
: // Get larger array:
: this.crossings = xings =
rdrCtx.getIntArray(count); // count or ptrs.length ?
: }
: // LBO: max used mark
: if (count > arrayMaxUsed) { arrayMaxUsed =
count; }
:
5708 0.0447 : final int offSlope = SLOPE;
3331 0.0261 : final int offOr = OR;
:
: float curx;
: int cross, jcross;
:
135873 1.0637 : for (int i = 0, ecur, j; i < count; i++) {
21831 0.1709 : ecur = ptrs[i];
227570 1.7816 : curx = _edges[ecur /* + CURX */];
159331 1.2473 : _edges[ecur /* + CURX */] = curx +
_edges[ecur + offSlope];
:
594904 4.6573 : cross = ((int) curx) << 1;
2985 0.0234 : if (_edgesInt[ecur + offOr] != 0 /* > 0 */)
{
269643 2.1109 : cross |= 1;
: }
:
: // LBO: right shift crossings ...
: j = i;
128253 1.0040 : while (--j >= 0) {
102837 0.8051 : jcross = xings[j];
283135 2.2166 : if (jcross <= cross) {
92418 0.7235 : break;
: }
70142 0.5491 : xings[j + 1] = jcross;
257937 2.0193 : ptrs[j + 1] = ptrs[j];
: }
304554 2.3842 : xings[j + 1] = cross;
350968 2.7476 : ptrs[j + 1] = ecur;
: }
68603 0.5371 : return count;
: }
:
: boolean hasNext() {
: return nextY < maxY;
: }
:
: int curY() {
: return nextY - 1;
: }
: }
/* void sun.java2d.pisces.Renderer._endRendering(int, int, int, int) total:
2601080 20.3628 */
: private void _endRendering(final int bboxx0, final int
bboxx1,
: int ymin, int ymax)
: {
: // Mask to determine the relevant bit of the
crossing sum
: // 0x1 if EVEN_ODD, all bits if NON_ZERO
: final int mask = (windingRule == WIND_EVEN_ODD) ?
0x1 : ~0x0;
:
: // Useful when processing tile line by tile line
5500 0.0431 : final int[] alpha = alphaLine; /* void
sun.java2d.pisces.Renderer._endRendering(int, int, int, int) total: 2601080
20.3628 */
:
39171 0.3067 : final PiscesCache _cache = this.cache;
:
: // Now we iterate through the scanlines. We must
tell emitRow the coord
: // of the first non-transparent pixel, so we must
keep accumulators for
: // the first and last pixels of the section of the
current pixel row
: // that we will emit.
: // We also need to accumulate pix_bbox*, but the
iterator does it
: // for us. We will just get the values from it once
this loop is done
1199 0.0094 : int pix_maxX = Integer.MIN_VALUE;
: int pix_minX = Integer.MAX_VALUE;
:
: int y = boundsMinY; // needs to be declared here so
we emit the last row properly.
:
354 0.0028 : for (final ScanlineIterator it =
scanlineIterator.init(ymin, ymax);
1974 0.0155 : it.hasNext(); )
: {
525 0.0041 : final int numCrossings = it.next();
82494 0.6458 : y = it.curY();
:
6392 0.0500 : if (numCrossings > 0) {
8991 0.0704 : final int[] crossings = it.crossings; //
array may change
:
: // LBO: TODO: explain crossing processing:
Jim, please ? ...
48601 0.3805 : int lowx = crossings[0] >> 1;
33739 0.2641 : int highx = crossings[numCrossings - 1] >>
1;
94313 0.7383 : int x0 = Math.max(lowx, bboxx0);
: int x1 = Math.min(highx, bboxx1);
:
: pix_minX = Math.min(pix_minX, x0 >>
SUBPIXEL_LG_POSITIONS_X);
44084 0.3451 : pix_maxX = Math.max(pix_maxX, x1 >>
SUBPIXEL_LG_POSITIONS_X);
:
: // TODO: fix alpha last index = pix_xmax + 1
: // ie x1 >> SUBPIXEL_LG_POSITIONS_X
(inclusive)
: // alpha[pix_xmax + 1] <=> alpha[x1 >>
SUBPIXEL_LG_POSITIONS_X + 1]
: // in contrary to half-open pattern used by
pix_maxX = max(x1 >> SUBPIXEL_LG_POSITIONS_X)
:
53976 0.4226 : int sum = 0;
: int prev = bboxx0;
: for (int i = 0; i < numCrossings; i++) {
61179 0.4789 : int curxo = crossings[i];
116872 0.9149 : int curx = curxo >> 1;
: // to turn {0, 1} into {-1, 1},
multiply by 2 and subtract 1.
86834 0.6798 : int crorientation = ((curxo & 0x1) <<
1) - 1;
:
: // LBO: TODO: explain alpha
computation: Jim, please ? ...
56304 0.4408 : if ((sum & mask) != 0) {
145231 1.1370 : x0 = Math.max(prev, bboxx0);
88536 0.6931 : x1 = Math.min(curx, bboxx1);
70122 0.5490 : if (x0 < x1) {
140577 1.1005 : x0 -= bboxx0; // turn x0, x1
from coords to indeces
73769 0.5775 : x1 -= bboxx0; // in the alpha
array.
:
10572 0.0828 : int pix_x = x0 >>
SUBPIXEL_LG_POSITIONS_X;
42994 0.3366 : int pix_xmaxm1 = (x1 - 1) >>
SUBPIXEL_LG_POSITIONS_X;
:
: if (pix_x == pix_xmaxm1) {
: // Start and end in same
pixel
94868 0.7427 : int tmp = (x1 - x0);
6644 0.0520 : alpha[pix_x] += tmp;
62346 0.4881 : alpha[pix_x + 1] -= tmp;
126253 0.9884 : } else {
80656 0.6314 : int pix_xmax = x1 >>
SUBPIXEL_LG_POSITIONS_X;
6179 0.0484 : int tmp = (x0 &
SUBPIXEL_MASK_X);
25400 0.1988 : alpha[pix_x] +=
SUBPIXEL_POSITIONS_X - tmp;
224672 1.7589 : alpha[pix_x + 1] += tmp;
84426 0.6609 : tmp = (x1 &
SUBPIXEL_MASK_X);
1039 0.0081 : alpha[pix_xmax] -=
SUBPIXEL_POSITIONS_X - tmp;
184161 1.4417 : alpha[pix_xmax + 1] -= tmp;
: }
: }
: }
106838 0.8364 : sum += crorientation;
59094 0.4626 : prev = curx;
: }
:
: }
:
: // even if this last row had no crossings,
alpha will be zeroed
: // from the last emitRow call. But this doesn't
matter because
: // maxX < minX, so no row will be emitted to
the piscesCache.
19036 0.1490 : if ((y & SUBPIXEL_MASK_Y) == SUBPIXEL_MASK_Y) {
13020 0.1019 : if (pix_maxX >= pix_minX) {
1464 0.0115 : emitRow(_cache, alpha, y >>
SUBPIXEL_LG_POSITIONS_Y, pix_minX, pix_maxX);
: } else {
130734 1.0235 : _cache.clearAARow(y >>
SUBPIXEL_LG_POSITIONS_Y);
: }
45 3.5e-04 : pix_minX = Integer.MAX_VALUE;
: pix_maxX = Integer.MIN_VALUE;
: }
: } // scan line iterator
:
: // Emit final row
56073 0.4390 : if (pix_maxX >= pix_minX) {
:// System.out.println("EmitFinalRow = " + (y >>
SUBPIXEL_LG_POSITIONS_Y));
1345 0.0105 : emitRow(_cache, alpha, y >>
SUBPIXEL_LG_POSITIONS_Y, pix_minX, pix_maxX);
: }
2484 0.0194 : }
Thanks again,
Laurent
2013/5/7 Andrew Haley <aph at redhat.com>
> On 05/07/2013 09:44 AM, Laurent Bourgès wrote:
>
> > I confirm oprofile (0.96 on my fedora 14) works just fine (see below).
> >
> > Do you recommend me to use the latest (git) version ? 0.96 is quite old
> > (2011)
>
> Only if the version you're using doesn't work.
>
> > Could you explain me a bit how to get sample counts corresponding to the
> > complete benchmark (few minutes long) ?
>
> I don't understand. As far as I can see that is what you have.
>
> > should I use the event argument to set the highest count (reset) value ?
> >
> > By default, oprofile uses: CPU_CLK_UNHALTED:100000:0:1:1
> >
> > opcontrol --event=CPU_CLK_UNHALTED:400000
> >
> > What is the maximum value I can set ?
>
> I always use the default. The more you increase sample frequency the
> more overhead there is.
>
> The real test is to experiment and see if it makes any difference.
>
> Andrew.
>
>
More information about the discuss
mailing list