RubyMatchData.java

/***** BEGIN LICENSE BLOCK *****
 * Version: EPL 1.0/GPL 2.0/LGPL 2.1
 *
 * The contents of this file are subject to the Eclipse Public
 * License Version 1.0 (the "License"); you may not use this file
 * except in compliance with the License. You may obtain a copy of
 * the License at http://www.eclipse.org/legal/epl-v10.html
 *
 * Software distributed under the License is distributed on an "AS
 * IS" basis, WITHOUT WARRANTY OF ANY KIND, either express or
 * implied. See the License for the specific language governing
 * rights and limitations under the License.
 *
 * Copyright (C) 2001 Alan Moore <alan_moore@gmx.net>
 * Copyright (C) 2001-2004 Jan Arne Petersen <jpetersen@uni-bonn.de>
 * Copyright (C) 2002 Benoit Cerrina <b.cerrina@wanadoo.fr>
 * Copyright (C) 2002-2004 Anders Bengtsson <ndrsbngtssn@yahoo.se>
 * Copyright (C) 2004 Thomas E Enebo <enebo@acm.org>
 * Copyright (C) 2004 Charles O Nutter <headius@headius.com>
 * Copyright (C) 2004 Stefan Matthias Aust <sma@3plus4.de>
 * 
 * Alternatively, the contents of this file may be used under the terms of
 * either of the GNU General Public License Version 2 or later (the "GPL"),
 * or the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
 * in which case the provisions of the GPL or the LGPL are applicable instead
 * of those above. If you wish to allow use of your version of this file only
 * under the terms of either the GPL or the LGPL, and not to allow others to
 * use your version of this file under the terms of the EPL, indicate your
 * decision by deleting the provisions above and replace them with the notice
 * and other provisions required by the GPL or the LGPL. If you do not delete
 * the provisions above, a recipient may use your version of this file under
 * the terms of any one of the EPL, the GPL or the LGPL.
 ***** END LICENSE BLOCK *****/
package org.jruby;

import java.util.Arrays;
import java.util.Iterator;

import org.jcodings.Encoding;
import org.joni.NameEntry;
import org.joni.Regex;
import org.joni.Region;
import org.joni.exception.JOniException;
import org.jruby.anno.JRubyMethod;
import org.jruby.anno.JRubyClass;
import org.jruby.runtime.Block;
import org.jruby.runtime.ClassIndex;
import org.jruby.runtime.ObjectAllocator;
import org.jruby.runtime.ThreadContext;
import org.jruby.runtime.Visibility;
import org.jruby.runtime.builtin.IRubyObject;
import org.jruby.runtime.Visibility;
import org.jruby.util.ByteList;
import org.jruby.util.StringSupport;

/**
 * @author olabini
 */
@JRubyClass(name="MatchData")
public class RubyMatchData extends RubyObject {
    Region regs;        // captures
    int begin, end;     // begin and end are used when not groups defined
    RubyString str;     // source string
    Regex pattern;
    RubyRegexp regexp;
    boolean charOffsetUpdated;
    Region charOffsets;

    public static RubyClass createMatchDataClass(Ruby runtime) {
        RubyClass matchDataClass = runtime.defineClass("MatchData", runtime.getObject(), MATCH_DATA_ALLOCATOR);
        runtime.setMatchData(matchDataClass);

        matchDataClass.setClassIndex(ClassIndex.MATCHDATA);
        matchDataClass.setReifiedClass(RubyMatchData.class);
        
        runtime.defineGlobalConstant("MatchingData", matchDataClass);
        matchDataClass.kindOf = new RubyModule.JavaClassKindOf(RubyMatchData.class);

        matchDataClass.getMetaClass().undefineMethod("new");
        matchDataClass.defineAnnotatedMethods(RubyMatchData.class);
        return matchDataClass;
    }

    private static ObjectAllocator MATCH_DATA_ALLOCATOR = new ObjectAllocator() {
        @Override
        public IRubyObject allocate(Ruby runtime, RubyClass klass) {
            return new RubyMatchData(runtime, klass);
        }
    };

    public RubyMatchData(Ruby runtime) {
        super(runtime, runtime.getMatchData());
    }

    public RubyMatchData(Ruby runtime, RubyClass metaClass) {
        super(runtime, metaClass);
    }

    @Override
    public void copySpecialInstanceVariables(IRubyObject clone) {
        RubyMatchData match = (RubyMatchData)clone;
        match.regs = regs;
        match.begin = begin;
        match.end = end;
        match.pattern = pattern;
        match.regexp = regexp;
        match.charOffsetUpdated = charOffsetUpdated;
        match.charOffsets = charOffsets;
    }

    @Override
    public ClassIndex getNativeClassIndex() {
        return ClassIndex.MATCHDATA;
    }

    private static final class Pair implements Comparable<Pair> {
        int bytePos, charPos;
        @Override
        public int compareTo(Pair pair) {
            return bytePos - pair.bytePos;
        }
    }

    private void updatePairs(ByteList value, Encoding encoding, Pair[] pairs) {
        Arrays.sort(pairs);

        int length = pairs.length;
        byte[]bytes = value.getUnsafeBytes();
        int p = value.getBegin();
        int s = p;
        int c = 0;
        
        for (int i = 0; i < length; i++) {
            int q = s + pairs[i].bytePos;
            c += StringSupport.strLength(encoding, bytes, p, q);
            pairs[i].charPos = c;
            p = q;
        }
    }

    private void updateCharOffsetOnlyOneReg(ByteList value, Encoding encoding) {
        if (charOffsets == null || charOffsets.numRegs < 1) charOffsets = new Region(1);
        
        if (encoding.maxLength() == 1) {
            charOffsets.beg[0] = begin;
            charOffsets.end[0] = end;
            charOffsetUpdated = true;
            return;
        }

        Pair[] pairs = new Pair[2];
        pairs[0] = new Pair();
        pairs[0].bytePos = begin;
        pairs[1] = new Pair();
        pairs[1].bytePos = end;

        updatePairs(value, encoding, pairs);

        Pair key = new Pair();
        key.bytePos = begin;
        charOffsets.beg[0] = pairs[Arrays.binarySearch(pairs, key)].charPos;
        key.bytePos = end;
        charOffsets.end[0] = pairs[Arrays.binarySearch(pairs, key)].charPos;        
    }

    private void updateCharOffsetManyRegs(ByteList value, Encoding encoding) {
        int numRegs = regs.numRegs;

        if (charOffsets == null || charOffsets.numRegs < numRegs) charOffsets = new Region(numRegs);
        
        if (encoding.maxLength() == 1) {
            for (int i = 0; i < numRegs; i++) {
                charOffsets.beg[i] = regs.beg[i];
                charOffsets.end[i] = regs.end[i];
            }
            return;
        }

        Pair[] pairs = new Pair[numRegs * 2];
        for (int i = 0; i < pairs.length; i++) pairs[i] = new Pair();

        int numPos = 0;
        for (int i = 0; i < numRegs; i++) {
            if (regs.beg[i] < 0) continue;
            pairs[numPos++].bytePos = regs.beg[i];
            pairs[numPos++].bytePos = regs.end[i];
        }

        updatePairs(value, encoding, pairs);

        Pair key = new Pair();
        for (int i = 0; i < regs.numRegs; i++) {
            if (regs.beg[i] < 0) {
                charOffsets.beg[i] = charOffsets.end[i] = -1;
                continue;
            }
            key.bytePos = regs.beg[i];
            charOffsets.beg[i] = pairs[Arrays.binarySearch(pairs, key)].charPos;
            key.bytePos = regs.end[i];
            charOffsets.end[i] = pairs[Arrays.binarySearch(pairs, key)].charPos;
        }        
    }

    private void updateCharOffset() {
        if (charOffsetUpdated) return;

        ByteList value = str.getByteList();
        Encoding enc = value.getEncoding();

        if (regs == null) {
            updateCharOffsetOnlyOneReg(value, enc);
        } else {
            updateCharOffsetManyRegs(value, enc);
        }

        charOffsetUpdated = true;
    }

    private static final int MATCH_BUSY = USER2_F;

    // rb_match_busy
    public final void use() {
        flags |= MATCH_BUSY; 
    }

    public final boolean used() {
        return (flags & MATCH_BUSY) != 0;
    }

    void check() {
        if (str == null) throw getRuntime().newTypeError("uninitialized Match");
    }

    private void checkLazyRegexp() {
        if (regexp == null) regexp = RubyRegexp.newRegexp(getRuntime(), (ByteList)pattern.getUserObject(), pattern);
    }
    
    private RubyString makeShared(Ruby runtime, RubyString str, int begin, int length) {
        return str.makeShared19(runtime, begin, length);
    }

    private RubyArray match_array(Ruby runtime, int start) {
        check();
        if (regs == null) {
            if (start != 0) return runtime.newEmptyArray();
            if (begin == -1) {
                return runtime.newArray(runtime.getNil());
            } else {
                RubyString ss = makeShared(runtime, str, begin, end - begin);
                if (isTaint()) ss.setTaint(true);
                return runtime.newArray(ss);
            }
        } else {
            RubyArray arr = runtime.newArray(regs.numRegs - start);
            for (int i=start; i<regs.numRegs; i++) {
                if (regs.beg[i] == -1) {
                    arr.append(runtime.getNil());
                } else {
                    RubyString ss = makeShared(runtime, str, regs.beg[i], regs.end[i] - regs.beg[i]);                   
                    if (isTaint()) ss.setTaint(true); 
                    arr.append(ss);
                }
            }
            return arr;
        }
        
    }

    public IRubyObject group(long n) {
        return RubyRegexp.nth_match((int)n, this);
    }

    public IRubyObject group(int n) {
        return RubyRegexp.nth_match(n, this);
    }

    public int getNameToBackrefNumber(String name) {
        try {
            byte[] bytes = name.getBytes();
            return pattern.nameToBackrefNumber(bytes, 0, bytes.length, regs);
        } catch (JOniException je) {
            throw getRuntime().newIndexError(je.getMessage());
        }
    }

    // This returns a list of values in the order the names are defined (named capture local var
    // feature uses this).
    public IRubyObject[] getNamedBackrefValues(Ruby runtime) {
        if (pattern.numberOfNames() == 0) return NULL_ARRAY;

        IRubyObject[] values = new IRubyObject[pattern.numberOfNames()];

        int j = 0;
        for (Iterator<NameEntry> i = pattern.namedBackrefIterator(); i.hasNext();) {
            NameEntry e = i.next();

            int nth = pattern.nameToBackrefNumber(e.name, e.nameP, e.nameEnd, regs);
            values[j++] = RubyRegexp.nth_match(nth, this);
        }

        return values;
    }

    @JRubyMethod
    @Override
    public IRubyObject inspect() {
        if (str == null) return anyToString();

        Ruby runtime = getRuntime();
        RubyString result = runtime.newString();
        result.cat((byte)'#').cat((byte)'<');
        result.append(getMetaClass().getRealClass().to_s());

        NameEntry[]names = new NameEntry[regs == null ? 1 : regs.numRegs];

        if (pattern.numberOfNames() > 0) {
            for (Iterator<NameEntry> i = pattern.namedBackrefIterator(); i.hasNext();) {
                NameEntry e = i.next();
                for (int num : e.getBackRefs()) names[num] = e;
            }
        }

        for (int i=0; i<names.length; i++) {
            result.cat((byte)' ');
            if (i > 0) {
                NameEntry e = names[i];
                if (e != null) {
                    result.cat(e.name, e.nameP, e.nameEnd - e.nameP);
                } else {
                    result.cat((byte)('0' + i));
                }
                result.cat((byte)':');
            }
            IRubyObject v = RubyRegexp.nth_match(i, this);
            if (v.isNil()) {
                result.cat("nil".getBytes());
            } else {
                result.append(((RubyString)v).inspect19());
            }
        }

        return result.cat((byte)'>');
    }

    @JRubyMethod
    public IRubyObject regexp(ThreadContext context, Block block) {
        check();
        checkLazyRegexp();
        return regexp;
    }

    @JRubyMethod
    public IRubyObject names(ThreadContext context, Block block) {
        check();
        checkLazyRegexp();
        return regexp.names(context);
    }

    /** match_to_a
     *
     */
    @JRubyMethod
    @Override
    public RubyArray to_a() {
        return match_array(getRuntime(), 0);
    }

    @JRubyMethod(rest = true)
    public IRubyObject values_at(IRubyObject[] args) {
        return to_a().values_at(args);
    }

    /** match_captures
     *
     */
    @JRubyMethod
    public IRubyObject captures(ThreadContext context) {
        return match_array(context.runtime, 1);
    }

    private int nameToBackrefNumber(RubyString str) {
        ByteList value = str.getByteList();
        try {
            return pattern.nameToBackrefNumber(value.getUnsafeBytes(), value.getBegin(), value.getBegin() + value.getRealSize(), regs);
        } catch (JOniException je) {
            throw getRuntime().newIndexError(je.getMessage());
        }
    }

    public final int backrefNumber(IRubyObject obj) {
        check();
        if (obj instanceof RubySymbol) {
            return nameToBackrefNumber((RubyString)((RubySymbol)obj).id2name());
        } else if (obj instanceof RubyString) {
            return nameToBackrefNumber((RubyString)obj);
        } else {
            return RubyNumeric.num2int(obj);
        }
    }

    /** match_aref
    *
    */
    public IRubyObject op_aref(IRubyObject idx) {
        return op_aref19(idx);
    }

    /** match_aref
     *
     */
    public IRubyObject op_aref(IRubyObject idx, IRubyObject rest) {
        return op_aref19(idx, rest);
    }

    /** match_aref
     *
     */
    @JRubyMethod(name = "[]")
    public IRubyObject op_aref19(IRubyObject idx) {
        check();
        IRubyObject result = op_arefCommon(idx);
        return result == null ? ((RubyArray)to_a()).aref19(idx) : result;
    }

    /** match_aref
    *
    */
    @JRubyMethod(name = "[]")
    public IRubyObject op_aref19(IRubyObject idx, IRubyObject rest) {
        IRubyObject result;
        return !rest.isNil() || (result = op_arefCommon(idx)) == null ? ((RubyArray)to_a()).aref19(idx, rest) : result;
    }

    private IRubyObject op_arefCommon(IRubyObject idx) {
        if (idx instanceof RubyFixnum) {
            int num = RubyNumeric.fix2int(idx);
            if (num >= 0) return RubyRegexp.nth_match(num, this);
        } else {
            if (idx instanceof RubySymbol) {
                return RubyRegexp.nth_match(nameToBackrefNumber((RubyString)((RubySymbol)idx).id2name()), this);
            } else if (idx instanceof RubyString) {
                return RubyRegexp.nth_match(nameToBackrefNumber((RubyString)idx), this);
            }
        }
        return null;
    }

    /** match_size
     *
     */
    @JRubyMethod(name = {"size", "length"})
    public IRubyObject size(ThreadContext context) {
        check();
        Ruby runtime = context.runtime;
        return regs == null ? RubyFixnum.one(runtime) : RubyFixnum.newFixnum(runtime, regs.numRegs);
    }

    /** match_begin
     *
     */
    public IRubyObject begin(ThreadContext context, IRubyObject index) {
        return begin19(context, index);
    }

    @JRubyMethod(name = "begin")
    public IRubyObject begin19(ThreadContext context, IRubyObject index) {
        int i = backrefNumber(index);
        Ruby runtime = context.runtime;
        int b = beginCommon(runtime, i);
        if (b < 0) return runtime.getNil();
        if (!str.singleByteOptimizable()) {
            updateCharOffset();
            b = charOffsets.beg[i];
        }
        return RubyFixnum.newFixnum(runtime, b);
    }

    private int beginCommon(Ruby runtime, int i) {
        check();
        if (i < 0 || (regs == null ? 1 : regs.numRegs) <= i) throw runtime.newIndexError("index " + i + " out of matches");
        return regs == null ? begin : regs.beg[i];
    }

    /** match_end
     *
     */
    public IRubyObject end(ThreadContext context, IRubyObject index) {
        return end19(context, index);
    }

    @JRubyMethod(name = "end")
    public IRubyObject end19(ThreadContext context, IRubyObject index) {
        int i = backrefNumber(index);
        Ruby runtime = context.runtime;
        int e = endCommon(runtime, i);
        if (e < 0) return runtime.getNil();
        if (!str.singleByteOptimizable()) {
            updateCharOffset();
            e = charOffsets.end[i];
        }
        return RubyFixnum.newFixnum(runtime, e);
    }

    private int endCommon(Ruby runtime, int i) {
        check();
        if (i < 0 || (regs == null ? 1 : regs.numRegs) <= i) throw runtime.newIndexError("index " + i + " out of matches");
        return regs == null ? end : regs.end[i];
    }

    /** match_offset
     *
     */
    public IRubyObject offset(ThreadContext context, IRubyObject index) {
        return offset19(context, index);
    }

    @JRubyMethod(name = "offset")
    public IRubyObject offset19(ThreadContext context, IRubyObject index) {
        return offsetCommon(context, backrefNumber(index), true);
    }

    private IRubyObject offsetCommon(ThreadContext context, int i, boolean is_19) {
        check();
        Ruby runtime = context.runtime;
        if (i < 0 || (regs == null ? 1 : regs.numRegs) <= i) throw runtime.newIndexError("index " + i + " out of matches");
        int b, e;
        if (regs == null) {
            b = begin;
            e = end;
        } else {
            b = regs.beg[i];
            e = regs.end[i];
        }
        if (b < 0) return runtime.newArray(runtime.getNil(), runtime.getNil());
        if (is_19 && !str.singleByteOptimizable()) {
            updateCharOffset();
            b = charOffsets.beg[i];
            e = charOffsets.end[i];
        }
        return runtime.newArray(RubyFixnum.newFixnum(runtime, b), RubyFixnum.newFixnum(runtime, e));
    }

    /** match_pre_match
     *
     */
    @JRubyMethod
    public IRubyObject pre_match(ThreadContext context) {
        check();
        if (begin == -1) {
            return context.runtime.getNil();
        }
        return makeShared(context.runtime, str, 0, begin).infectBy(this);
    }

    /** match_post_match
     *
     */
    @JRubyMethod
    public IRubyObject post_match(ThreadContext context) {
        check();
        if (begin == -1) {
            return context.runtime.getNil();
        }
        return makeShared(context.runtime, str, end, str.getByteList().length() - end).infectBy(this);
    }

    /** match_to_s
     *
     */
    @JRubyMethod
    @Override
    public IRubyObject to_s() {
        check();
        IRubyObject ss = RubyRegexp.last_match(this);
        if (ss.isNil()) ss = RubyString.newEmptyString(getRuntime());
        if (isTaint()) ss.setTaint(true);
        return ss;
    }

    /** match_string
     *
     */
    @JRubyMethod
    public IRubyObject string() {
        check();
        return str; //str is frozen
    }

    @JRubyMethod(required = 1, visibility = Visibility.PRIVATE)
    @Override
    public IRubyObject initialize_copy(IRubyObject original) {
        if (this == original) return this;

        checkFrozen();

        if (!(original instanceof RubyMatchData)) {
            throw getRuntime().newTypeError("wrong argument class");
        }

        RubyMatchData origMatchData = (RubyMatchData)original;
        str = origMatchData.str;
        regs = origMatchData.regs;

        return this;
    }

    @Override
    public boolean equals(Object other) {
        if (this == other) return true;
        if (!(other instanceof RubyMatchData)) return false;

        RubyMatchData match = (RubyMatchData)other;

        return (this.str == match.str || (this.str != null && this.str.equals(match.str))) &&
                (this.regexp == match.regexp || (this.regexp != null && this.regexp.equals(match.regexp))) &&
                (this.charOffsets == match.charOffsets || (this.charOffsets != null && this.charOffsets.equals(match.charOffsets))) &&
                this.begin == match.begin &&
                this.end == match.end &&
                this.charOffsetUpdated == match.charOffsetUpdated;
    }

    @JRubyMethod(name = {"eql?", "=="}, required = 1)
    @Override
    public IRubyObject eql_p(IRubyObject obj) {
        return getRuntime().newBoolean(equals(obj));
    }

    @JRubyMethod
    @Override
    public RubyFixnum hash() {
        check();
        return getRuntime().newFixnum(pattern.hashCode() ^ str.hashCode());
    }

}