Back to Angel

Formattrans

data/formattrans.ipynb

3.1.0 · 2.3 KB
Original Source
python
def dummy2dense(fname: str) -> None:
    """Convert a "dummy" index-list file into a dense 0/1 text file.

    Every non-blank input line is read as ``<label> <idx> <idx> ...``; each
    ``idx`` is used directly as a column position that is set to ``"1"`` in
    a row otherwise filled with ``"0"``.

    The row width comes from the file's base name: the second
    ``_``-separated token with its last character removed
    (``census_148d_train.dummy`` gives 148).

    The result is written next to the input file. Its name is the input's
    base name up to the first ``.`` followed by ``.dense``. Rows are joined
    with newlines and no trailing newline is written.

    Args:
        fname: Path of the input file.

    Raises:
        IndexError: If the base name has no second ``_`` token, or a
            feature index falls outside the row.
        ValueError: If the width token or a feature index is not an integer.
    """
    import os

    # Work on the base name only, so "_" or "." inside a directory name
    # (e.g. "./data" or "my_data/") cannot corrupt the width or output path.
    folder, base = os.path.split(fname)
    dim = int(base.split("_")[1][0:-1])

    rows = []
    with open(fname) as src:
        for line in src:
            tokens = line.split()
            if not tokens:
                # A blank line carries no label; skip it instead of crashing.
                continue

            label = tokens[0]
            feat = ["0"] * dim
            for i in tokens[1:]:
                feat[int(i)] = "1"

            rows.append(label + " " + " ".join(feat))

    opfname = os.path.join(folder, base.split(".")[0] + ".dense")
    # The context manager closes the output even if the write fails.
    with open(opfname, "w") as dst:
        dst.write("\n".join(rows))


def dummy2libsvm(fname: str) -> None:
    """Convert a "dummy" index-list file into LibSVM-style ``idx:1`` pairs.

    Every non-blank input line is read as ``<label> <idx> <idx> ...``. Each
    ``idx`` is incremented by one and emitted as ``<idx+1>:1``, so the
    output indices are one higher than the input indices.

    The result is written next to the input file. Its name is the input's
    base name up to the first ``.`` followed by ``.libsvm``. Rows are joined
    with newlines and no trailing newline is written.

    Args:
        fname: Path of the input file.

    Raises:
        ValueError: If a feature index is not an integer.
    """
    import os

    rows = []
    with open(fname) as src:
        for line in src:
            tokens = line.split()
            if not tokens:
                # A blank line carries no label; skip it instead of crashing.
                continue

            label = tokens[0]
            feat = [str(int(i) + 1) + ":1" for i in tokens[1:]]
            rows.append(label + " " + " ".join(feat))

    # Strip the extension from the base name only, so a "." in the directory
    # part (e.g. "./data/x.dummy") does not truncate the output path.
    folder, base = os.path.split(fname)
    opfname = os.path.join(folder, base.split(".")[0] + ".libsvm")
    # The context manager closes the output even if the write fails.
    with open(opfname, "w") as dst:
        dst.write("\n".join(rows))


def libsvm2dummy(fname: str) -> None:
    """Convert a LibSVM-style file into a "dummy" index-list file.

    Every non-blank input line is read as ``<label> <idx>:<val> ...``. The
    value after ``:`` is discarded and each index is decremented by one, so
    the output line is ``<label> <idx-1> <idx-1> ...``.

    The result is written next to the input file. Its name is the input's
    base name up to the first ``.`` followed by ``.dummy``. Rows are joined
    with newlines and no trailing newline is written.

    Args:
        fname: Path of the input file.

    Raises:
        ValueError: If the part before ``:`` is not an integer.
    """
    import os

    rows = []
    with open(fname) as src:
        for line in src:
            tokens = line.split()
            if not tokens:
                # A blank line carries no label; skip it instead of crashing.
                continue

            label = tokens[0]
            feat = [str(int(item.split(":")[0]) - 1) for item in tokens[1:]]
            rows.append(label + " " + " ".join(feat))

    # Strip the extension from the base name only, so a "." in the directory
    # part (e.g. "./data/x.libsvm") does not truncate the output path.
    folder, base = os.path.split(fname)
    opfname = os.path.join(folder, base.split(".")[0] + ".dummy")
    # The context manager closes the output even if the write fails.
    with open(opfname, "w") as dst:
        dst.write("\n".join(rows))


def libsvm2dense(fname: str) -> None:
    """Convert a LibSVM-style file into a dense text file.

    Every non-blank input line is read as ``<label> <idx>:<val> ...``. Each
    ``val`` is copied verbatim (as text) into column ``idx - 1`` of a row
    otherwise filled with ``"0.0"``.

    The row width comes from the file's base name: the second
    ``_``-separated token with its last character removed
    (``abalone_8d_train.libsvm`` gives 8).

    The result is written next to the input file. Its name is the input's
    base name up to the first ``.`` followed by ``.dense``. Rows are joined
    with newlines and no trailing newline is written.

    Args:
        fname: Path of the input file.

    Raises:
        IndexError: If the base name has no second ``_`` token, or a
            feature index falls outside the row.
        ValueError: If the width token or an index is not an integer, or an
            item does not contain exactly one ``:``.
    """
    import os

    # Work on the base name only, so "_" or "." inside a directory name
    # (e.g. "./data" or "my_data/") cannot corrupt the width or output path.
    folder, base = os.path.split(fname)
    dim = int(base.split("_")[1][0:-1])

    rows = []
    with open(fname) as src:
        for line in src:
            tokens = line.split()
            if not tokens:
                # A blank line carries no label; skip it instead of crashing.
                continue

            label = tokens[0]
            feat = ["0.0"] * dim
            for item in tokens[1:]:
                idx, val = item.split(":")
                # NOTE(review): an input index of 0 becomes -1 here and
                # addresses the last column rather than raising.
                feat[int(idx) - 1] = val

            rows.append(label + " " + " ".join(feat))

    opfname = os.path.join(folder, base.split(".")[0] + ".dense")
    # The context manager closes the output even if the write fails.
    with open(opfname, "w") as dst:
        dst.write("\n".join(rows))
python
import os

# Convert census/census_148d_train.dummy to both LibSVM and dense form.
# os.path.join uses the platform's separator, so the path resolves on
# non-Windows systems too; on Windows it yields the same backslash path
# as before.
filename = os.path.join("census", "census_148d_train.dummy")
dummy2libsvm(filename)
dummy2dense(filename)
python
import os

# Convert abalone/abalone_8d_train.libsvm to dense form.
# os.path.join uses the platform's separator, so the path resolves on
# non-Windows systems too; on Windows it yields the same backslash path
# as before.
filename = os.path.join("abalone", "abalone_8d_train.libsvm")
libsvm2dense(filename)