data/formattrans.ipynb
def dummy2dense(fname: str):
    """Convert a "dummy" index-list file into a dense 0/1 file.

    Every non-blank input line has the form ``<label> <idx> <idx> ...``,
    where each ``idx`` is the zero-based position of a feature that is set.
    The matching output line is the label followed by ``dim``
    space-separated ``"0"``/``"1"`` flags.

    ``dim`` is read from the file name, which must look like
    ``<name>_<dim>d_<rest>`` (for example ``census_148d_train.dummy``): the
    second ``_``-separated token, minus its last character, is parsed as an
    integer.

    The result is written next to the input file; the output name is the
    input file name cut at its first ``.`` with ``.dense`` appended.  Lines
    are joined with ``"\\n"`` and no trailing newline is written.

    Args:
        fname: Path of the input file.

    Raises:
        IndexError: If a feature index is outside ``[0, dim)``, or the file
            name has no second ``_``-separated token.
        ValueError: If a feature index or the dimension token is not an
            integer.
    """
    import os  # function-scope import keeps this block self-contained

    base = os.path.basename(fname)
    try:
        # Prefer the file name itself so underscores in directory names
        # cannot shift the token position.
        dim = int(base.split("_")[1][:-1])
    except (IndexError, ValueError):
        # Legacy behaviour: take the token from the whole path string.
        dim = int(fname.split("_")[1][:-1])

    data = []
    with open(fname) as fid:
        for line in fid:
            tokens = line.split()
            if not tokens:
                # Blank line (e.g. a stray trailing newline): nothing to emit.
                continue
            label = tokens[0]
            feat = ["0"] * dim
            for tok in tokens[1:]:
                idx = int(tok)
                # Reject negatives explicitly: Python would otherwise wrap
                # them around and silently set a feature at the end.
                if not 0 <= idx < dim:
                    raise IndexError(
                        "feature index {0} out of range for dim {1}".format(idx, dim)
                    )
                feat[idx] = "1"
            data.append(label + " " + " ".join(feat))

    # Cut only the file name at its first dot; dots inside directory names
    # (".", "..", "v1.0") must not truncate the output path.
    head = fname[: len(fname) - len(base)]
    opfname = head + base.split(".")[0] + ".dense"
    # The input is fully read before the output is opened, so the conversion
    # stays correct even if both names resolve to the same file.
    with open(opfname, "w") as out:
        out.write("\n".join(data))
def dummy2libsvm(fname: str):
    """Convert a "dummy" index-list file into ``index:value`` (libsvm) text.

    Every non-blank input line has the form ``<label> <idx> <idx> ...``.
    Each ``idx`` is rewritten as ``<idx + 1>:1``, i.e. the index is shifted
    by one and given the value 1.  The label is copied through unchanged.

    The result is written next to the input file; the output name is the
    input file name cut at its first ``.`` with ``.libsvm`` appended.  Lines
    are joined with ``"\\n"`` and no trailing newline is written.

    Args:
        fname: Path of the input file.

    Raises:
        ValueError: If a feature index is not an integer.
    """
    import os  # function-scope import keeps this block self-contained

    data = []
    with open(fname) as fid:
        for line in fid:
            tokens = line.split()
            if not tokens:
                # Blank line (e.g. a stray trailing newline): nothing to emit.
                continue
            label = tokens[0]
            feat = [str(int(tok) + 1) + ":1" for tok in tokens[1:]]
            data.append(label + " " + " ".join(feat))

    # Cut only the file name at its first dot; dots inside directory names
    # (".", "..", "v1.0") must not truncate the output path.
    base = os.path.basename(fname)
    head = fname[: len(fname) - len(base)]
    opfname = head + base.split(".")[0] + ".libsvm"
    # The input is fully read before the output is opened, so the conversion
    # stays correct even if both names resolve to the same file.
    with open(opfname, "w") as out:
        out.write("\n".join(data))
def libsvm2dummy(fname: str):
    """Convert ``index:value`` (libsvm) text into a "dummy" index-list file.

    Every non-blank input line has the form
    ``<label> <idx>:<val> <idx>:<val> ...``.  Each item is rewritten as
    ``<idx - 1>``; the value after the colon is discarded, so every listed
    index appears in the output regardless of its value.  The label is
    copied through unchanged.

    The result is written next to the input file; the output name is the
    input file name cut at its first ``.`` with ``.dummy`` appended.  Lines
    are joined with ``"\\n"`` and no trailing newline is written.

    Args:
        fname: Path of the input file.

    Raises:
        ValueError: If the part of an item before ``:`` is not an integer.
    """
    import os  # function-scope import keeps this block self-contained

    data = []
    with open(fname) as fid:
        for line in fid:
            tokens = line.split()
            if not tokens:
                # Blank line (e.g. a stray trailing newline): nothing to emit.
                continue
            label = tokens[0]
            feat = [str(int(item.split(":")[0]) - 1) for item in tokens[1:]]
            data.append(label + " " + " ".join(feat))

    # Cut only the file name at its first dot; dots inside directory names
    # (".", "..", "v1.0") must not truncate the output path.
    base = os.path.basename(fname)
    head = fname[: len(fname) - len(base)]
    opfname = head + base.split(".")[0] + ".dummy"
    # The input is fully read before the output is opened, so the conversion
    # stays correct even if both names resolve to the same file.
    with open(opfname, "w") as out:
        out.write("\n".join(data))
def libsvm2dense(fname: str):
    """Convert ``index:value`` (libsvm) text into a dense value file.

    Every non-blank input line has the form
    ``<label> <idx>:<val> <idx>:<val> ...`` with one-based indices.  The
    matching output line is the label followed by ``dim`` space-separated
    values: ``<val>`` (copied as text) at position ``idx - 1`` and ``"0.0"``
    everywhere else.

    ``dim`` is read from the file name, which must look like
    ``<name>_<dim>d_<rest>`` (for example ``abalone_8d_train.libsvm``): the
    second ``_``-separated token, minus its last character, is parsed as an
    integer.

    The result is written next to the input file; the output name is the
    input file name cut at its first ``.`` with ``.dense`` appended.  Lines
    are joined with ``"\\n"`` and no trailing newline is written.

    Args:
        fname: Path of the input file.

    Raises:
        IndexError: If a feature index is outside ``[1, dim]``, or the file
            name has no second ``_``-separated token.
        ValueError: If an item is not of the form ``<int>:<val>`` or the
            dimension token is not an integer.
    """
    import os  # function-scope import keeps this block self-contained

    base = os.path.basename(fname)
    try:
        # Prefer the file name itself so underscores in directory names
        # cannot shift the token position.
        dim = int(base.split("_")[1][:-1])
    except (IndexError, ValueError):
        # Legacy behaviour: take the token from the whole path string.
        dim = int(fname.split("_")[1][:-1])

    data = []
    with open(fname) as fid:
        for line in fid:
            tokens = line.split()
            if not tokens:
                # Blank line (e.g. a stray trailing newline): nothing to emit.
                continue
            label = tokens[0]
            feat = ["0.0"] * dim
            for item in tokens[1:]:
                idx_text, val = item.split(":")
                idx = int(idx_text)
                # An index of 0 would become list index -1 and silently
                # overwrite the last feature, so reject it explicitly.
                if not 1 <= idx <= dim:
                    raise IndexError(
                        "feature index {0} out of range for dim {1}".format(idx, dim)
                    )
                feat[idx - 1] = val
            data.append(label + " " + " ".join(feat))

    # Cut only the file name at its first dot; dots inside directory names
    # (".", "..", "v1.0") must not truncate the output path.
    head = fname[: len(fname) - len(base)]
    opfname = head + base.split(".")[0] + ".dense"
    # The input is fully read before the output is opened, so the conversion
    # stays correct even if both names resolve to the same file.
    with open(opfname, "w") as out:
        out.write("\n".join(data))
# Driver: these statements follow the function definitions with no enclosing
# header in view, so they appear to run whenever this file is executed.
# NOTE(review): both paths use a backslash separator, which only names a
# sub-directory on Windows — confirm the intended platform.

# Convert the census training file (dummy format) to both libsvm and dense.
filename = r"census\census_148d_train.dummy"
dummy2libsvm(filename)
dummy2dense(filename)
# Convert the abalone training file (libsvm format) to dense.
filename = r"abalone\abalone_8d_train.libsvm"
libsvm2dense(filename)