Skip to content

Update pandas-wrapper.py; Add myio.py #5

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 9 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
49 changes: 49 additions & 0 deletions tony/myio.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
'''
myio.py
SNLP.UdS.SS16
@author: Tony Hong
'''

import os


def read_file(path):
    '''
    Read the whole file at ``path`` and return it as decoded text.

    The file is decoded as UTF-8.  ``io.open`` is used instead of the
    builtin ``open`` so the same code decodes correctly on both
    Python 2 and Python 3, and the ``with`` block guarantees the file
    handle is closed even if reading fails.

    path -- location of the file to read
    returns -- the file content as a unicode string
    raises -- IOError/OSError if the file cannot be opened,
              UnicodeDecodeError if the content is not valid UTF-8
    '''
    import io  # local import: keeps this function self-contained

    with io.open(path, 'r', encoding='utf-8') as handle:
        return handle.read()


def get_file_dict(container, file_dir, sp_filetype=''):
    '''
    Map base names to file paths under ``file_dir``.

    container   -- a dict (its values are used as file names) or a list
                   of file names; any other type yields an empty dict
    file_dir    -- directory that each resulting path is joined onto
    sp_filetype -- optional extension (without the dot); when given, every
                   path is built as ``<base name>.<sp_filetype>`` instead
                   of using the original file name
    returns     -- dict of ``base name -> path``

    The base name is the part of the file name before the FIRST dot, so
    ``'a.tar.gz'`` is keyed as ``'a'``; file names sharing a base name
    overwrite each other in the result.
    '''
    # isinstance (rather than an exact type comparison) also accepts
    # subclasses such as OrderedDict; .values() works on Python 2 and 3.
    if isinstance(container, dict):
        filenames = container.values()
    elif isinstance(container, list):
        filenames = container
    else:
        return {}

    suffix = '.' + sp_filetype if sp_filetype else ''

    result = {}
    for filename in filenames:
        name = filename.split('.')[0]
        if suffix:
            result[name] = os.path.join(file_dir, name + suffix)
        else:
            result[name] = os.path.join(file_dir, filename)
    return result

def get_text_dict(file_dict):
    '''
    Set up a text dict from a file dict.

    file_dict -- dict of ``key -> file path``
    returns   -- dict with the same keys, each mapped to the text that
                 ``read_file`` returns for the corresponding path

    Any error raised by ``read_file`` for a path propagates to the caller.
    '''
    # .items() (not the Python-2-only .iteritems()) runs on both
    # Python 2 and Python 3.
    return {key: read_file(path) for key, path in file_dict.items()}
56 changes: 56 additions & 0 deletions tony/pandas-wrapper.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
'''
Pandas module of tony
'''

import numpy as np
import pandas as pd

# TODO: getVectorL2/getValueL2 only handle 2-level series; generalize them to n levels.
def getVectorL2(series, label):
    '''Retrieve the level-1 vector stored under ``label`` in a 2-level series.

    series  -- pandas object indexed by label
    label   -- first-level label to look up
    returns -- ``series.loc[label]``, or an empty Series (the result of
               ``pd.DataFrame().sum()``) when the label is not present
    '''
    try:
        # .loc is the label-based replacement for .ix, which was removed
        # in pandas 1.0.
        return series.loc[label]
    except KeyError:
        return pd.DataFrame().sum()

def getValueL2(series, label1, label2):
    '''Retrieve the value stored under ``(label1, label2)`` in a 2-level series.

    series  -- pandas object indexed by label on two levels
    label1  -- first-level label
    label2  -- second-level label
    returns -- the addressed value, or 0 when either label is missing
    '''
    try:
        # .loc is the label-based replacement for .ix, which was removed
        # in pandas 1.0.
        return series.loc[label1].loc[label2]
    except KeyError:
        return 0


def getValue(series, *labels):
    '''Retrieve the entry addressed by ``labels`` from an n-level series.

    The labels are applied one level at a time, outermost first.

    series  -- pandas object indexed by label
    labels  -- one label per index level to descend; with no labels the
               series itself is returned
    returns -- the addressed value or sub-series.  When a label is
               missing, the fallback matches what a successful lookup
               would have produced: 0 if the labels cover every index
               level (a single value was expected), otherwise an empty
               Series (a vector was expected).
    '''
    result = series
    try:
        for label in labels:
            # .loc is the label-based replacement for .ix, which was
            # removed in pandas 1.0.
            result = result.loc[label]
    except KeyError:
        if len(labels) < series.index.nlevels:
            return pd.DataFrame().sum()
        return 0
    return result

def getIndexLambda(series):
    '''Return a one-argument function that looks a label up in ``series``.

    series  -- pandas object indexed by label
    returns -- callable ``f(label)`` that evaluates ``series.loc[label]``;
               a missing label raises KeyError when the callable is invoked
    '''
    # .loc is the label-based replacement for .ix, which was removed in
    # pandas 1.0.
    return lambda i: series.loc[i]


def main():
    '''Build a small mixed-dtype demo DataFrame.

    The frame is constructed and then discarded; nothing is returned.
    '''
    columns = {}
    columns['A'] = 1.
    columns['B'] = pd.Timestamp('20130102')
    columns['C'] = pd.Series(1, index=list(range(4)), dtype='float32')
    columns['D'] = np.array([3] * 4, dtype='int32')
    columns['E'] = pd.Categorical(["test", "train", "test", "train"])
    columns['F'] = 'foo'
    df = pd.DataFrame(columns)


if __name__ == '__main__':
    main()
12 changes: 0 additions & 12 deletions tony/util.py

This file was deleted.

30 changes: 30 additions & 0 deletions tony/utils.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
'''
Utilities module of Tony
'''

def isiterable(obj):
    '''Tell whether ``obj`` can be iterated over.

    The check simply attempts ``iter(obj)``: the answer is True when
    that succeeds and False when it raises TypeError.  Strings count as
    iterable, as do most Python collection types.
    '''
    try:
        iter(obj)
    except TypeError:
        # iter() rejected the object, so it is not iterable
        return False
    else:
        return True


def remove_punctuation(value):
    '''Return ``value`` with every ``!``, ``#`` and ``?`` character removed.

    value   -- the string to clean
    returns -- a new string without those three characters; all other
               characters are left untouched
    '''
    # Local import: this module has no top-level ``import re``, so the
    # original call raised NameError.
    import re

    return re.sub(r'[!#?]', '', value)

# Cleaning operations, listed in the order they are meant to be applied
# to each string: trim whitespace, drop '!', '#', '?', then title-case.
clean_ops = [
    str.strip,
    remove_punctuation,
    str.title,
]

def clean_strings(strings, ops):
    '''Run every string through a sequence of operations.

    strings -- iterable of input values
    ops     -- iterable of one-argument callables; for each input they
               are applied in order, each receiving the previous output
    returns -- list of the fully processed values, in input order
    '''
    def run_pipeline(text):
        for step in ops:
            text = step(text)
        return text

    return [run_pipeline(item) for item in strings]