################## pd tf 相关使用技巧 ##################
# Notes:
# - Python's `with` statement plays the role of Go's `defer` (guaranteed cleanup).
# - Python packages must not import each other circularly: if test1 imports test2
#   and test2 imports test1, neither can be used.
## Empty checks for list/dict, key membership, file I/O ##

# Emptiness: prefer truthiness over comparing against a literal.
l = []
m = {}
if not l:       # instead of `if l == []`
    pass
if not m:       # instead of `if m == {}`
    pass

# Key membership: test `in` on the dict itself — no .keys() call needed.
test = {"key": 1}
if "key" in test:
    pass

# Insertion-ordered / default-valued dicts (analogous to Java's LinkedHashMap).
from collections import OrderedDict, defaultdict

# File I/O with context managers (file is closed automatically, like Go's defer).
l = ["a", "b"]
with open("./sql.txt", "w") as fw:
    # Append the newline lazily; writelines accepts any iterable of strings.
    fw.writelines(x + "\n" for x in l)
with open("./sql.txt", "r") as fr:
    lines = fr.readlines()
    print(lines)

# Run a shell command:
# os.system(cmd)
# Current date stamp, file paths, string slicing, exception re-raise.
from datetime import datetime
import os

# Date as a YYYYMMDD string.
today = datetime.today().strftime('%Y%m%d')

# File path: join the working directory with a file name.
path = os.getcwd()
file_path = os.path.join(path, 'prod.cfg')

# String slicing: drop everything from the first '#' onward (strip a comment).
line = "select 1  # comment"
if line.find("#") != -1:
    line = line[0:line.find('#')]

# Exception handling skeleton: bare `raise` preserves the original traceback
# (unlike `raise e`, which resets it).
try:
    pass
except Exception:
    raise
## Anonymous functions with map / sorted / filter ##
from typing import Any, Tuple, Iterator

# map: pair every element with 1 (lazy — yields tuples on demand).
l = [1, 2, 3, 4, 5]
t1: Iterator[Tuple[Any, int]] = map(lambda x: (x, 1), l)

# sorted: order dict items by value, ascending.
m = dict({"a": 1, "b": 0})
t = sorted(m.items(), key=lambda d: d[1], reverse=False)

# filter: iterating a dict yields keys; keep keys whose first char is not 'b'.
f = list(filter(lambda x: x[0].find('b') == -1, m))
some_string = "wtf"
some_dict = {}for i, some_dict[i] in enumerate(some_string): passprint(some_dict)
# for zipindex = [1,2,3]words = ['a','b','c']for i, w in zip(index, words): pass # 反转列表for i in reversed(index): passfor i in index[::-1]:
pass # all any 判断列表中的所有值是否与条件匹配;r = any(i != 1 for i in index)
print(r)## 在迭代时 删除原表 需要借助副本 ##
list_3 = [1,2,3,4]
for idx, item in enumerate(list_3[:]): list_3.remove(item)print(list_3)list_3 = [1,2,3,4]
list_temp = list_3.copy()for idx, item in enumerate(list_3[:]): list_3.remove(item)print(list_3)
## Pandas operations ##
import pandas as pd
import numpy as np

# Build a DataFrame with explicit row labels.
data = {'a': [1, 2, 3],
        'c': [4, 5, 6],
        'b': [7, 8, 9]}
frame = pd.DataFrame(data, index=['1', '2', '3'])

# groupby: iterating the result yields (key, sub-DataFrame) pairs.
df = pd.DataFrame({"vin": ["x", "x", "y"], "mileage": [10, 20, 30]})
for key, group_data in df.groupby("vin"):
    for i in range(len(group_data)):
        _ = group_data.iloc[i]["mileage"]  # row i, column 'mileage'
# NOTE: an iloc slice is a view-like selection — don't assign through it;
# copy rows into a new structure if you need to modify values.

# Select multiple columns, then multiple rows by position:
result = df[["vin", "mileage"]]
rows = result.iloc[[0, 1]]

# Concatenation: pd.concat([p1, p2]) requires matching columns.  When shapes
# don't line up, rebuild from column lists instead:
t = {"vin": result["vin"].to_list(),
     "mileage": result["mileage"].to_list()}
f2 = pd.DataFrame(t)

# numpy helpers:
np.random.randint(-1, 1, size=(5, 5))   # random ints in [-1, 1)
np.random.uniform                        # continuous counterpart
# np.take(arr, k, axis=1) -> column k of every row
## Remap a pandas column's values with .map ##
import re
import pandas as pd

users = pd.DataFrame({'Gender': ['F', 'M', 'F'], 'Age': [1, 18, 25]})

# Replacement dict {original value: new value}.
gender_map = {'F': 0, 'M': 1}
users['Gender'] = users['Gender'].map(gender_map)

# Build the replacement dict from the column's distinct values.
age_map = {val: ii for ii, val in enumerate(set(users['Age']))}
users['Age'] = users['Age'].map(age_map)

# Regex: capture the title before a trailing "(year)".
# Backslash escapes the literal parens: 'Toy Story (1995)' -> 'Toy Story '
movies = pd.DataFrame({'Title': ['Toy Story (1995)']})
pattern = re.compile(r'^(.*)\((\d+)\)$')
title_map = {val: pattern.match(val).group(1) for val in set(movies['Title'])}
#### TensorFlow (1.x API) embedding + convolution demo ####
import tensorflow as tf
import numpy as np

uid_max = 500        # vocabulary size of the embedding table
batch_size = 10
embed_dim = 32
filter_num = 8
feature_num = 20

data = np.zeros((batch_size, feature_num))
uid_data = np.reshape(data, [batch_size, feature_num])

sess = tf.InteractiveSession()
uid = tf.placeholder(tf.int32, [None, feature_num], name="uid")
uid_embed_matrix = tf.Variable(tf.random_uniform([uid_max, embed_dim], -1, 1),
                               name="uid_embed_matrix")
# Look up the embedding rows for the given user ids.
uid_embed_layer = tf.nn.embedding_lookup(uid_embed_matrix, uid,
                                         name="uid_embed_layer")
# Sum over the feature axis while keeping the rank unchanged.
new_layer = tf.reduce_sum(uid_embed_layer, axis=1, keep_dims=True)
# For conv2d a trailing channel dim is added:
# (batch_size, feature_num, embed_dim) -> (batch_size, feature_num, embed_dim, 1)
new_layer = tf.expand_dims(uid_embed_layer, -1)

# Convolution part.
filter_weights = tf.Variable(
    tf.truncated_normal([2, embed_dim, 1, filter_num], stddev=0.1),
    name="filter_weights")
filter_bias = tf.Variable(tf.constant(0.1, shape=[filter_num]),
                          name="filter_bias")
conv_layer = tf.nn.conv2d(new_layer, filter_weights, [1, 1, 1, 1],
                          padding="VALID", name="conv_layer")
relu_layer = tf.nn.relu(tf.nn.bias_add(conv_layer, filter_bias),
                        name="relu_layer")
# Pool window height 15 - 2 + 1 assumes 15 valid rows after a height-2 filter
# — NOTE(review): that implies feature_num == 15 upstream; here feature_num is
# 20, so confirm the intended window before reusing this snippet.
maxpool_layer = tf.nn.max_pool(relu_layer, [1, 15 - 2 + 1, 1, 1], [1, 1, 1, 1],
                               padding="VALID", name="maxpool_layer")

sess.run(tf.initialize_all_variables())
feed_dict = {uid: uid_data}
layer = uid_embed_layer.eval(feed_dict)
print(layer.shape)