python2 编码修改

def to_unicode(unicode_or_str):
    ''' 将str字符串转换为unicode类型
    '''
    if isinstance(unicode_or_str, str):
        value = unicode_or_str.decode('utf-8')
    else:
        value = unicode_or_str
    return value

def to_str(unicode_or_str):
    ''' 将unicode字符串转换为str类型
    '''
    if isinstance(unicode_or_str, unicode):
        value = unicode_or_str.encode('utf-8')
    else:
        value = unicode_or_str
    return value    

基础语法

conditions

# list init
[x for x in range(1, 10)]
[x for x in range(1, 10) if x % 2]
[ x if x%2 else x*100 for x in range(1, 10) ]

# oneline
'Yes' if fruit == 'Apple' else 'No'

if i > 3: print("We are done.")

count = 0 if count == N else N+1

loop

[i ** 3 for i in range(0, x+1, 3)]

input

input([prompt])

# input:
a=input('input:')

# 以空格输入
a_list=a.split(' ')

Functions

def sign(x):
    if x > 0:
        return 'positive'
    elif x < 0:
        return 'negative'
    else:
        return 'zero'

for x in [-1, 0, 1]:
    print(sign(x))
# Prints "negative", "zero", "positive"

# while
i = 1
while i < 6:
  print(i)
  i += 1

import

# /home/el/foo/fox.py
def what_does_the_fox_say():
	print('cry')

# /home/el/foo/main.py
import fox

fox.what_does_the_fox_say()


# import parent dir py
import sys
import os

sys.path.append(os.path.abspath('..'))
print(sys.path)
import water_config

range

range(start, stop, step)

运算符

# 按位取反
~

小数点取后两位

# 80.23
round(80.23456, 2)

Extended Slices

L = range(10)
# [0, 2, 4, 6, 8]
L[::2]

s='abcd'
# 'ac'
s[::2]
#'dcba'
s[::-1]

a = range(3)
#[0, 1, 2]
a
a[1:3] = [4, 5, 6]
# [0, 4, 5, 6]
a

a=range(10)
#[0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
a
#[0, 1, 2, 3, 4, 5, 6, 7, 8]
a[:-1]

Magic Methods

# 动态获取对象的所有属性
__dict__

# 打印__str__的返回值
__str__

SimpleServer

python -m http.server 8080

nohup

nohup python -u ./cmd.py > cmd.log 2>&1 &

dict

thisdict = {
  "brand": "Ford",
  "model": "Mustang",
  "year": 1964
}

# Check if Key Exists
if "model" in thisdict:
	print("yes")

# add 
thisdict["color"] = "red"

# remove
thisdict.pop("model")

# 获取属性, 可迭代
# 用元祖来存放key和vlaue
dict.items()

# 只包含key
dict.keys()

# 只包含value
dict.values()

# loop
for key, value in d.items():

python 执行bash命令

import subprocess

subprocess.call(['ls','-l'])

process = subprocess.Popen(['ls', '-l'])
(output, err) = process.communicate()

tuple

# A tuple is a collection which is ordered and unchangeable.
thistuple = ("apple", "banana", "cherry")
print(thistuple[1])

# Change Tuple Values
x = ("apple", "banana", "cherry")
y = list(x)
y[1] = "kiwi"
x = tuple(y)

判断变量是否存在

# To check the existence of a local variable:
if 'myVar' in locals():
  # myVar exists.

# To check the existence of a global variable:
if 'myVar' in globals():
  # myVar exists.

判断变量类型

x = 12
>>> isinstance(x, int)
True

python 默认执行

if __name__ == '__main__':

List comprehensions can also contain conditions:

nums = [0, 1, 2, 3, 4]
even_squares = [x ** 2 for x in nums if x % 2 == 0]
print(even_squares)  # Prints "[0, 4, 16]"

zip() in conjunction with the * operator can be used to unzip a list:

>>> x = [1, 2, 3]
>>> y = [4, 5, 6]
>>> zipped = zip(x, y)
>>> list(zipped)
[(1, 4), (2, 5), (3, 6)]
>>> x2, y2 = zip(*zip(x, y))
>>> x == list(x2) and y == list(y2)
True

面向对象

Classes

class Greeter(object):

    # Constructor
    def __init__(self, name):
        self.name = name  # Create an instance variable
        # 保护变量
        self._say = "haha"
        # 私有变量
        self.__data = "test"
    
    # 私有方法
    def __say():
        print(self._say)

    # Instance method
    def greet(self, loud=False):
        if loud:
            print('HELLO, %s!' % self.name.upper())
        else:
            print('Hello, %s' % self.name)
    
    # call 方法
    def __call__(a, b):
        self.a = a
        self.b = b

g = Greeter('Fred')  # Construct an instance of the Greeter class
g.greet()            # Call an instance method; prints "Hello, Fred"
g.greet(loud=True)   # Call an instance method; prints "HELLO, FRED!"
# oncall 方法
g(a, b)

call & init

class Foo:
    def __init__(self, a, b, c):
        # ...

x = Foo(1, 2, 3) # __init__

class Foo:
    def __call__(self, a, b, c):
        # ...

x = Foo()
x(1, 2, 3) # __call__

@property装饰器 Python内置的@property装饰器可以把类的方法伪装成属性调用的方式。也就是本来是Foo.func()的调用方法，变成Foo.func的方式。在很多场合下，这是一种非常有用的机制。

class People:

    def __init__(self, name, age):
        self.__name = name
        self.__age = age

    @property
    def age(self):
        return self.__age

    @age.setter
    def age(self, age):
        if isinstance(age, int):
            self.__age = age
        else:
            raise ValueError

    @age.deleter
    def age(self):
        print("删除年龄数据！")

obj = People("jack", 18)
print(obj.age)
obj.age = 19
print("obj.age:  ", obj.age)
del obj.age

---------------------------
打印结果：
18
obj.age:   19
删除年龄数据！

# 使用方法
obj = People("jack", 18)
a = obj.age    # 获取值
obj.age = 19    # 重新赋值
del obj.age     # 删除属性

setattr

# The setattr() function sets the value of the specified attribute of the specified object.
class Person:
  name = "John"
  age = 36
  country = "Norway"

setattr(Person, 'age', 40)

vars

# The vars() function returns the __dic__ attribute of an object.
# The __dict__ attribute is a dictionary containing the object's changeable attributes.

class Person:
  name = "John"
  age = 36
  country = "norway"

x = vars(Person) 

高级用法

lambda

# Program to double each item in a list using map()
my_list = [1, 5, 4, 6, 8, 11, 3, 12]

new_list = list(map(lambda x: x * 2 , my_list))

# Output: [2, 10, 8, 12, 16, 22, 6, 24]
print(new_list)

# 冒号表示匿名函数的参数
f = lambda x: x*x
# 25
f(5)

map

#Map applies a function to all the items in an input_list.
map(function_to_apply, list_of_inputs)

items = [1, 2, 3, 4, 5]
squared = list(map(lambda x: x**2, items))

#Most of the times we use lambdas with map so I did the same. Instead of a list of inputs we can even have a list of functions!
def multiply(x):
    return (x*x)
def add(x):
    return (x+x)

funcs = [multiply, add]
for i in range(5):
    value = list(map(lambda x: x(i), funcs))
    print(value)

# Output:
# [0, 0]
# [1, 2]
# [4, 4]
# [9, 6]
# [16, 8]

collections

from collections import namedtuple

# Python code to demonstrate namedtuple() and 
# Access by name, index and getattr() 
  
# importing "collections" for namedtuple() 
import collections 
  
# Declaring namedtuple() 
Student = collections.namedtuple('Student',['name','age','DOB']) 
  
# Adding values 
S = Student('Nandini','19','2541997') 
  
# Access using index 
print ("The Student age using index is : ",end ="") 
print (S[1]) 
  
# Access using name  
print ("The Student name using keyname is : ",end ="") 
print (S.name) 
  
# Access using getattr() 
print ("The Student DOB using getattr() is : ",end ="") 
print (getattr(S,'DOB'))

slice

# A slice object is used to specify how to slice a sequence. You can specify where to start the slicing, and where to end. You can also specify the step, which allows you to e.g. slice only every other item.
# slice(start, end, step)
a = ("a", "b", "c", "d", "e", "f", "g", "h")
x = slice(3, 5)
# ('d', 'e')
print(a[x])

kwargs

# You can use **kwargs to let your functions take an arbitrary number of keyword arguments

>>> def print_keyword_args(**kwargs):
...     # kwargs is a dict of the keyword args passed to the function
...     for key, value in kwargs.items():
...         print "%s = %s" % (key, value)
... 
>>> print_keyword_args(first_name="John", last_name="Doe")
first_name = John
last_name = Doe

def some_kwargs(kwarg_1, kwarg_2, kwarg_3):
    print("kwarg_1:", kwarg_1)
    print("kwarg_2:", kwarg_2)
    print("kwarg_3:", kwarg_3)

kwargs = {"kwarg_1": "Val", "kwarg_2": "Harper", "kwarg_3": "Remy"}
some_kwargs(**kwargs)

yield

def createGenerator():
    mylist = range(3)
    for i in mylist:
        yield i*i

# create a generator
mygenerator = createGenerator()

# mygenerator is an object!
print(mygenerator)
# <generator object createGenerator at 0xb7555c34>

for i in mygenerator:
    print(i)
# 0
# 1
# 4

# get generator content
next(mygenerator)

getopt

def parseArgs(self):
    try:
        # 短参数后面跟:代表可以接收值，长参数后面跟=代表接收一个值
        options, args = getopt.getopt(sys.argv[1:], 'cgf:', ['common', 'sg', 'filename='])
        for opt, value in options:
            if opt in ('-c', '--common'):
                self.water_name = "beijing_do_s"
                self.csv_file = "../csv/Beijing_DO_s.csv"
            if opt in ('-g', '--sg'):
                self.water_name = "beijing_do_s_savgol"
                self.csv_file = "../csv/Beijing_DO_s_savgol.csv"
            if opt in ('-f','--filename'):
                fileName = value
                print("[*] Filename is ",fileName)
                # do something
                exit()
    except getopt.GetoptError as msg:
        print(msg)
        sys.exit(2)

# 调用
# 短参数使用的时候是参数名[空格]参数值
# 长参数使用的时候是参数名=参值

进度条

import time
from tqdm import tqdm

with tqdm(total=200) as pbar:
    pbar.set_description('Processing:')
    # total表示总的项目, 循环的次数20*10(每次更新数目) = 200(total)
    for i in range(20):
        # 进行动作, 这里是过0.1s
        time.sleep(0.1)
        # 进行进度更新, 这里设置10个
        pbar.update(10)
Processing:: 100%|██████████| 200/200 [00:02<00:00, 91.94it/s]

计时

import time
# 开始时间
start_time = time.time()
# 需要计算的方法
time.sleep(1)
end_time = time.time()
print("proc time: {:.2f}s".format(end_time - start_time))

itertools

import itertools

# groupby1
groups = []
uniquekeys = []
data = sorted(data, key=keyfunc)
for k, g in groupby(data, keyfunc):
    groups.append(list(g))      # Store group iterator as a list
    uniquekeys.append(k)

# groupby2
for (uid, group) in itertools.groupby(iter([(yield line.strip().split('\t')) for line in sys.stdin]),lambda item: item[0]):
    try:
        if not uid or len(uid) > 20 or uid == 'null':
            continue
    except:
        continue

sort

list.sort(key=..., reverse=...)
sorted(list, key=..., reverse=...)

enumerate

for counter, value in enumerate(some_list):
    print(counter, value)

Anaconda

anaconda2

# Create a conda environment named tensorflow to run a version of Python by invoking the following command:
conda create -n tensorflow pip python=2.7

# remove conda environment
conda remove -n tensorflow --all

# Activate the conda environment by issuing the following command:
source activate tensorflow

# win activate
activate tensorflow

# deactivate
source deactivate

# 列出所有环境
conda info --envs

# 换源
conda config --add channels https://mirrors.tuna.tsinghua.edu.cn/anaconda/pkgs/free/
conda config --add channels https://mirrors.tuna.tsinghua.edu.cn/anaconda/pkgs/main/
conda config --set show_channel_urls yes

# site-packages
/home/joseph/anaconda3/envs/neo4j/lib/python3.6/site-packages

# 查看conda配置
conda config --show

pip

# 列出已安装的包
pip freeze or pip list

# 导出包到requirements.txt
pip freeze > requirements.txt

# 安装
pip install <包名> or pip install -r requirements.txt

# 卸载包
pip uninstall <包名> 或 pip uninstall -r requirements.txt

# 升级包
pip install -U <包名>

# 显示包所在的目录
pip show -f <包名>

# 安装指定版本
pip install numpy==1.13.1

# 搜索包
pip search <搜索关键字>

# 显示已安装包的版本
pip show <搜索关键字>

# 换源
pip install -i https://pypi.tuna.tsinghua.edu.cn/simple <包名>

# 阿里源
pip install -i https://mirrors.aliyun.com/pypi/simple/ xx

查看包的所有依赖版本

# main.py
from pip._vendor import pkg_resources

def find_dependencies(package_name):
    package = pkg_resources.working_set.by_key[package_name]

    print([str(dependency) for dependency in package.requires()])


find_dependencies('requests')

jupyter

# 添加核
source activate <tensorflow>
pip install ipykernel

# list kernel
jupyter kernelspec list

# add kernel(处于想添加kernel的环境中)
python -m ipykernel install --name <tensorflow> --user

# remove kernel
jupyter kernelspec remove <kernelname>

# 生成配置文件
jupyter notebook --generate-config

# 生成密码
jupyter notebook password

# 修改文件

# 外网可访问
c.NotebookApp.ip='*'
c.NotebookApp.password = u'sha:ce...刚才复制的那个密文'
c.NotebookApp.open_browser = False
c.NotebookApp.port =8888 #可自行指定一个端口, 访问时使用该端口

# 后台挂起
nohup jupyter notebook > jupyter.log 2>&1 &

文件管理

path

os.path.join('/home/build/test/sandboxes/', todaystr, '/new_sandbox/')

# 获取当前目录
os.getcwd()

# create dir if not exist
import os
if not os.path.exists(directory):
    os.makedirs(directory)

# remove file
os.remove("demofile.txt")

# remove folder
os.rmdir("myfolder")

# 列出文件夹中的文件
for filename in os.listdir(csv_dir):
	if filename.endswith(".csv"):
		filenames.append(filename)
            continue
        else:
            continue

# 合并文件
with open(target_file, 'w', encoding='UTF-8') as outfile:
	for fname in filenames:
    	with open(csv_dir+fname, encoding='UTF-8') as infile:
            for line in infile:
                outfile.write(line)

生成日期格式的文件

tradeday = datetime.datetime.today()
filename = tradeday.strftime(config['pos_diff_filename'])

csv_filename = os.path.join(results_file_path, f'vdbench_{test_name}_stats_{datetime.now().strftime("%Y%m%d_%H%M%S")}.csv')

file io

#'a' is for append, or use
#'w' to write with truncation
#'r' to read the content of the file
with open('somefile.txt', 'a', encoding='UTF-8') as the_file:
    the_file.write('Hello\n')
    contents = the_file.read()

# return the 5 first characters of the file
f.read(5)

# return one line of the file
f1 = f.readline()
for x in f1:
	print(x)


# looping through the lines of the file
for x in f:
	print(x)

# or
f = open('myfile', 'w', encoding='UTF-8')
f.write('hi there\n')  # python will convert \n to os.linesep
f.close()  # you can omit in most cases as the destructor will call it

# create file
f = open("myfile.txt", "x")

# format
with open("results/gen.{}_epoch{}.json".format(config.graph_name, num_epoch), 'w') as fout:
	fout.write(json.dumps(results))

# Double precision numbers
print("%.2f" % a)

数据处理

json

import json

# some JSON:
x =  '{ "name":"John", "age":30, "city":"New York"}'

# parse x:
y = json.loads(x)

# the result is a Python dictionary:
print(y["age"])

# a Python object (dict):
x = {
  "name": "John",
  "age": 30,
  "city": "New York"
}

# convert into JSON:
y = json.dumps(x)

# the result is a JSON string:
print(y)

pickle

import pickle
a = ['test value','test value 2','test value 3']

file_Name = "testfile.pkl"
# open the file for writing
fileObject = open(file_Name,'wb') 

# this writes the object a to the
# file named 'testfile.pkl'
pickle.dump(a,fileObject)

# here we close the fileObject
fileObject.close()

# we open the file for reading
fileObject = open(file_Name.pkl,'rb')
# load the object from the file into var b
b = pickle.load(fileObject, encoding='latin1')
b
['test value','test value 2','test value 3']

logger

import logging

# 定义
def get_logger(log_dir, name, log_filename='info.log', level=logging.INFO):
    logger = logging.getLogger(name)
    logger.setLevel(level)
    # Add file handler and stdout handler
    formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')
    file_handler = logging.FileHandler(os.path.join(log_dir, log_filename))
    file_handler.setFormatter(formatter)
    # Add console handler.
    console_formatter = logging.Formatter('%(asctime)s - %(levelname)s - %(message)s')
    console_handler = logging.StreamHandler(sys.stdout)
    console_handler.setFormatter(console_formatter)
    logger.addHandler(file_handler)
    logger.addHandler(console_handler)
    # Add google cloud log handler
    logger.info('Log directory: %s', log_dir)
    return logger

# 使用
logger = get_logger(self._log_dir, __name__, 'info.log', level=log_level)
logger.info(kwargs)

datetime

import datetime

x = datetime.datetime.now()

# 2020-05-17 00:00:00
x = datetime.datetime(2020, 5, 17)

x = datetime.datetime(2018, 6, 1, 4)
#June
print(x.strftime("%B"))

#3点12分
x = datetime.time(3,12,0)

# using - as separator
datetime.datetime.now().strftime("%Y-%m-%d-%H-%M-%S")

# str to datetime
from datetime import datetime
datetime_object = datetime.strptime('Jun 1 2005  1:33PM', '%b %d %Y %I:%M%p')

regular expression正则表达式

import re

# re.match只匹配字符串的开始，如果字符串开始不符合正则表达式，则匹配失败，函数返回None；而re.search匹配整个字符串，直到找到一个匹配。
# 使用group(num) 或 groups() 匹配对象函数来获取匹配表达式。
re.match(pattern, string, flags=0)
re.search(pattern, string, flags=0)

# 非贪婪模式
(.*?)

re.I #忽略大小写
re.M #多行模式

line = "Cats are smarter than dogs";
# Python的r前缀，就不用考虑转义的问题了
searchObj = re.search( r'(.*) are (.*?) .*', line, re.M|re.I)

searchObj.group() #Cats are smarter than dogs
searchObj.group(1) #Cats
searchObj.group(2) #smarter

math

# Return the ceiling of x as a float, the smallest integer value greater than or equal to x.
math.ceil(x)
# Return the floor of x as a float, the largest integer value less than or equal to x.
math.floor(x)

random

# seed
random.seed(5)

# get random number
random.random()

# returns a randomly selected element from the specified range
random.randrange(start, stop, step)

Set

# A set is a collection which is unordered and unindexed. In Python sets are written with curly brackets.
thisset = {"apple", "banana", "cherry"}
print(thisset)

# To add one item to a set use the add() method.
thisset.add("orange")

# Add multiple items to a set, using the update() method:
thisset.update(["orange", "mango", "grapes"])

# To remove an item in a set, use the remove(), or the discard() method.
thisset.remove("banana")

# The intersection() method returns a set that contains the similarity between two or more sets.
# set.intersection(set1, set2 ... etc)
y = {"c", "d", "e"}
z = {"f", "g", "c"}
#{'c'}
result = x.intersection(y, z)

keyword argument unpacking syntax

# argument unpacking
*args
# keyword argument unpacking
**kwargs
# usually a tuple, always an iterable[1]
args = (1, 2, 3)
# f(*args) → f(1, 2, 3)

# usually a dict, always a mapping*
kwargs = {"a": 1, "b": 2, "c": 3}
# f(**kwargs) -> f(a=1, b=2, c=3)

argparse

import argparse

parser = argparse.ArgumentParser(description='命令行中传入一个数字')
#type是要传入的参数的数据类型  help是该参数的提示信息
parser.add_argument('integers', type=str, help='传入的数字')

args = parser.parse_args()

print(args.integers)

parser = argparse.ArgumentParser(description='姓名')
parser.add_argument('--family', type=str, default='张',help='姓')
parser.add_argument('--name', type=str, default='三', help='名')
args = parser.parse_args()

#打印姓名
print(args.family+args.name)

raise

x = -1

if x < 0:
  raise Exception("Sorry, no numbers below zero")

assert

x = "hello"

#if condition returns True, then nothing happens:
assert x == "hello"

#if condition returns False, AssertionError is raised:
assert x == "goodbye", "x should be 'hello'"

Raise an exception 抛出异常

if x < 0:
    raise Exception("Sorry, no numbers below zero")

多线程

multiprocessing

from multiprocessing import Process
import os

# 子进程要执行的代码
def run_proc(name):
    print 'Run child process %s (%s)...' % (name, os.getpid())

if __name__=='__main__':
    print 'Parent process %s.' % os.getpid()
    p = Process(target=run_proc, args=('test',))
    print 'Process will start.'
    p.start()
    p.join()
    print 'Process end.'

# 进程池
from multiprocessing import Pool
import os, time, random

def long_time_task(name):
    print 'Run task %s (%s)...' % (name, os.getpid())
    start = time.time()
    time.sleep(random.random() * 3)
    end = time.time()
    print 'Task %s runs %0.2f seconds.' % (name, (end - start))

if __name__=='__main__':
    print 'Parent process %s.' % os.getpid()
    p = Pool()
    for i in range(5):
        p.apply_async(long_time_task, args=(i,))
    print 'Waiting for all subprocesses done...'
    p.close()
    p.join()
    print 'All subprocesses done.'

XML解析

<?xml version="1.0"?>
<data>
    <country name="Liechtenstein">
        <rank>1</rank>
        <year>2008</year>
        <gdppc>141100</gdppc>
        <neighbor name="Austria" direction="E"/>
        <neighbor name="Switzerland" direction="W"/>
    </country>
    <country name="Singapore">
        <rank>4</rank>
        <year>2011</year>
        <gdppc>59900</gdppc>
        <neighbor name="Malaysia" direction="N"/>
    </country>
    <country name="Panama">
        <rank>68</rank>
        <year>2011</year>
        <gdppc>13600</gdppc>
        <neighbor name="Costa Rica" direction="W"/>
        <neighbor name="Colombia" direction="E"/>
    </country>
</data>

import xml.etree.ElementTree as ET
tree = ET.parse('country_data.xml')
root = tree.getroot()

# parse from string
root = ET.fromstring(country_data_as_string)

# root
>>> root.tag
'data'
>>> root.attrib
{}

# loop
>>> for child in root:
...     print(child.tag, child.attrib)
...
country {'name': 'Liechtenstein'}
country {'name': 'Singapore'}
country {'name': 'Panama'}

# get text
>>> root[0][1].text
'2008'

单元测试

测试模块

class Dict(dict):

    def __init__(self, **kw):
        super().__init__(**kw)

    def __getattr__(self, key):
        try:
            return self[key]
        except KeyError:
            raise AttributeError(r"'Dict' object has no attribute '%s'" % key)

    def __setattr__(self, key, value):
        self[key] = value

单元测试模块

import unittest

from mydict import Dict

class TestDict(unittest.TestCase):

    def test_init(self):
        d = Dict(a=1, b='test')
        self.assertEqual(d.a, 1)
        self.assertEqual(d.b, 'test')
        self.assertTrue(isinstance(d, dict))

    def test_key(self):
        d = Dict()
        d['key'] = 'value'
        self.assertEqual(d.key, 'value')

    def test_attr(self):
        d = Dict()
        d.key = 'value'
        self.assertTrue('key' in d)
        self.assertEqual(d['key'], 'value')

    def test_keyerror(self):
        d = Dict()
        with self.assertRaises(KeyError):
            value = d['empty']

    def test_attrerror(self):
        d = Dict()
        with self.assertRaises(AttributeError):
            value = d.empty

运行单元测试

if __name__ == '__main__':
    unittest.main()

性能测试

整体性能测试

# 终端使用python -m cProfile命令
python -m cProfile -o output.prof your_script.py

# 查看性能图表
pip install snakeviz
snakeviz output.prof

使用line_profiler深入分析特定函数

# 使用 line_profiler 或 memory_profiler
pip install line_profiler memory_profiler

# 在代码中添加装饰器@profile到你想分析的函数
kernprof -l -v your_script.py